Table 1: Alcohol-specific age-standardised death rates per 100,000 population, deaths registered in the United Kingdom, 2001 to 2016

In [1]:
from gssutils import *

if is_interactive():
    # Imports stay inside the guard, exactly where the original cell had them.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session whose responses are cached on disk under ./.cache,
    # using cachecontrol's LastModified heuristic.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Local folder that receives the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset/current/maindatatablesforalcoholspecificdeaths.xls'
    inputFile = sourceFolder / 'maindatatablesforalcoholspecificdeaths.xls'
    response = session.get(inputURL)
    # Fail here on a 4xx/5xx answer instead of saving an error page as the
    # .xls file and hitting an obscure parse error in a later cell.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the first tab returned for sheet id 'Table 1'.
# NOTE(review): inputFile is only assigned inside the is_interactive() guard of the
# download cell - presumably it is provided some other way in non-interactive runs; confirm.
tab = loadxlstabs(inputFile, sheetids='Table 1')[0]

Loading in\maindatatablesforalcoholspecificdeaths.xls which has size 185856 bytes
Table names: ['Table 1']


https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset/current/maindatatablesforalcoholspecificdeaths.xls

In [3]:
# Observation cells: every non-blank cell in the region reached by expanding
# from C7 downwards and then to the right.
observations = tab.excel_ref('C7').expand(DOWN).expand(RIGHT).is_not_blank()

In [4]:
observations

{<F10 11.8>, <C10 6386.0>, <P16 17.3>, <C8 5915.0>, <K16 7.7>, <I12 7.7>, <K12 8.0>, <H8 1878.0>, <H16 2239.0>, <K13 8.0>, <E21 11.1>, <N10 16.1>, <I10 7.3>, <N8 15.4>, <H20 2314.0>, <P14 18.3>, <F13 12.5>, <E17 11.8>, <F11 12.1>, <O16 16.3>, <H21 2364.0>, <J12 7.4>, <D8 10.9>, <P13 17.5>, <H13 2259.0>, <J10 7.0>, <K8 7.0>, <F16 12.2>, <J16 7.1>, <H10 2072.0>, <J19 6.8>, <F19 11.5>, <H9 1934.0>, <D10 11.5>, <I13 7.7>, <O21 15.0>, <C15 6910.0>, <M18 4434.0>, <M16 4831.0>, <F17 12.3>, <F7 10.8>, <J15 7.3>, <D13 12.2>, <O17 16.2>, <F22 12.0>, <J13 7.4>, <C17 7179.0>, <P21 15.9>, <P20 16.2>, <K20 7.7>, <O18 14.7>, <D11 11.8>, <N7 14.9>, <D20 11.4>, <K22 7.8>, <P22 16.7>, <D21 11.3>, <E7 10.3>, <I19 7.1>, <H18 2235.0>, <O20 15.3>, <P17 17.2>, <D16 12.0>, <C13 6960.0>, <C11 6622.0>, <I18 7.2>, <J7 6.3>, <N20 15.7>, <M19 4591.0>, <J21 7.2>, <O8 15.0>, <K10 7.6>, <P10 16.6>, <H17 2345.0>, <F14 13.0>, <I8 6.7>, <E9 11.1>, <J22 7.2>, <H7 1823.0>, <H22 2399.0>, <I9 6.9>, <N19 15.5>, <P8 15.9>, <E

In [5]:
# Sex group headings: cells on row 5 from C5 rightwards that are not whitespace-only.
Sex = tab.excel_ref('C5').expand(RIGHT).is_not_whitespace()
Sex

{<C5 'All persons'>, <H5 'Females'>, <M5 'Males'>}

In [6]:
# Column headings on row 6 from C6 rightwards. Despite the variable name this bag
# holds every measure heading (deaths, rate and both confidence limits), not only 'Deaths'.
deaths = tab.excel_ref('C6').expand(RIGHT).is_not_blank()
deaths

{<D6 'Rate per 100,000 persons1'>, <N6 'Rate per 100,000 persons1'>, <O6 'Lower 95% confidence limit'>, <H6 'Deaths'>, <F6 'Upper 95% confidence limit'>, <P6 'Upper 95% confidence limit'>, <C6 'Deaths'>, <M6 'Deaths'>, <I6 'Rate per 100,000 persons1'>, <E6 'Lower 95% confidence limit'>, <K6 'Upper 95% confidence limit'>, <J6 'Lower 95% confidence limit'>}

In [7]:
# lc = tab.excel_ref('D6').expand(RIGHT).filter(contains_string('Lower 95% confidence limit'))
# lc

In [8]:
# uc = tab.excel_ref('F6').expand(RIGHT).filter(contains_string('Upper 95% confidence limit'))
# uc

In [9]:
# Year labels: column A from row 7 downwards, minus everything from A23 downwards.
# NOTE(review): the A23 cut-off is hard-coded; if a later edition of the workbook
# adds a year row, that row would presumably be excluded here - confirm on update.
Year = tab.excel_ref('A7').expand(DOWN) - tab.excel_ref('A23').expand(DOWN)  
Year

{<A19 2013.0>, <A21 2015.0>, <A12 2006.0>, <A13 2007.0>, <A20 2014.0>, <A10 2004.0>, <A15 2009.0>, <A11 2005.0>, <A9 2003.0>, <A17 2011.0>, <A14 2008.0>, <A8 2002.0>, <A18 2012.0>, <A7 2001.0>, <A22 2016.0>, <A16 2010.0>}

In [10]:
# Dimension definitions attached to each observation cell.
Dimensions = [
            # Year label looked up DIRECTLY to the LEFT of the observation.
            HDim(Year,'Year',DIRECTLY,LEFT),
            # Sex heading looked up as the CLOSEST one to the LEFT.
            HDim(Sex,'Sex',CLOSEST,LEFT),
            # Row-6 column heading DIRECTLY ABOVE the observation.
            HDim(deaths, 'Alcohol Specific Deaths',DIRECTLY,ABOVE),
            # NOTE(review): these two constants are attached to every observation,
            # including the rate and confidence-limit cells - confirm that
            # 'Count' / 'People' is intended for those rows.
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [11]:
# Bind the observation cells to their dimensions.
# NOTE(review): processTIMEUNIT=True presumably enables databaker's time-unit handling - confirm.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Only render the HTML preview of the segment when running interactively.
if is_interactive():
    savepreviewhtml(c1)

0,1,2,3
OBS,Year,Sex,Alcohol Specific Deaths

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
Back to contents,,,,,,,,,,,,,,,,
"Table 1: Alcohol-specific age-standardised death rates per 100,000 population, deaths registered in the United Kingdom, 2001 to 20161,2,3,4",,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,All persons,,,,,Females,,,,,Males,,,,
Year,,Deaths,"Rate per 100,000 persons1",Lower 95% confidence limit,Upper 95% confidence limit,,Deaths,"Rate per 100,000 persons1",Lower 95% confidence limit,Upper 95% confidence limit,,Deaths,"Rate per 100,000 persons1",Lower 95% confidence limit,Upper 95% confidence limit,
2001.0,,5701.0,10.6,10.3,10.8,,1823.0,6.6,6.3,6.9,,3878.0,14.9,14.4,15.3,
2002.0,,5915.0,10.9,10.6,11.2,,1878.0,6.7,6.4,7.0,,4037.0,15.4,15.0,15.9,
2003.0,,6255.0,11.4,11.1,11.7,,1934.0,6.9,6.6,7.2,,4321.0,16.3,15.8,16.8,
2004.0,,6386.0,11.5,11.3,11.8,,2072.0,7.3,7.0,7.6,,4314.0,16.1,15.6,16.6,


In [12]:
# Convert the conversion segment to a pandas DataFrame (one row per observation) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Year,Sex,Alcohol Specific Deaths,Measure Type,Unit
0,5701.0,2001.0,All persons,Deaths,Count,People
1,10.6,2001.0,All persons,"Rate per 100,000 persons1",Count,People
2,10.3,2001.0,All persons,Lower 95% confidence limit,Count,People
3,10.8,2001.0,All persons,Upper 95% confidence limit,Count,People
4,1823.0,2001.0,Females,Deaths,Count,People
5,6.6,2001.0,Females,"Rate per 100,000 persons1",Count,People
6,6.3,2001.0,Females,Lower 95% confidence limit,Count,People
7,6.9,2001.0,Females,Upper 95% confidence limit,Count,People
8,3878.0,2001.0,Males,Deaths,Count,People
9,14.9,2001.0,Males,"Rate per 100,000 persons1",Count,People


In [13]:
# Parse Year as a number; values that cannot be parsed become NaN and are then replaced by 0.
new_table = new_table.assign(
    Year=pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)
)

In [14]:
# Store Year as an integer so 2001.0 becomes 2001.
new_table = new_table.astype({'Year': int})

In [15]:
# The observation column is called 'Value' from here on.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [16]:
# new_table['Year'] = new_table['Year'].astype(str)

In [17]:
# new_table['Value'] = new_table['Value'].astype(int)

In [18]:
new_table.dtypes

Value                      float64
Year                         int32
Sex                         object
Alcohol Specific Deaths     object
Measure Type                object
Unit                        object
dtype: object

In [19]:
new_table

Unnamed: 0,Value,Year,Sex,Alcohol Specific Deaths,Measure Type,Unit
0,5701.0,2001,All persons,Deaths,Count,People
1,10.6,2001,All persons,"Rate per 100,000 persons1",Count,People
2,10.3,2001,All persons,Lower 95% confidence limit,Count,People
3,10.8,2001,All persons,Upper 95% confidence limit,Count,People
4,1823.0,2001,Females,Deaths,Count,People
5,6.6,2001,Females,"Rate per 100,000 persons1",Count,People
6,6.3,2001,Females,Lower 95% confidence limit,Count,People
7,6.9,2001,Females,Upper 95% confidence limit,Count,People
8,3878.0,2001,Males,Deaths,Count,People
9,14.9,2001,Males,"Rate per 100,000 persons1",Count,People


In [20]:
# Rows carrying the lower confidence limit, kept separately so they can be joined back as a column.
nt1 = new_table.loc[
    new_table['Alcohol Specific Deaths'].eq('Lower 95% confidence limit')
]

In [21]:
nt1

Unnamed: 0,Value,Year,Sex,Alcohol Specific Deaths,Measure Type,Unit
2,10.3,2001,All persons,Lower 95% confidence limit,Count,People
6,6.3,2001,Females,Lower 95% confidence limit,Count,People
10,14.4,2001,Males,Lower 95% confidence limit,Count,People
14,10.6,2002,All persons,Lower 95% confidence limit,Count,People
18,6.4,2002,Females,Lower 95% confidence limit,Count,People
22,15.0,2002,Males,Lower 95% confidence limit,Count,People
26,11.1,2003,All persons,Lower 95% confidence limit,Count,People
30,6.6,2003,Females,Lower 95% confidence limit,Count,People
34,15.8,2003,Males,Lower 95% confidence limit,Count,People
38,11.3,2004,All persons,Lower 95% confidence limit,Count,People


In [22]:
# Rename nt1's own 'Value' column to the name it will carry after the merge.
# rename() works from nt1's headers (not from new_table's, as the previous list
# comprehension did) and returns a new frame instead of mutating a filtered slice.
nt1 = nt1.rename(columns={'Value': 'Lower 95% confidence limit'})

In [23]:
# Rows carrying the upper confidence limit, kept separately so they can be joined back as a column.
nt2 = new_table.loc[
    new_table['Alcohol Specific Deaths'].eq('Upper 95% confidence limit')
]

In [24]:
nt2

Unnamed: 0,Value,Year,Sex,Alcohol Specific Deaths,Measure Type,Unit
3,10.8,2001,All persons,Upper 95% confidence limit,Count,People
7,6.9,2001,Females,Upper 95% confidence limit,Count,People
11,15.3,2001,Males,Upper 95% confidence limit,Count,People
15,11.2,2002,All persons,Upper 95% confidence limit,Count,People
19,7.0,2002,Females,Upper 95% confidence limit,Count,People
23,15.9,2002,Males,Upper 95% confidence limit,Count,People
27,11.7,2003,All persons,Upper 95% confidence limit,Count,People
31,7.2,2003,Females,Upper 95% confidence limit,Count,People
35,16.8,2003,Males,Upper 95% confidence limit,Count,People
39,11.8,2004,All persons,Upper 95% confidence limit,Count,People


In [25]:
# Rename nt2's own 'Value' column to the name it will carry after the merge.
# rename() works from nt2's headers (not from new_table's, as the previous list
# comprehension did) and returns a new frame instead of mutating a filtered slice.
nt2 = nt2.rename(columns={'Value': 'Upper 95% confidence limit'})

In [26]:
new_table.count()

Value                      192
Year                       192
Sex                        192
Alcohol Specific Deaths    192
Measure Type               192
Unit                       192
dtype: int64

In [27]:
# Keep every row except the upper-confidence-limit rows.
new_table = new_table.loc[
    new_table['Alcohol Specific Deaths'].ne('Upper 95% confidence limit')
]

In [28]:
# Keep every row except the lower-confidence-limit rows.
new_table = new_table.loc[
    new_table['Alcohol Specific Deaths'].ne('Lower 95% confidence limit')
]

In [29]:
# Join the nt1 rows onto the remaining rows by Year and Sex.
Final_table = new_table.merge(nt1, how='inner', on=['Year', 'Sex'])

In [30]:
Final_table.head(2)

Unnamed: 0,Value,Year,Sex,Alcohol Specific Deaths_x,Measure Type_x,Unit_x,Lower 95% confidence limit,Alcohol Specific Deaths_y,Measure Type_y,Unit_y
0,5701.0,2001,All persons,Deaths,Count,People,10.3,Lower 95% confidence limit,Count,People
1,10.6,2001,All persons,"Rate per 100,000 persons1",Count,People,10.3,Lower 95% confidence limit,Count,People


In [31]:
# Keep the left-hand (_x) descriptive columns plus the lower limit column from the merge.
Final_table = Final_table.loc[:, [
    'Value',
    'Year',
    'Sex',
    'Alcohol Specific Deaths_x',
    'Measure Type_x',
    'Unit_x',
    'Lower 95% confidence limit',
]]

In [32]:
# Join the nt2 rows onto the table by Year and Sex.
Final_table = Final_table.merge(nt2, how='inner', on=['Year', 'Sex'])

In [33]:
Final_table.head(1)

Unnamed: 0,Value,Year,Sex,Alcohol Specific Deaths_x,Measure Type_x,Unit_x,Lower 95% confidence limit,Upper 95% confidence limit,Alcohol Specific Deaths,Measure Type,Unit
0,5701.0,2001,All persons,Deaths,Count,People,10.3,10.8,Upper 95% confidence limit,Count,People


In [34]:
# Final column order. 'Measure Type' and 'Unit' are the unsuffixed columns
# that arrived with the second merge.
Final_table = Final_table.loc[:, [
    'Year',
    'Sex',
    'Alcohol Specific Deaths_x',
    'Measure Type',
    'Value',
    'Lower 95% confidence limit',
    'Upper 95% confidence limit',
    'Unit',
]]

In [35]:
Final_table

Unnamed: 0,Year,Sex,Alcohol Specific Deaths_x,Measure Type,Value,Lower 95% confidence limit,Upper 95% confidence limit,Unit
0,2001,All persons,Deaths,Count,5701.0,10.3,10.8,People
1,2001,All persons,"Rate per 100,000 persons1",Count,10.6,10.3,10.8,People
2,2001,Females,Deaths,Count,1823.0,6.3,6.9,People
3,2001,Females,"Rate per 100,000 persons1",Count,6.6,6.3,6.9,People
4,2001,Males,Deaths,Count,3878.0,14.4,15.3,People
5,2001,Males,"Rate per 100,000 persons1",Count,14.9,14.4,15.3,People
6,2002,All persons,Deaths,Count,5915.0,10.6,11.2,People
7,2002,All persons,"Rate per 100,000 persons1",Count,10.9,10.6,11.2,People
8,2002,Females,Deaths,Count,1878.0,6.4,7.0,People
9,2002,Females,"Rate per 100,000 persons1",Count,6.7,6.4,7.0,People


In [36]:
# Drop the merge suffix from the measure-label column.
Final_table = Final_table.rename(
    columns={'Alcohol Specific Deaths_x': 'Alcohol Specific Deaths'}
)

In [37]:
def user_perc(x,y):
    """Return an empty string when ``str(x)`` is 'Deaths', otherwise return ``y`` unchanged.

    x: the measure label of a row (compared after conversion with ``str``).
    y: the value to keep for any row whose label is not 'Deaths'.
    """
    return '' if str(x) == 'Deaths' else y
    
# Blank out the lower limit on 'Deaths' rows; other rows keep their value.
Final_table['Lower 95% confidence limit'] = [
    user_perc(label, limit)
    for label, limit in zip(Final_table['Alcohol Specific Deaths'],
                            Final_table['Lower 95% confidence limit'])
]

In [38]:
# Blank out the upper limit on 'Deaths' rows; other rows keep their value.
# user_perc is already defined by the preceding cell, so the identical second
# definition that used to sit here (and shadow the first) has been removed.
Final_table['Upper 95% confidence limit'] = Final_table.apply(
    lambda row: user_perc(row['Alcohol Specific Deaths'],
                          row['Upper 95% confidence limit']),
    axis=1,
)

In [39]:
# Remove trailing '1' characters from the measure labels
# (e.g. 'Rate per 100,000 persons1' -> 'Rate per 100,000 persons').
measure_labels = Final_table['Alcohol Specific Deaths']
Final_table['Alcohol Specific Deaths'] = measure_labels.str.rstrip('1')

In [40]:
# Translate the Sex labels to single-letter codes; any label not listed is left as it is.
sex_codes = {
    'All persons': 'T',
    'Males': 'M',
    'Females': 'F',
}
Final_table['Sex'] = Final_table['Sex'].map(lambda label: sex_codes.get(label, label))

In [41]:
# Every row gets the constant Age value 'All'.
Final_table = Final_table.assign(Age='All')

In [42]:
# Every row gets the constant Geography code 'K02000001'.
Final_table = Final_table.assign(Geography='K02000001')

In [43]:
# if is_interactive():
#     destinationFolder = Path('out')
#     destinationFolder.mkdir(exist_ok=True, parents=True)
#     Final_table.to_csv(destinationFolder / ('tab1.csv'), index = False)