Table 2: Alcohol-specific age-specific death rates per 100,000 population, deaths registered in the United Kingdom, 2001 to 2016

In [1]:
from gssutils import *

# Interactive runs only: download the ONS workbook into 'in/' and load its 'Table 2' sheet as `tab`.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped by CacheControl with a file cache stored in '.cache'.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset/current/maindatatablesforalcoholspecificdeaths.xls'
    inputFile = sourceFolder / 'maindatatablesforalcoholspecificdeaths.xls'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page as the workbook.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 2')[0]

Loading in\maindatatablesforalcoholspecificdeaths.xls which has size 185856 bytes
Table names: ['Table 2']


https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset/current/maindatatablesforalcoholspecificdeaths.xls

In [2]:
# Observation cells: every non-blank cell reached from D16 by expanding down and then right.
observations = (
    tab.excel_ref('D16')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)

In [3]:
# Display the selected observation cells for a visual check.
observations

{<D140 936.0>, <Q116 0.5>, <R18 'z'>, <J306 3.0>, <S135 0.9>, <J23 54.0>, <F296 22.3>, <M313 21.0>, <K40 0.2>, <D243 56.0>, <J26 294.0>, <F249 0.1>, <F105 25.1>, <Q261 9.8>, <R120 19.4>, <K42 2.0>, <D194 206.0>, <E281 3.3>, <N59 'u'>, <P280 83.0>, <F290 7.3>, <G176 10.5>, <S242 15.4>, <K78 'z'>, <T230 'u'>, <J75 0.0>, <M101 10.4>, <D97 21.0>, <L40 0.0>, <P119 315.0>, <J138 140.0>, <F269 1.0>, <L49 11.3>, <D305 0.0>, <K19 'z'>, <Q296 35.2>, <R113 'z'>, <L233 3.7>, <S270 4.9>, <L53 0.8>, <R243 4.9>, <K44 10.3>, <G141 32.4>, <F299 6.0>, <Q59 0.6>, <J28 251.0>, <F252 8.1>, <S221 29.7>, <R194 5.2>, <L54 'z'>, <F186 2.6>, <Q24 11.0>, <Q83 31.7>, <M145 10.2>, <Q65 38.7>, <Q31 18.7>, <Q302 'z'>, <M89 7.1>, <N281 'u'>, <L115 'z'>, <G69 16.5>, <K245 'z'>, <K271 5.7>, <Q265 'z'>, <E181 27.3>, <L81 5.6>, <G213 5.5>, <F195 8.8>, <Q63 20.6>, <F57 'z'>, <L163 13.4>, <D96 9.0>, <E29 23.0>, <D184 211.0>, <E119 9.9>, <D76 0.0>, <D193 79.0>, <D156 211.0>, <K221 9.6>, <D268 10.0>, <G274 26.0>, <N148 'u'>,

In [4]:
# Sex headings: non-whitespace cells on row 14, from column D rightwards.
Sex = (
    tab.excel_ref('D14')
    .expand(RIGHT)
    .is_not_whitespace()
)
Sex

{<P14 'Males'>, <D14 'All persons'>, <J14 'Females'>}

In [5]:
# Column headings on row 15 (deaths, rate and confidence limits), from column D rightwards.
deaths = (
    tab.excel_ref('D15')
    .expand(RIGHT)
    .is_not_blank()
)
deaths

{<K15 'Rate per 100,000 persons1'>, <J15 'Deaths'>, <F15 'Lower 95% confidence limit'>, <R15 'Lower 95% confidence limit'>, <M15 'Upper 95% confidence limit'>, <L15 'Lower 95% confidence limit'>, <E15 'Rate per 100,000 persons1'>, <Q15 'Rate per 100,000 persons1'>, <D15 'Deaths'>, <S15 'Upper 95% confidence limit'>, <P15 'Deaths'>, <G15 'Upper 95% confidence limit'>}

In [6]:
# Year labels: non-blank cells in column A from row 16 down,
# minus everything in column A from row 320 downwards.
Year = (
    tab.excel_ref('A16').expand(DOWN).is_not_blank()
    - tab.excel_ref('A320').expand(DOWN)
)
Year

{<A168 2009.0>, <A149 2008.0>, <A301 2016.0>, <A111 2006.0>, <A244 2013.0>, <A92 2005.0>, <A73 2004.0>, <A16 2001.0>, <A54 2003.0>, <A130 2007.0>, <A225 2012.0>, <A206 2011.0>, <A187 2010.0>, <A35 2002.0>, <A263 2014.0>, <A282 2015.0>}

In [7]:
# Age-band labels: non-blank cells in column B from row 16 down,
# minus everything in column B from row 320 downwards.
Age = (
    tab.excel_ref('B16').expand(DOWN).is_not_blank()
    - tab.excel_ref('B320').expand(DOWN)
)
Age

{<B71 '80-84'>, <B244 '<1'>, <B48 '60-64'>, <B232 '30-34'>, <B284 '05-09'>, <B57 '10-14'>, <B294 '55-59'>, <B286 '15-19'>, <B299 '80-84'>, <B128 '80-84'>, <B231 '25-29'>, <B29 '60-64'>, <B147 '80-84'>, <B100 '35-39'>, <B58 '15-19'>, <B314 '60-64'>, <B40 '20-24'>, <B126 '70-74'>, <B70 '75-79'>, <B130 '<1'>, <B123 '55-59'>, <B171 '10-14'>, <B315 '65-69'>, <B97 '20-24'>, <B268 '20-24'>, <B96 '15-19'>, <B104 '55-59'>, <B316 '70-74'>, <B17 '01-04'>, <B253 '40-44'>, <B220 '65-69'>, <B216 '45-49'>, <B290 '35-39'>, <B152 '10-14'>, <B27 '50-54'>, <B95 '10-14'>, <B114 '10-14'>, <B43 '35-39'>, <B245 '01-04'>, <B258 '65-69'>, <B132 '05-09'>, <B120 '40-44'>, <B311 '45-49'>, <B203 '75-79'>, <B32 '75-79'>, <B163 '65-69'>, <B176 '35-39'>, <B145 '70-74'>, <B134 '15-19'>, <B175 '30-34'>, <B167 '85+'>, <B283 '01-04'>, <B191 '15-19'>, <B164 '70-74'>, <B237 '55-59'>, <B63 '40-44'>, <B186 '85+'>, <B274 '50-54'>, <B224 '85+'>, <B204 '80-84'>, <B310 '40-44'>, <B256 '55-59'>, <B146 '75-79'>, <B83 '45-49'>, <B6

In [8]:
# Dimension definitions that tie each observation cell to its headings.
# NOTE(review): 'Measure Type' = 'Count' and 'Unit' = 'People' are constants attached to
# every observation, including the rate and confidence-limit columns — confirm intended.
Dimensions = [
    HDim(Year, 'Year', CLOSEST, ABOVE),
    HDim(Age, 'Age', DIRECTLY, LEFT),
    HDim(Sex, 'Sex', CLOSEST, LEFT),
    HDim(deaths, 'Alcohol Specific Deaths', DIRECTLY, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with the dimension definitions into one conversion segment.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# In interactive runs, also render the HTML preview of the segment.
if is_interactive():
    savepreviewhtml(c1)

0,1,2,3,4
OBS,Year,Age,Sex,Alcohol Specific Deaths

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
Back to contents,,,,,,,,,,,,,,,,,,,
"Table 2: Alcohol-specific age-specific death rates per 100,000 population, deaths registered in the United Kingdom, 2001 to 20161,2,3,4,5",,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,
"1 Age-specific rates are expressed per 100,000 population",,,,,,,,,,,,,,,,,,,
2 Figures include those who were not usually resident in the United Kingdom,,,,,,,,,,,,,,,,,,,
3 Figures are for deaths registered in each calendar year,,,,,,,,,,,,,,,,,,,
"4 Age-specific rates based on fewer that 3 deaths are not presented due to low reliability are are marked 'z'; when rates are presented for fewer than 20 deaths, these are marked 'u' to show low reliability.",,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,
"5 Statistically significant differences between rates are assessed using confidence intervals (CIs), also known as confidence limits. As a general rule, if the confidence interval around an estimate overlaps with the interval around another, there is no difference between the two estimates.",,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,


In [10]:
# Convert the segment into a table (one row per observation cell) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Year,Age,Sex,Alcohol Specific Deaths,Measure Type,Unit
0,0,,2001.0,<1,All persons,Deaths,Count,People
1,,z,2001.0,<1,All persons,"Rate per 100,000 persons1",Count,People
2,,z,2001.0,<1,All persons,Lower 95% confidence limit,Count,People
3,,z,2001.0,<1,All persons,Upper 95% confidence limit,Count,People
4,0,,2001.0,<1,Females,Deaths,Count,People
5,,z,2001.0,<1,Females,"Rate per 100,000 persons1",Count,People
6,,z,2001.0,<1,Females,Lower 95% confidence limit,Count,People
7,,z,2001.0,<1,Females,Upper 95% confidence limit,Count,People
8,0,,2001.0,<1,Males,Deaths,Count,People
9,,z,2001.0,<1,Males,"Rate per 100,000 persons1",Count,People


In [11]:
# Keep only the rows whose observation is not equal to 0.
is_nonzero = new_table['OBS'] != 0
new_table = new_table[is_nonzero]

In [12]:
# Keep only the rows whose observation is not the literal 'z'.
# NOTE(review): in the table preview 'z' appears under DATAMARKER rather than OBS,
# so this filter may not remove anything — confirm.
is_not_z = new_table['OBS'] != 'z'
new_table = new_table[is_not_z]

In [13]:
# Drop rows whose observation is an empty string.
# .copy() makes new_table an independent frame, so the column assignments that follow
# do not write into a filtered slice (the pattern behind pandas' SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != ''].copy()

In [14]:
# Parse Year as a number; values that cannot be parsed become 0.
new_table = new_table.assign(
    Year=pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)
)

In [15]:
# Store Year as an integer (drops the '.0' carried over from the spreadsheet).
new_table = new_table.assign(Year=new_table['Year'].astype(int))

In [16]:
# Rename the observation column from 'OBS' to 'Value'.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [17]:
# Check the column dtypes after the Year conversion and the OBS -> Value rename.
new_table.dtypes

Value                      object
DATAMARKER                 object
Year                        int32
Age                        object
Sex                        object
Alcohol Specific Deaths    object
Measure Type               object
Unit                       object
dtype: object

In [18]:
# Display the filtered table for a visual check.
new_table

Unnamed: 0,Value,DATAMARKER,Year,Age,Sex,Alcohol Specific Deaths,Measure Type,Unit
36,1,,2001,10-14,All persons,Deaths,Count,People
40,1,,2001,10-14,Females,Deaths,Count,People
48,11,,2001,15-19,All persons,Deaths,Count,People
49,0.3,,2001,15-19,All persons,"Rate per 100,000 persons1",Count,People
50,0.1,,2001,15-19,All persons,Lower 95% confidence limit,Count,People
51,0.5,,2001,15-19,All persons,Upper 95% confidence limit,Count,People
53,1,,2001,15-19,Females,Deaths,Count,People
57,10,,2001,15-19,Males,Deaths,Count,People
58,0.5,,2001,15-19,Males,"Rate per 100,000 persons1",Count,People
59,0.3,,2001,15-19,Males,Lower 95% confidence limit,Count,People


In [19]:
# Rows holding the lower 95% confidence limit.
is_lower_limit = new_table['Alcohol Specific Deaths'] == 'Lower 95% confidence limit'
nt1 = new_table[is_lower_limit]

In [20]:
# Display the lower-confidence-limit rows.
nt1

Unnamed: 0,Value,DATAMARKER,Year,Age,Sex,Alcohol Specific Deaths,Measure Type,Unit
50,0.1,,2001,15-19,All persons,Lower 95% confidence limit,Count,People
59,0.3,,2001,15-19,Males,Lower 95% confidence limit,Count,People
64,0.3,,2001,20-24,All persons,Lower 95% confidence limit,Count,People
73,0.5,,2001,20-24,Males,Lower 95% confidence limit,Count,People
78,1.2,,2001,25-29,All persons,Lower 95% confidence limit,Count,People
82,0.5,,2001,25-29,Females,Lower 95% confidence limit,Count,People
87,1.6,,2001,25-29,Males,Lower 95% confidence limit,Count,People
91,3.6,,2001,30-34,All persons,Lower 95% confidence limit,Count,People
95,1.8,,2001,30-34,Females,Lower 95% confidence limit,Count,People
99,5,,2001,30-34,Males,Lower 95% confidence limit,Count,People


In [21]:
# Relabel the value column of the lower-limit rows so it becomes its own column after merging.
# The rename works on nt1's own columns instead of rebuilding them from new_table.columns,
# so it does not rely on the two frames sharing an identical column layout.
nt1 = nt1.rename(columns={'Value': 'Lower 95% confidence limit'})

In [22]:
# Rows holding the upper 95% confidence limit.
is_upper_limit = new_table['Alcohol Specific Deaths'] == 'Upper 95% confidence limit'
nt2 = new_table[is_upper_limit]

In [23]:
# Display the upper-confidence-limit rows.
nt2

Unnamed: 0,Value,DATAMARKER,Year,Age,Sex,Alcohol Specific Deaths,Measure Type,Unit
51,0.5,,2001,15-19,All persons,Upper 95% confidence limit,Count,People
60,1,,2001,15-19,Males,Upper 95% confidence limit,Count,People
65,0.7,,2001,20-24,All persons,Upper 95% confidence limit,Count,People
74,1.4,,2001,20-24,Males,Upper 95% confidence limit,Count,People
79,2,,2001,25-29,All persons,Upper 95% confidence limit,Count,People
83,1.3,,2001,25-29,Females,Upper 95% confidence limit,Count,People
88,3,,2001,25-29,Males,Upper 95% confidence limit,Count,People
92,4.7,,2001,30-34,All persons,Upper 95% confidence limit,Count,People
96,3.1,,2001,30-34,Females,Upper 95% confidence limit,Count,People
100,7,,2001,30-34,Males,Upper 95% confidence limit,Count,People


In [24]:
# Relabel the value column of the upper-limit rows so it becomes its own column after merging.
# The rename works on nt2's own columns instead of rebuilding them from new_table.columns,
# so it does not rely on the two frames sharing an identical column layout.
nt2 = nt2.rename(columns={'Value': 'Upper 95% confidence limit'})

In [25]:
# Non-null counts per column, as a quick completeness check.
new_table.count()

Value                      2768
DATAMARKER                    0
Year                       2768
Age                        2768
Sex                        2768
Alcohol Specific Deaths    2768
Measure Type               2768
Unit                       2768
dtype: int64

In [26]:
# Remove the upper-limit rows from the main table; they were extracted into nt2.
not_upper_limit = new_table['Alcohol Specific Deaths'] != 'Upper 95% confidence limit'
new_table = new_table[not_upper_limit]

In [27]:
# Remove the lower-limit rows from the main table; they were extracted into nt1.
not_lower_limit = new_table['Alcohol Specific Deaths'] != 'Lower 95% confidence limit'
new_table = new_table[not_lower_limit]

In [28]:
# Attach the lower-limit rows to the remaining rows by Year, Sex and Age.
# NOTE(review): an inner join discards rows of new_table that have no lower-limit row
# for the same Year/Sex/Age — confirm that dropping them is intended.
Final_table = new_table.merge(nt1, how='inner', on=['Year', 'Sex', 'Age'])

In [29]:
# Peek at the merged columns; overlapping names carry _x / _y suffixes.
Final_table.head(2)

Unnamed: 0,Value,DATAMARKER_x,Year,Age,Sex,Alcohol Specific Deaths_x,Measure Type_x,Unit_x,Lower 95% confidence limit,DATAMARKER_y,Alcohol Specific Deaths_y,Measure Type_y,Unit_y
0,11.0,,2001,15-19,All persons,Deaths,Count,People,0.1,,Lower 95% confidence limit,Count,People
1,0.3,,2001,15-19,All persons,"Rate per 100,000 persons1",Count,People,0.1,,Lower 95% confidence limit,Count,People


In [30]:
# Keep the left-hand (_x) descriptive columns plus the newly attached lower limit.
Final_table = Final_table[[
    'Value',
    'Year',
    'Age',
    'Sex',
    'Alcohol Specific Deaths_x',
    'Measure Type_x',
    'Unit_x',
    'Lower 95% confidence limit',
]]

In [31]:
# Attach the upper-limit rows by Year, Sex and Age.
# NOTE(review): an inner join discards rows that have no upper-limit row
# for the same Year/Sex/Age — confirm that dropping them is intended.
Final_table = Final_table.merge(nt2, how='inner', on=['Year', 'Sex', 'Age'])

In [32]:
# Peek at the columns after the second merge.
Final_table.head(1)

Unnamed: 0,Value,Year,Age,Sex,Alcohol Specific Deaths_x,Measure Type_x,Unit_x,Lower 95% confidence limit,Upper 95% confidence limit,DATAMARKER,Alcohol Specific Deaths,Measure Type,Unit
0,11,2001,15-19,All persons,Deaths,Count,People,0.1,0.5,,Upper 95% confidence limit,Count,People


In [33]:
# Select and order the output columns.
# .copy() makes Final_table an independent frame, so the column assignments in the
# following cells do not write into a column-subset slice (SettingWithCopyWarning pattern).
Final_table = Final_table[[
    'Year',
    'Sex',
    'Age',
    'Alcohol Specific Deaths_x',
    'Measure Type',
    'Value',
    'Lower 95% confidence limit',
    'Upper 95% confidence limit',
    'Unit',
]].copy()

In [34]:
# Display the assembled table for a visual check.
Final_table

Unnamed: 0,Year,Sex,Age,Alcohol Specific Deaths_x,Measure Type,Value,Lower 95% confidence limit,Upper 95% confidence limit,Unit
0,2001,All persons,15-19,Deaths,Count,11,0.1,0.5,People
1,2001,All persons,15-19,"Rate per 100,000 persons1",Count,0.3,0.1,0.5,People
2,2001,Males,15-19,Deaths,Count,10,0.3,1,People
3,2001,Males,15-19,"Rate per 100,000 persons1",Count,0.5,0.3,1,People
4,2001,All persons,20-24,Deaths,Count,16,0.3,0.7,People
5,2001,All persons,20-24,"Rate per 100,000 persons1",Count,0.4,0.3,0.7,People
6,2001,Males,20-24,Deaths,Count,15,0.5,1.4,People
7,2001,Males,20-24,"Rate per 100,000 persons1",Count,0.8,0.5,1.4,People
8,2001,All persons,25-29,Deaths,Count,60,1.2,2,People
9,2001,All persons,25-29,"Rate per 100,000 persons1",Count,1.5,1.2,2,People


In [35]:
# Drop the merge suffix from the 'Alcohol Specific Deaths' column name.
Final_table = Final_table.rename(
    columns={'Alcohol Specific Deaths_x': 'Alcohol Specific Deaths'}
)

In [36]:
def user_perc(x, y):
    """Return '' when str(x) is exactly 'Deaths'; otherwise return y unchanged.

    x -- the row's label (compared after conversion with str()).
    y -- the value to keep for every label other than 'Deaths'.
    """
    return '' if str(x) == 'Deaths' else y
    
# Blank the lower limit on 'Deaths' rows; other rows keep their merged value.
Final_table['Lower 95% confidence limit'] = Final_table.apply(
    lambda record: user_perc(
        record['Alcohol Specific Deaths'],
        record['Lower 95% confidence limit'],
    ),
    axis=1,
)

In [37]:
def user_perc(x, y):
    """Return '' when str(x) is exactly 'Deaths'; otherwise return y unchanged.

    Same helper as the one declared in the previous cell, re-declared here.
    """
    if str(x) != 'Deaths':
        return y
    return ''
    
# Blank the upper limit on 'Deaths' rows; other rows keep their merged value.
Final_table['Upper 95% confidence limit'] = Final_table.apply(
    lambda record: user_perc(
        record['Alcohol Specific Deaths'],
        record['Upper 95% confidence limit'],
    ),
    axis=1,
)

In [38]:
# Strip trailing '1' characters from the label (the footnote marker in 'Rate per 100,000 persons1').
Final_table = Final_table.assign(**{
    'Alcohol Specific Deaths': Final_table['Alcohol Specific Deaths'].str.rstrip('1'),
})

In [39]:
# Replace the spreadsheet's sex labels with single-letter codes; unknown labels pass through.
sex_codes = {
    'All persons': 'T',
    'Males': 'M',
    'Females': 'F',
}
Final_table['Sex'] = Final_table['Sex'].map(lambda label: sex_codes.get(label, label))

In [40]:
# Every row gets the same geography code.
Final_table = Final_table.assign(Geography='K02000001')

In [41]:
# Interactive runs only: write the final table to out/tab2.csv without the index column.
if is_interactive():
    destinationFolder = Path('out')
    destinationFolder.mkdir(parents=True, exist_ok=True)
    output_csv = destinationFolder / 'tab2.csv'
    Final_table.to_csv(output_csv, index=False)