Table 3 - Alcohol-related deaths<sup>1,2</sup>, by gender

In [1]:
from gssutils import *

# Download the source workbook into ./in. Everything below runs only when
# is_interactive() is true.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session whose responses are cached on disk under '.cache'.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://webarchive.nationalarchives.gov.uk/20180328130416/http://digital.nhs.uk/media/30889/Statistics-on-Alcohol-England-2017-Tables/Any/alc-eng-2017-tab'
    inputFile = sourceFolder / 'alc-eng-2017-tab'
    response = session.get(inputURL)
    # Fail here on a 4xx/5xx reply instead of saving an error page to disk
    # and hitting a confusing parse failure when the workbook is loaded.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the first tab returned for sheet id 'Table 3'.
# NOTE(review): inputFile is only assigned inside the is_interactive() guard
# of the download cell; presumably a calling notebook defines it otherwise —
# confirm.
tab = loadxlstabs(inputFile, sheetids='Table 3')[0]

Loading in\alc-eng-2017-tab which has size 126229 bytes
Table names: ['Table 3']


In [3]:
# Observation cells: start at E8, expand downwards and to the right, and
# keep only the non-blank cells.
observations = (
    tab.excel_ref('E8')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<E11 9.0>, <G14 2.0>, <E18 7.0>, <G20 1.0>, <E21 4.0>, <E20 4.0>, <F18 7.0>, <E8 6813.0>, <F17 971.0>, <E19 313.0>, <G18 0.0>, <F16 2.0>, <F20 3.0>, <F11 6.0>, <E16 3.0>, <F21 3.0>, <G21 1.0>, <E14 8.0>, <G16 1.0>, <G12 0.0>, <G10 117.0>, <F13 62.0>, <F19 205.0>, <F10 288.0>, <E10 405.0>, <G17 602.0>, <G8 2382.0>, <F12 0.0>, <E15 4411.0>, <E17 1573.0>, <F14 6.0>, <G11 3.0>, <F15 2878.0>, <G19 108.0>, <G13 14.0>, <G15 1533.0>, <F8 4431.0>, <E12 0.0>, <E13 76.0>}

In [4]:
# Column headers on row 6 ('All persons', 'Male', 'Female'), used for the
# Sex dimension.
gr = (
    tab.excel_ref('E6')
    .expand(RIGHT)
    .is_not_blank()
)
gr

{<F6 'Male'>, <E6 'All persons'>, <G6 'Female'>}

In [5]:
# ICD-10 codes: non-blank cells in column C from row 10 downwards.
code = (
    tab.excel_ref('C10')
    .expand(DOWN)
    .is_not_blank()
)
code

{<C21 'Y15'>, <C15 'K70'>, <C10 'F10'>, <C17 'K74 (excluding K74.3 - K74.5, biliary cirrhosis)'>, <C19 'X45'>, <C11 'G31.2'>, <C14 'K29.2'>, <C12 'G62.1'>, <C16 'K73'>, <C20 'X65'>, <C13 'I42.6'>, <C18 'K86.0'>}

In [6]:
# Row descriptions: non-blank cells in column A from row 8 downwards,
# minus cell A22.
# NOTE(review): this selection also contains the footnote/source cells below
# the table (visible in the displayed set); that looks harmless if the
# description is only ever looked up on an observation's own row — confirm.
des = (
    tab.excel_ref('A8').expand(DOWN).is_not_blank()
    - tab.excel_ref('A22')
)
des

{<A26 2.0>, <A17 'Fibrosis and cirrhosis of liver'>, <A25 1.0>, <A20 'Intentional self-poisoning by and exposure to alcohol'>, <A35 'Copyright © 2017. The Health and Social Care Information Centre, Lifestyles Statistics. All rights reserved.'>, <A8 'Total'>, <A24 'Footnotes'>, <A12 'Alcoholic polyneuropathy'>, <A31 'Alcohol-related deaths by sex, age group and individual cause of death, UK constituent countries, deaths registered 2001 to 2015 - Office for National Statistics'>, <A34 'Copyright © 2017, re-used with the permission of The Office for National Statistics.'>, <A27 3.0>, <A21 'Poisoning by and exposure to alcohol, undetermined intent'>, <A16 'Chronic hepatitis, not elsewhere classified'>, <A18 'Alcohol induced chronic pancreatitis'>, <A11 'Degeneration of nervous system due to alcohol'>, <A13 'Alcoholic cardiomyopathy'>, <A15 'Alcoholic liver disease'>, <A30 'Source'>, <A10 'Mental and behavioural disorders due to use of alcohol'>, <A32 'http://www.ons.gov.uk/peoplepopulation

In [7]:
# Dimensions attached to every observation. Constants apply to the whole
# table; the HDim entries are looked up from cells relative to each
# observation (codes and descriptions to the LEFT, sex ABOVE).
# The list order is deliberately unchanged: the resulting frame's columns
# appear in this order.
Dimensions = [
    HDimConst('Category', 'Alcohol related deaths'),
    HDim(code, 'ICD-10 Code', DIRECTLY, LEFT),
    HDim(gr, 'Sex', DIRECTLY, ABOVE),
    HDimConst('Geography', 'E92000001'),
    HDimConst('Period', '2015'),
    HDimConst('Unit', 'People'),
    HDimConst('Measure Type', 'Count'),
    HDim(des, 'ICD-10 Description', DIRECTLY, LEFT),
]

In [8]:
# Bind the observation cells to their dimensions.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional visual check of the selection while developing (left disabled):
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Convert the conversion segment into a pandas DataFrame and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Category,ICD-10 Code,Sex,Geography,Period,Unit,Measure Type,ICD-10 Description
0,6813.0,Alcohol related deaths,,All persons,E92000001,2015,People,Count,Total
1,4431.0,Alcohol related deaths,,Male,E92000001,2015,People,Count,Total
2,2382.0,Alcohol related deaths,,Female,E92000001,2015,People,Count,Total
3,405.0,Alcohol related deaths,F10,All persons,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...
4,288.0,Alcohol related deaths,F10,Male,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...
5,117.0,Alcohol related deaths,F10,Female,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...
6,9.0,Alcohol related deaths,G31.2,All persons,E92000001,2015,People,Count,Degeneration of nervous system due to alcohol
7,6.0,Alcohol related deaths,G31.2,Male,E92000001,2015,People,Count,Degeneration of nervous system due to alcohol
8,3.0,Alcohol related deaths,G31.2,Female,E92000001,2015,People,Count,Degeneration of nervous system due to alcohol
9,0.0,Alcohol related deaths,G62.1,All persons,E92000001,2015,People,Count,Alcoholic polyneuropathy


In [10]:
# Rename the observation column from 'OBS' to 'Value'; all other column
# names are left untouched.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [11]:
# Preview the first rows to check that 'OBS' is now called 'Value'.
new_table.head()

Unnamed: 0,Value,Category,ICD-10 Code,Sex,Geography,Period,Unit,Measure Type,ICD-10 Description
0,6813.0,Alcohol related deaths,,All persons,E92000001,2015,People,Count,Total
1,4431.0,Alcohol related deaths,,Male,E92000001,2015,People,Count,Total
2,2382.0,Alcohol related deaths,,Female,E92000001,2015,People,Count,Total
3,405.0,Alcohol related deaths,F10,All persons,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...
4,288.0,Alcohol related deaths,F10,Male,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...


In [12]:
# Relabel the 'Total' row description as 'All deaths'; every other value
# (including missing ones) passes through unchanged.
new_table['ICD-10 Description'] = new_table['ICD-10 Description'].map(
    lambda label: 'All deaths' if label == 'Total' else label
)

In [13]:
# Preview the first rows to check the 'Total' -> 'All deaths' relabelling.
new_table.head()

Unnamed: 0,Value,Category,ICD-10 Code,Sex,Geography,Period,Unit,Measure Type,ICD-10 Description
0,6813.0,Alcohol related deaths,,All persons,E92000001,2015,People,Count,All deaths
1,4431.0,Alcohol related deaths,,Male,E92000001,2015,People,Count,All deaths
2,2382.0,Alcohol related deaths,,Female,E92000001,2015,People,Count,All deaths
3,405.0,Alcohol related deaths,F10,All persons,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...
4,288.0,Alcohol related deaths,F10,Male,E92000001,2015,People,Count,Mental and behavioural disorders due to use of...


In [14]:
# Inspect the last rows of the table as well.
new_table.tail()

Unnamed: 0,Value,Category,ICD-10 Code,Sex,Geography,Period,Unit,Measure Type,ICD-10 Description
34,3.0,Alcohol related deaths,X65,Male,E92000001,2015,People,Count,Intentional self-poisoning by and exposure to ...
35,1.0,Alcohol related deaths,X65,Female,E92000001,2015,People,Count,Intentional self-poisoning by and exposure to ...
36,4.0,Alcohol related deaths,Y15,All persons,E92000001,2015,People,Count,"Poisoning by and exposure to alcohol, undeterm..."
37,3.0,Alcohol related deaths,Y15,Male,E92000001,2015,People,Count,"Poisoning by and exposure to alcohol, undeterm..."
38,1.0,Alcohol related deaths,Y15,Female,E92000001,2015,People,Count,"Poisoning by and exposure to alcohol, undeterm..."


In [15]:
# Check column dtypes before casting 'Value' and 'Period' to integers.
new_table.dtypes

Value                 float64
Category               object
ICD-10 Code            object
Sex                    object
Geography              object
Period                 object
Unit                   object
Measure Type           object
ICD-10 Description     object
dtype: object

In [16]:
# 'Value' was parsed as float64 (see the dtypes listing); store the counts
# as integers.
new_table['Value'] = new_table['Value'].astype(int)

In [17]:
# 'Period' comes from the string constant '2015' (object dtype); cast it to
# an integer year.
new_table['Period'] = new_table['Period'].astype(int)

In [18]:
# Preview three rows to confirm 'Value' and 'Period' are now integers.
new_table.head(3)

Unnamed: 0,Value,Category,ICD-10 Code,Sex,Geography,Period,Unit,Measure Type,ICD-10 Description
0,6813,Alcohol related deaths,,All persons,E92000001,2015,People,Count,All deaths
1,4431,Alcohol related deaths,,Male,E92000001,2015,People,Count,All deaths
2,2382,Alcohol related deaths,,Female,E92000001,2015,People,Count,All deaths


In [19]:
def _sex_code(label):
    """Return the one-letter code for a sex label; unknown labels pass through."""
    return {
        'Female': 'F',
        'Male': 'M',
        'All persons': 'T',
    }.get(label, label)


# Replace the spreadsheet's sex labels with their one-letter codes.
new_table['Sex'] = new_table['Sex'].map(_sex_code)

In [20]:
# Rows without an ICD-10 code (the totals) get the code 'All'.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form warns on pandas 2.x and does
# not modify new_table at all under Copy-on-Write.
new_table['ICD-10 Code'] = new_table['ICD-10 Code'].fillna('All')

In [21]:
# Keep only the output columns, in the order they are written to the CSV.
new_table = new_table[[
    'Geography',
    'Period',
    'Sex',
    'Category',
    'ICD-10 Code',
    'ICD-10 Description',
    'Measure Type',
    'Value',
    'Unit',
]]

In [22]:
# When run interactively, write the tidy table to out/table3.csv.
# NOTE(review): 'SubstancetinationFolder' looks like a garbled
# 'destinationFolder'; the name is kept in case other cells reference it.
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    new_table.to_csv(SubstancetinationFolder / 'table3.csv', index=False)

In [23]:
# Final preview of the reshaped table.
# NOTE(review): the stored output for this cell has no 'Sex' column even
# though the column selection above includes it — the output looks stale;
# re-run the notebook to refresh it.
new_table.head()

Unnamed: 0,Geography,Period,Category,ICD-10 Code,ICD-10 Description,Measure Type,Value,Unit
0,E92000001,2015,Alcohol related deaths,All,All deaths,Count,6813,People
1,E92000001,2015,Alcohol related deaths,All,All deaths,Count,4431,People
2,E92000001,2015,Alcohol related deaths,All,All deaths,Count,2382,People
3,E92000001,2015,Alcohol related deaths,F10,Mental and behavioural disorders due to use of...,Count,405,People
4,E92000001,2015,Alcohol related deaths,F10,Mental and behavioural disorders due to use of...,Count,288,People
