Table 3 - Alcohol-related deaths<sup>1,2</sup>, by gender

In [None]:
from gssutils import *

# When run interactively, download the source workbook into ./in.
# NOTE(review): outside interactive mode `inputFile` is not set here;
# presumably whatever drives the notebook supplies it — confirm.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session backed by an on-disk cache in '.cache', so repeated runs
    # can reuse a previously fetched response.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://webarchive.nationalarchives.gov.uk/20180328130416/http://digital.nhs.uk/media/30889/Statistics-on-Alcohol-England-2017-Tables/Any/alc-eng-2017-tab'
    inputFile = sourceFolder / 'alc-eng-2017-tab'
    response = session.get(inputURL)
    # Fail here on a 4xx/5xx response rather than saving an error page as the
    # workbook and hitting an obscure parse error in a later cell.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [None]:
# Load only the sheet named 'Table 3' from the input file; the loader's
# result is indexable, and the first entry is the tab worked on below.
table3_tabs = loadxlstabs(inputFile, sheetids='Table 3')
tab = table3_tabs[0]

In [None]:
# Observations: every non-blank cell in the region running down and to the
# right of E8.
observation_origin = tab.excel_ref('E8')
observations = observation_origin.expand(DOWN).expand(RIGHT).is_not_blank()
observations

In [None]:
# Column headers: non-blank cells on row 6 from column E rightwards.
# These feed the 'Sex' dimension.
header_origin = tab.excel_ref('E6')
gr = header_origin.expand(RIGHT).is_not_blank()
gr

In [None]:
# ICD-10 codes: non-blank cells in column C from row 10 downwards.
code_origin = tab.excel_ref('C10')
code = code_origin.expand(DOWN).is_not_blank()
code

In [None]:
# ICD-10 descriptions: non-blank cells in column A from row 8 downwards,
# with cell A22 removed (presumably a non-data row — confirm against the sheet).
description_column = tab.excel_ref('A8').expand(DOWN).is_not_blank()
excluded_cell = tab.excel_ref('A22')
des = description_column - excluded_cell
des

In [None]:
# Dimensions attached to every observation. HDimConst entries are the same
# for the whole table; HDim entries are looked up per observation cell.
Dimensions = [
            HDimConst('Category','Alcohol related deaths'),
            # Codes are in column C, on the observation's own row and to its left.
            HDim(code,'ICD-10 Code',DIRECTLY,LEFT),
            # The sex headers are on row 6 while observations start at row 8,
            # so the header is found by column, looking ABOVE. A LEFT lookup
            # only searches the observation's own row and never reaches row 6.
            HDim(gr,'Sex',DIRECTLY,ABOVE),
            HDimConst('Geography','E92000001'),
            HDimConst('Period','2015'),
            HDimConst('Unit','People'),
            HDimConst('Measure Type','Count'),
            # Descriptions are in column A, on the observation's own row.
            HDim(des,'ICD-10 Description',DIRECTLY,LEFT)
            ]

In [None]:
# Build the conversion segment that pairs the selected observations with the
# dimensions defined above.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the selection (left disabled):
# if is_interactive():
#     savepreviewhtml(c1)

In [None]:
# Convert the segment to a pandas DataFrame and display it.
new_table = c1.topandas()
new_table

In [None]:
# Rename the 'OBS' column to 'Value'; every other column keeps its name.
new_table.rename(columns={'OBS': 'Value'}, inplace=True)

In [None]:
# Preview the first rows after the OBS -> Value column rename.
new_table.head()

In [None]:
# Relabel the 'Total' description as 'All deaths'; any description without an
# entry in the mapping passes through unchanged.
description_labels = {'Total': 'All deaths'}
new_table['ICD-10 Description'] = new_table['ICD-10 Description'].map(
    lambda label: description_labels.get(label, label))

In [None]:
# Preview the first rows after relabelling the 'Total' description.
new_table.head()

In [None]:
# Preview the last rows of the table.
new_table.tail()

In [None]:
# Show each column's dtype ahead of the integer conversions that follow.
new_table.dtypes

In [None]:
# Store the observation values as integers.
# NOTE(review): astype(int) raises on missing or non-numeric entries —
# confirm the sheet has no suppressed or blank-marker values.
new_table['Value'] = new_table['Value'].astype(int)

In [None]:
# Period is set from the constant string '2015'; store it as an integer.
new_table['Period'] = new_table['Period'].astype(int)

In [None]:
# Preview the first three rows after the dtype conversions.
new_table.head(3)

In [None]:
# Keep only the output columns, in a fixed order. 'Sex' is retained because
# the table is broken down by gender: without it, rows for different sexes
# share identical dimension values and cannot be told apart.
new_table = new_table[['Geography','Period','Category','Sex','ICD-10 Code','ICD-10 Description','Measure Type','Value','Unit']]

In [None]:
# When run interactively, write the finished table to out/table3.csv,
# creating the output folder (and any parents) if it does not exist.
if is_interactive():
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    outputFile = destinationFolder / 'table3.csv'
    new_table.to_csv(outputFile, index = False)

In [None]:
# Final preview of the table after column selection.
new_table.head()