Table 5:  Number of alcohol related deaths by deprivation quintile NIMDM171 and death rate per 100,000 population

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Alcohol_Tables_17.xls'
    inputFile = sourceFolder / 'Alcohol_Tables_17.xls'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 5')[0]

Loading in\Alcohol_Tables_17.xls which has size 900608 bytes
Table names: ['Table 5']


In [2]:
cell = tab.filter('Cause of Death')
dp = cell.fill(RIGHT).is_not_blank().is_not_blank().is_not_whitespace() |\
        cell.shift(0,1).fill(RIGHT).is_not_blank().is_not_whitespace()
cod = cell.fill(DOWN).is_not_blank().is_not_whitespace()
observations = cod.fill(RIGHT).is_not_blank().is_not_whitespace()
# Dimensions = [
#             HDim(Year,'Year',DIRECTLY,LEFT),
#             HDim(age, 'Age',DIRECTLY,ABOVE),
#             HDimConst('Measure Type', 'Count'),
#             HDimConst('Unit','People'),
#             HDimConst('Sex', 'T'),
#             HDimConst('Underlying Cause of Death', 'all-alcohol-related-deaths'),
#             HDimConst('Health and Social Care Trust', 'all')
#             ]
# c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)
# table = c1.topandas()

In [3]:
Dimensions = [
            HDim(cod,'Cause of Death',DIRECTLY,LEFT),
            HDim(dp, 'Deprivation Quintile',DIRECTLY,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [4]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
if is_interactive():
    savepreviewhtml(c1)

0,1,2
OBS,Cause of Death,Deprivation Quintile

0,1,2,3,4,5,6,7,8,9,10
"Table 5: Number of alcohol related deaths by deprivation quintile NIMDM171 and death rate per 100,000 population, 2013-2017",,,,,,,,,,
,,,,,,,,,,
Cause of Death,Deprivation Quintile1,,,,,,,,,
,Most Deprived 1,2.0,3.0,4.0,Least Deprived 5,,,,,
,,,,,,,,,,
Alcohol related deaths,461.0,324.0,211.0,182.0,121.0,,,,,
,,,,,,,,,,
All deaths,15811.0,16229.0,15465.0,14804.0,14351.0,,,,,
,,,,,,,,,,
"Rate per 100,000 population",26.647491262975482,17.161953220964914,10.824287443826565,9.450804408852184,6.867827802556195,,,,,


In [5]:
new_table = c1.topandas()




In [6]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [7]:
new_table['Value'] = new_table['Value'].astype(int)

In [8]:
new_table['Deprivation Quintile'] = new_table['Deprivation Quintile'].map(
    lambda x: {
        'Most Deprived\n1' : '1', 
        'Least Deprived\n5' : '5'        
       }.get(x, x))

In [9]:
new_table['Deprivation Quintile'] = pd.to_numeric(new_table['Deprivation Quintile'], errors='coerce').fillna(0)

In [10]:
new_table['Deprivation Quintile'] = new_table['Deprivation Quintile'].astype(int)

In [11]:
def user_perc(x,y):
    
    if str(x) == 'Rate per 100,000 population':
        return 'rate-per-100-000-persons'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Cause of Death'],row['Measure Type']), axis = 1)

In [12]:
new_table['Cause of Death'] = new_table['Cause of Death'].map(
    lambda x: {
        'Alcohol related deaths' : 'alcohol-related-deaths', 
        'All deaths' : 'all-deaths',
       'Rate per 100,000 population' : 'alcohol-related-deaths'       
       }.get(x, x))

In [13]:
new_table['Period'] = 'gregorian-interval/20013-01-01T00:00:00/P4Y'

In [14]:
new_table = new_table[['Period','Cause of Death','Deprivation Quintile','Measure Type','Value','Unit']]

In [15]:
if is_interactive():
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)

    new_table.to_csv(destinationFolder / ('table5.csv'), index = False)

In [16]:
new_table

Unnamed: 0,Period,Cause of Death,Deprivation Quintile,Measure Type,Value,Unit
0,gregorian-interval/20013-01-01T00:00:00/P4Y,alcohol-related-deaths,1,Count,461,People
1,gregorian-interval/20013-01-01T00:00:00/P4Y,alcohol-related-deaths,2,Count,324,People
2,gregorian-interval/20013-01-01T00:00:00/P4Y,alcohol-related-deaths,3,Count,211,People
3,gregorian-interval/20013-01-01T00:00:00/P4Y,alcohol-related-deaths,4,Count,182,People
4,gregorian-interval/20013-01-01T00:00:00/P4Y,alcohol-related-deaths,5,Count,121,People
5,gregorian-interval/20013-01-01T00:00:00/P4Y,all-deaths,1,Count,15811,People
6,gregorian-interval/20013-01-01T00:00:00/P4Y,all-deaths,2,Count,16229,People
7,gregorian-interval/20013-01-01T00:00:00/P4Y,all-deaths,3,Count,15465,People
8,gregorian-interval/20013-01-01T00:00:00/P4Y,all-deaths,4,Count,14804,People
9,gregorian-interval/20013-01-01T00:00:00/P4Y,all-deaths,5,Count,14351,People
