Table 5a(i):  Number of alcohol related deaths by deprivation quintile NIMDM101 and death rate per 100,000 population, 2012-2016 

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Alcohol_Tables_16_0.xls'
inputFile = sourceFolder / 'Alcohol_Tables_16_0.xls'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table 5a(i)')[0]

Loading in\Alcohol_Tables_16_0.xls which has size 969216 bytes
Table names: ['Table 5a(i)']


In [4]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<F8 13620.0>, <B8 15960.0>, <D8 15584.0>, <B6 462.0>, <B10 26.629776932388033>, <E8 14014.0>, <D6 208.0>, <E10 8.006590688335029>, <C10 16.20451807550026>, <F10 6.559429582797263>, <C8 16202.0>, <D10 10.610620823343366>, <C6 304.0>, <F6 114.0>, <E6 152.0>}

In [6]:
cod = tab.excel_ref('A6').expand(DOWN).is_not_blank() - tab.excel_ref('A11').expand(DOWN)
cod

{<A8 'All deaths'>, <A10 'Rate per 100,000 population'>, <A6 'Alcohol related deaths'>}

In [7]:
dq = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
dq

{<F4 'Least Deprived\n5'>, <C4 2.0>, <D4 3.0>, <E4 4.0>, <B4 'Most Deprived\n1'>}

In [8]:
Dimensions = [
            HDim(cod,'Cause of Death',DIRECTLY,LEFT),
            HDim(dq, 'Deprivation Quintile Rate',DIRECTLY,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
savepreviewhtml(c1)

0,1,2
OBS,Cause of Death,Deprivation Quintile Rate

0,1,2,3,4,5,6,7,8,9,10
"Table 5a(i): Number of alcohol related deaths by deprivation quintile NIMDM101 and death rate per 100,000 population, 2012-2016 - (new definition)",,,,,,,,,,
,,,,,,,,,,
Cause of Death,Deprivation Quintile1,,,,,,,,,
,Most Deprived 1,2.0,3.0,4.0,Least Deprived 5,,,,,
,,,,,,,,,,
Alcohol related deaths,462.0,304.0,208.0,152.0,114.0,,,,,
,,,,,,,,,,
All deaths,15960.0,16202.0,15584.0,14014.0,13620.0,,,,,
,,,,,,,,,,
"Rate per 100,000 population",26.629776932388033,16.20451807550026,10.610620823343366,8.006590688335029,6.559429582797263,,,,,


In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Cause of Death,Deprivation Quintile Rate,Measure Type,Unit
0,462.0,Alcohol related deaths,Most Deprived\n1,Count,People
1,304.0,Alcohol related deaths,2.0,Count,People
2,208.0,Alcohol related deaths,3.0,Count,People
3,152.0,Alcohol related deaths,4.0,Count,People
4,114.0,Alcohol related deaths,Least Deprived\n5,Count,People
5,15960.0,All deaths,Most Deprived\n1,Count,People
6,16202.0,All deaths,2.0,Count,People
7,15584.0,All deaths,3.0,Count,People
8,14014.0,All deaths,4.0,Count,People
9,13620.0,All deaths,Least Deprived\n5,Count,People


In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.count()

Value                        15
Cause of Death               15
Deprivation Quintile Rate    15
Measure Type                 15
Unit                         15
dtype: int64

In [13]:
new_table['Value'] = new_table['Value'].astype(int)

In [14]:
new_table.dtypes

Value                         int32
Cause of Death               object
Deprivation Quintile Rate    object
Measure Type                 object
Unit                         object
dtype: object

In [15]:
new_table.tail(5)

Unnamed: 0,Value,Cause of Death,Deprivation Quintile Rate,Measure Type,Unit
10,26,"Rate per 100,000 population",Most Deprived\n1,Count,People
11,16,"Rate per 100,000 population",2.0,Count,People
12,10,"Rate per 100,000 population",3.0,Count,People
13,8,"Rate per 100,000 population",4.0,Count,People
14,6,"Rate per 100,000 population",Least Deprived\n5,Count,People


In [16]:
new_table['Deprivation Quintile Rate'] = new_table['Deprivation Quintile Rate'].map(
    lambda x: {
        'Most Deprived\n1' : '1', 
        'Least Deprived\n5' : '5'        
       }.get(x, x))


In [17]:
new_table['Deprivation Quintile Rate'] = pd.to_numeric(new_table['Deprivation Quintile Rate'], errors='coerce').fillna(0)

In [18]:
new_table['Deprivation Quintile Rate'] = new_table['Deprivation Quintile Rate'].astype(int)

In [19]:
new_table

Unnamed: 0,Value,Cause of Death,Deprivation Quintile Rate,Measure Type,Unit
0,462,Alcohol related deaths,1,Count,People
1,304,Alcohol related deaths,2,Count,People
2,208,Alcohol related deaths,3,Count,People
3,152,Alcohol related deaths,4,Count,People
4,114,Alcohol related deaths,5,Count,People
5,15960,All deaths,1,Count,People
6,16202,All deaths,2,Count,People
7,15584,All deaths,3,Count,People
8,14014,All deaths,4,Count,People
9,13620,All deaths,5,Count,People


In [20]:
new_table = new_table[['Cause of Death','Deprivation Quintile Rate','Measure Type','Value','Unit']]

In [21]:
new_table.head(5)

Unnamed: 0,Cause of Death,Deprivation Quintile Rate,Measure Type,Value,Unit
0,Alcohol related deaths,1,Count,462,People
1,Alcohol related deaths,2,Count,304,People
2,Alcohol related deaths,3,Count,208,People
3,Alcohol related deaths,4,Count,152,People
4,Alcohol related deaths,5,Count,114,People


In [22]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('tab5ai.csv'), index = False)