Table 4a: Number of alcohol related deaths by Health and Social Care Trust and registration year, 2006-2016

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl, using a file cache under '.cache'
# and the LastModified heuristic for responses.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Alcohol_Tables_16_0.xls'
inputFile = sourceFolder / 'Alcohol_Tables_16_0.xls'
# A timeout stops the notebook hanging indefinitely on an unresponsive server.
response = session.get(inputURL, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the .xls file.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

In [3]:
# Load the sheet named 'Table 4a' from the downloaded workbook and keep the
# first (only requested) tab.
tab = loadxlstabs(inputFile, sheetids='Table 4a')[0]

Loading in\Alcohol_Tables_16_0.xls which has size 969216 bytes
Table names: ['Table 4a']


In [4]:
# Observation cells: start at B5, extend down and to the right, and keep only
# the non-blank cells.
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Show the selected observation cells as a visual check.
observations

{<G14 282.0>, <B5 60.0>, <E5 28.0>, <E14 44.0>, <G16 2668.0>, <F10 36.0>, <B9 71.0>, <C15 62.0>, <F6 41.0>, <D15 50.0>, <B13 61.0>, <F14 56.0>, <E6 38.0>, <G11 244.0>, <G6 238.0>, <G7 243.0>, <E7 29.0>, <D11 43.0>, <E9 44.0>, <G15 289.0>, <D6 36.0>, <B6 74.0>, <F12 42.0>, <C7 63.0>, <F8 48.0>, <B10 67.0>, <G5 210.0>, <G8 249.0>, <F5 38.0>, <G13 219.0>, <B15 88.0>, <C11 38.0>, <D10 47.0>, <F16 493.0>, <D13 38.0>, <F7 30.0>, <E8 37.0>, <B8 67.0>, <E12 31.0>, <E11 41.0>, <C8 52.0>, <E13 36.0>, <F9 62.0>, <D16 457.0>, <G10 228.0>, <D7 46.0>, <B12 55.0>, <B14 69.0>, <F15 55.0>, <E10 33.0>, <C16 563.0>, <G12 206.0>, <E16 395.0>, <C9 45.0>, <B11 73.0>, <C12 46.0>, <D14 47.0>, <D9 38.0>, <F11 49.0>, <B16 760.0>, <C10 45.0>, <C5 49.0>, <D12 32.0>, <F13 36.0>, <G9 260.0>, <D5 35.0>, <C6 49.0>, <C13 48.0>, <D8 45.0>, <B7 75.0>, <C14 66.0>, <E15 34.0>}

In [6]:
# Year labels: non-blank cells in column A from A5 downward, with everything
# from A17 downward removed from the selection.
Year = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A17').expand(DOWN)
# Display the selection to check it.
Year

{<A8 2009.0>, <A11 2012.0>, <A7 2008.0>, <A15 2016.0>, <A14 2015.0>, <A9 2010.0>, <A5 2006.0>, <A16 'Total (2006-2016)'>, <A10 2011.0>, <A6 2007.0>, <A13 2014.0>, <A12 2013.0>}

In [7]:
# Trust name headers: non-blank cells in row 4 from B4 rightward.
sct = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
# Display the selection to check it.
sct

{<D4 'South Eastern'>, <C4 'Northern'>, <F4 'Western'>, <B4 'Belfast'>, <E4 ' Southern'>}

In [8]:
# Dimensions attached to each observation.
# NOTE(review): DIRECTLY/LEFT and DIRECTLY/ABOVE presumably pick the header
# cell in the same row / same column as the observation; confirm against
# the databaker documentation.
Dimensions = [
            # Year label taken from the Year selection.
            HDim(Year,'Year',DIRECTLY,LEFT),
            # Trust name taken from the sct selection.
            HDim(sct, 'Social Care Trust',DIRECTLY,ABOVE),
            # Constant dimensions applied to every observation.
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [9]:
# Combine the observations with their dimensions into a conversion segment.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Render a preview of the segment for visual inspection.
savepreviewhtml(c1)

0,1,2
OBS,Year,Social Care Trust

0,1,2,3,4,5,6
"Table 4a: Number of alcohol related deaths by Health and Social Care Trust and registration year, 2006-2016 - (new definition)",,,,,,
,,,,,,
Registration Year,Health and Social Care Trust,,,,,Total
,Belfast,Northern,South Eastern,Southern,Western,
2006.0,60.0,49.0,35.0,28.0,38.0,210.0
2007.0,74.0,49.0,36.0,38.0,41.0,238.0
2008.0,75.0,63.0,46.0,29.0,30.0,243.0
2009.0,67.0,52.0,45.0,37.0,48.0,249.0
2010.0,71.0,45.0,38.0,44.0,62.0,260.0
2011.0,67.0,45.0,47.0,33.0,36.0,228.0


In [10]:
# Convert the conversion segment into a pandas DataFrame.
new_table = c1.topandas()
# Display the resulting table.
new_table




Unnamed: 0,OBS,Year,Social Care Trust,Measure Type,Unit
0,60.0,2006.0,Belfast,Count,People
1,49.0,2006.0,Northern,Count,People
2,35.0,2006.0,South Eastern,Count,People
3,28.0,2006.0,Southern,Count,People
4,38.0,2006.0,Western,Count,People
5,210.0,2006.0,,Count,People
6,74.0,2007.0,Belfast,Count,People
7,49.0,2007.0,Northern,Count,People
8,36.0,2007.0,South Eastern,Count,People
9,38.0,2007.0,Southern,Count,People


In [11]:
# Rows from the sheet's 'Total' column have no trust header, so their
# 'Social Care Trust' value is missing; label those rows 'Total'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# a column selection is chained assignment, which does not update the frame
# when pandas Copy-on-Write is active.
# Surrounding whitespace is stripped because at least one header cell is
# read with a leading space (' Southern').
new_table['Social Care Trust'] = (
    new_table['Social Care Trust']
    .fillna('Total')
    .str.strip()
)

In [12]:
# Parse Year as a number; values that cannot be parsed become NaN and are
# then replaced with the placeholder 0.
new_table['Year'] = (
    new_table['Year']
    .pipe(pd.to_numeric, errors='coerce')
    .fillna(0)
)

In [13]:
# Cast the Year column to integers.
new_table = new_table.astype({'Year': int})

In [14]:
# Rename the 'OBS' column to 'Value'; all other column names are kept.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [15]:
# Convert the Year column to strings.
new_table = new_table.astype({'Year': str})

In [16]:
# Number of non-missing values in each column.
new_table.notna().sum()

Value                72
Year                 72
Social Care Trust    72
Measure Type         72
Unit                 72
dtype: int64

In [17]:
# Cast the Value column to integers.
new_table = new_table.astype({'Value': int})

In [18]:
# Show the dtype of each column after the conversions.
new_table.dtypes

Value                 int32
Year                 object
Social Care Trust    object
Measure Type         object
Unit                 object
dtype: object

In [19]:
# Inspect the last five rows.
new_table.iloc[-5:]

Unnamed: 0,Value,Year,Social Care Trust,Measure Type,Unit
67,563,0,Northern,Count,People
68,457,0,South Eastern,Count,People
69,395,0,Southern,Count,People
70,493,0,Western,Count,People
71,2668,0,Total,Count,People


In [20]:
def user_perc(x):
    """Replace the placeholder year with the label '2006-2016'.

    Returns '2006-2016' when the string form of ``x`` is exactly '0';
    any other value is returned unchanged.
    """
    return '2006-2016' if str(x) == '0' else x
    
# Replace the placeholder year with its label, value by value.
# Series.map applies user_perc to each Year value directly, instead of
# building a row object per record with DataFrame.apply(axis=1); it also
# returns a Series when the frame is empty.
new_table['Year'] = new_table['Year'].map(user_perc)


In [21]:
# Display the table after the Year placeholder has been replaced.
new_table

Unnamed: 0,Value,Year,Social Care Trust,Measure Type,Unit
0,60,2006,Belfast,Count,People
1,49,2006,Northern,Count,People
2,35,2006,South Eastern,Count,People
3,28,2006,Southern,Count,People
4,38,2006,Western,Count,People
5,210,2006,Total,Count,People
6,74,2007,Belfast,Count,People
7,49,2007,Northern,Count,People
8,36,2007,South Eastern,Count,People
9,38,2007,Southern,Count,People


In [22]:
# Select the output columns in their final order.
new_table = new_table.loc[:, [
    'Year',
    'Social Care Trust',
    'Measure Type',
    'Value',
    'Unit',
]]

In [23]:
# Inspect the first five rows.
new_table.iloc[:5]

Unnamed: 0,Year,Social Care Trust,Measure Type,Value,Unit
0,2006,Belfast,Count,60,People
1,2006,Northern,Count,49,People
2,2006,South Eastern,Count,35,People
3,2006,Southern,Count,28,People
4,2006,Western,Count,38,People


In [24]:
# Make sure the output folder exists, then write the table as CSV without
# the index column.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder.joinpath('tab4a.csv'), index=False)