Table 3a: Number of alcohol related deaths by underlying cause of death and registration year, 2006-2016 

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/Alcohol_Tables_16_0.xls'
inputFile = sourceFolder / 'Alcohol_Tables_16_0.xls'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table 3a')[0]

Loading in\Alcohol_Tables_16_0.xls which has size 969216 bytes
Table names: ['Table 3a']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<M9 2668.0>, <J6 36.0>, <K5 45.0>, <B8 131.0>, <B7 '-'>, <C6 2.0>, <C7 1.0>, <J8 156.0>, <M5 551.0>, <I6 25.0>, <E8 183.0>, <C5 94.0>, <E6 26.0>, <I7 0.0>, <L8 216.0>, <I8 148.0>, <J5 27.0>, <B9 210.0>, <D8 171.0>, <E5 39.0>, <K6 28.0>, <K8 209.0>, <E7 1.0>, <G5 36.0>, <M7 12.0>, <M6 255.0>, <I9 206.0>, <L7 10.0>, <L5 51.0>, <J7 0.0>, <H6 29.0>, <F5 48.0>, <F8 168.0>, <D9 243.0>, <G8 165.0>, <F6 44.0>, <J9 219.0>, <G7 0.0>, <D7 '-'>, <I5 33.0>, <G9 228.0>, <M8 1850.0>, <D6 26.0>, <B6 '-'>, <F9 260.0>, <L6 12.0>, <D5 46.0>, <H9 244.0>, <K7 0.0>, <K9 282.0>, <G6 27.0>, <H5 53.0>, <H7 0.0>, <E9 249.0>, <C9 238.0>, <L9 289.0>, <H8 162.0>, <F7 '-'>, <C8 141.0>, <B5 79.0>}

In [6]:
cause = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A10').expand(DOWN)
cause

{<A6 'Accidental poisoning by and exposure to alcohol (X45)'>, <A7 'Intentional self-poisoning by and exposure to alcohol or poisoning by and exposure to alcohol, undetermined intent (X65, Y15)'>, <A8 'All other alcohol related deaths (E24.4, G31.2, G62.1, G72.1, I42.6, K29.2, K70, K85.2, Q86.0, R78.0, K86.0)'>, <A9 'All alcohol related deaths'>, <A5 'Mental and behavioural disorders due to use of alcohol (F10)'>}

In [7]:
Year = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Year

{<J4 2014.0>, <E4 2009.0>, <I4 2013.0>, <G4 2011.0>, <K4 2015.0>, <H4 2012.0>, <B4 2006.0>, <L4 2016.0>, <D4 2008.0>, <F4 2010.0>, <C4 2007.0>}

In [8]:
Dimensions = [
            HDim(Year,'Year',DIRECTLY,ABOVE),
            HDim(cause, 'Underlying Cause (ICD-10 codes)',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
savepreviewhtml(c1)

0,1,2
OBS,Year,Underlying Cause (ICD-10 codes)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
"Table 3a: Number of alcohol related deaths by underlying cause of death and registration year, 2006-2016 - (new definition)",,,,,,,,,,,,,,
,,,,,,,,,,,,,,
Underlying Cause (ICD-10 codes),,,,,,,,,,,,Total (2006-2016),,
,2006.0,2007.0,2008.0,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,,,
Mental and behavioural disorders due to use of alcohol (F10),79.0,94.0,46.0,39.0,48.0,36.0,53.0,33.0,27.0,45.0,51.0,551.0,,
Accidental poisoning by and exposure to alcohol (X45),-,2.0,26.0,26.0,44.0,27.0,29.0,25.0,36.0,28.0,12.0,255.0,,
"Intentional self-poisoning by and exposure to alcohol or poisoning by and exposure to alcohol, undetermined intent (X65, Y15)",-,1.0,-,1.0,-,0.0,0.0,0.0,0.0,0.0,10.0,12.0,,
"All other alcohol related deaths (E24.4, G31.2, G62.1, G72.1, I42.6, K29.2, K70, K85.2, Q86.0, R78.0, K86.0)",131.0,141.0,171.0,183.0,168.0,165.0,162.0,148.0,156.0,209.0,216.0,1850.0,,
All alcohol related deaths,210.0,238.0,243.0,249.0,260.0,228.0,244.0,206.0,219.0,282.0,289.0,2668.0,,
,,,,,,,,,,,,,,


In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Year,Underlying Cause (ICD-10 codes),Measure Type,Unit
0,79.0,,2006.0,Mental and behavioural disorders due to use of...,Count,People
1,94.0,,2007.0,Mental and behavioural disorders due to use of...,Count,People
2,46.0,,2008.0,Mental and behavioural disorders due to use of...,Count,People
3,39.0,,2009.0,Mental and behavioural disorders due to use of...,Count,People
4,48.0,,2010.0,Mental and behavioural disorders due to use of...,Count,People
5,36.0,,2011.0,Mental and behavioural disorders due to use of...,Count,People
6,53.0,,2012.0,Mental and behavioural disorders due to use of...,Count,People
7,33.0,,2013.0,Mental and behavioural disorders due to use of...,Count,People
8,27.0,,2014.0,Mental and behavioural disorders due to use of...,Count,People
9,45.0,,2015.0,Mental and behavioural disorders due to use of...,Count,People


In [11]:
new_table['Year'] = pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)

In [12]:
new_table['Year'] = new_table['Year'].astype(int)

In [13]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [14]:
new_table['Year'] = new_table['Year'].astype(str)

In [15]:
new_table.count()

Value                              60
DATAMARKER                          4
Year                               60
Underlying Cause (ICD-10 codes)    60
Measure Type                       60
Unit                               60
dtype: int64

In [16]:
new_table['Value'].unique()

array([79.0, 94.0, 46.0, 39.0, 48.0, 36.0, 53.0, 33.0, 27.0, 45.0, 51.0,
       551.0, '', 2.0, 26.0, 44.0, 29.0, 25.0, 28.0, 12.0, 255.0, 1.0, 0.0,
       10.0, 131.0, 141.0, 171.0, 183.0, 168.0, 165.0, 162.0, 148.0, 156.0,
       209.0, 216.0, 1850.0, 210.0, 238.0, 243.0, 249.0, 260.0, 228.0,
       244.0, 206.0, 219.0, 282.0, 289.0, 2668.0], dtype=object)

In [17]:
new_table = new_table[new_table['Value'] != '']

In [18]:
new_table['Value'] = new_table['Value'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table.dtypes

Value                               int32
DATAMARKER                         object
Year                               object
Underlying Cause (ICD-10 codes)    object
Measure Type                       object
Unit                               object
dtype: object

In [20]:
new_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Year,Underlying Cause (ICD-10 codes),Measure Type,Unit
55,206,,2013,All alcohol related deaths,Count,People
56,219,,2014,All alcohol related deaths,Count,People
57,282,,2015,All alcohol related deaths,Count,People
58,289,,2016,All alcohol related deaths,Count,People
59,2668,,0,All alcohol related deaths,Count,People


In [21]:
def user_perc(x):
    
    if str(x) == '0':
        return '2006-2016'
    else:
        return x
    
new_table['Year'] = new_table.apply(lambda row: user_perc(row['Year']), axis = 1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [22]:
new_table

Unnamed: 0,Value,DATAMARKER,Year,Underlying Cause (ICD-10 codes),Measure Type,Unit
0,79,,2006,Mental and behavioural disorders due to use of...,Count,People
1,94,,2007,Mental and behavioural disorders due to use of...,Count,People
2,46,,2008,Mental and behavioural disorders due to use of...,Count,People
3,39,,2009,Mental and behavioural disorders due to use of...,Count,People
4,48,,2010,Mental and behavioural disorders due to use of...,Count,People
5,36,,2011,Mental and behavioural disorders due to use of...,Count,People
6,53,,2012,Mental and behavioural disorders due to use of...,Count,People
7,33,,2013,Mental and behavioural disorders due to use of...,Count,People
8,27,,2014,Mental and behavioural disorders due to use of...,Count,People
9,45,,2015,Mental and behavioural disorders due to use of...,Count,People


In [23]:
new_table = new_table[['Year','Underlying Cause (ICD-10 codes)','Measure Type','Value','Unit']]

In [24]:
new_table.head(5)

Unnamed: 0,Year,Underlying Cause (ICD-10 codes),Measure Type,Value,Unit
0,2006,Mental and behavioural disorders due to use of...,Count,79,People
1,2007,Mental and behavioural disorders due to use of...,Count,94,People
2,2008,Mental and behavioural disorders due to use of...,Count,46,People
3,2009,Mental and behavioural disorders due to use of...,Count,39,People
4,2010,Mental and behavioural disorders due to use of...,Count,48,People


In [25]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('tab3a.csv'), index = False)