Table 5: Alcohol-specific age-standardised death rates per 100,000 population, deaths registered in Northern Ireland, 2001 to 2016

In [1]:
from gssutils import *

if is_interactive():
    scraper = Scraper('https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset')
    sheet = scraper.distribution().as_databaker()



In [2]:
tab = next(tab for tab in sheet if tab.name=='Table 5')

In [3]:
cell = tab.filter('Year')
cell.assert_one()
Sex = cell.shift(0,-1).expand(RIGHT).is_not_whitespace().is_not_blank()
year = cell.expand(DOWN).is_not_whitespace().is_not_blank().is_number()
deaths = cell.shift(1,0).expand(RIGHT).is_not_whitespace().is_not_blank()
observations = deaths.shift(0,1).expand(DOWN).is_not_whitespace().is_not_blank()
Dimensions = [
            HDim(year,'Year',DIRECTLY,LEFT),
            HDim(Sex,'Sex',CLOSEST,LEFT),
            HDim(deaths, 'Alcohol Specific Deaths',DIRECTLY,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','deaths')
            ]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
if is_interactive():
    savepreviewhtml(c1)
    
new_table = c1.topandas()
import numpy as np
new_table.rename(columns={'OBS': 'Value'}, inplace=True)
new_table['Value'] = new_table['Value'].astype(int)
new_table['Year'] = new_table['Year'].apply(lambda x: pd.to_numeric(x, downcast='integer'))

0,1,2,3
OBS,Year,Sex,Alcohol Specific Deaths

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
,Back to contents,,,,,,,,,,,,,,,
,"Table 5: Alcohol-specific age-standardised death rates per 100,000 population, deaths registered in Northern Ireland, 2001 to 20171,2,3,4",,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,
,,,All Persons,,,,,Females,,,,,Males,,,
,Year,,Deaths,"Rate per 100,000 people1",Lower 95% confidence limit,Upper 95% confidence limit,,Deaths,"Rate per 100,000 females1",Lower 95% confidence limit,Upper 95% confidence limit,,Deaths,"Rate per 100,000 males1",Lower 95% confidence limit,Upper 95% confidence limit
,2001.0,,178.0,12.4,10.6,14.3,,61.0,8.2,6.3,10.5,,117.0,17.0,13.9,20.1
,2002.0,,194.0,13.5,11.6,15.5,,53.0,7.4,5.5,9.7,,141.0,20.5,16.9,24.1
,2003.0,,175.0,12.1,10.3,14.0,,63.0,8.4,6.4,10.7,,112.0,16.2,13.2,19.2
,2004.0,,204.0,13.5,11.7,15.4,,62.0,8.1,6.2,10.4,,142.0,19.3,16.1,22.5





In [4]:
nt1 = new_table[new_table['Alcohol Specific Deaths'] == 'Lower 95% confidence limit']

In [5]:
nt1.columns = ['Lower 95% confidence limit' if x=='Value' else x for x in new_table.columns]

In [6]:
nt2 = new_table[new_table['Alcohol Specific Deaths'] == 'Upper 95% confidence limit']

In [7]:
nt2.columns = ['Upper 95% confidence limit' if x=='Value' else x for x in new_table.columns]

In [8]:
new_table = new_table[new_table['Alcohol Specific Deaths'] != 'Upper 95% confidence limit']

In [9]:
new_table = new_table[new_table['Alcohol Specific Deaths'] != 'Lower 95% confidence limit']

In [10]:
Final_table = pd.merge(new_table, nt1, how = 'inner', on = ['Year','Sex'])

In [11]:
Final_table = Final_table[['Value','Year','Sex','Alcohol Specific Deaths_x','Measure Type_x','Unit_x',
                           'Lower 95% confidence limit']]

In [12]:
Final_table = pd.merge(Final_table, nt2, how = 'inner', on = ['Year','Sex'])

In [13]:
Final_table = Final_table[['Year','Sex','Alcohol Specific Deaths_x','Measure Type','Value','Lower 95% confidence limit', 'Upper 95% confidence limit','Unit']]

In [14]:
Final_table.columns = ['Alcohol Specific Deaths' if x=='Alcohol Specific Deaths_x' else x for x in Final_table.columns]

In [15]:
def user_perc(x,y):
    
    if str(x) == 'Deaths':
        return ''
    else:
        return y
    
Final_table['Lower 95% confidence limit'] = Final_table.apply(lambda row: user_perc(row['Alcohol Specific Deaths'],row['Lower 95% confidence limit']), axis = 1)

In [16]:
def user_perc(x,y):
    
    if str(x) == 'Deaths':
        return ''
    else:
        return y
    
Final_table['Upper 95% confidence limit'] = Final_table.apply(lambda row: user_perc(row['Alcohol Specific Deaths'],row['Upper 95% confidence limit']), axis = 1)

In [17]:
Final_table['Alcohol Specific Deaths'] = Final_table['Alcohol Specific Deaths'].str.rstrip('1')

In [18]:
Final_table['Sex'] = Final_table['Sex'].map(
    lambda x: {
        'All Persons' : 'T', 
        'Males' : 'M',
        'Females': 'F'         
        }.get(x, x))

In [19]:
Final_table['Age'] =  'All'

In [20]:
Final_table['Geography'] =  'N92000002'

In [21]:
def user_perc(x,y):
    
    if str(x) == 'Deaths':
        return 'Count'
    else:
        return 'Rate per 100,000 persons'
    
Final_table['Measure Type'] = Final_table.apply(lambda row: user_perc(row['Alcohol Specific Deaths'],row['Measure Type']), axis = 1)

In [22]:
Final_table

Unnamed: 0,Year,Sex,Alcohol Specific Deaths,Measure Type,Value,Lower 95% confidence limit,Upper 95% confidence limit,Unit,Age,Geography
0,2001,T,Deaths,Count,178,,,deaths,All,N92000002
1,2001,T,"Rate per 100,000 people","Rate per 100,000 persons",12,10,14,deaths,All,N92000002
2,2001,F,Deaths,Count,61,,,deaths,All,N92000002
3,2001,F,"Rate per 100,000 females","Rate per 100,000 persons",8,6,10,deaths,All,N92000002
4,2001,M,Deaths,Count,117,,,deaths,All,N92000002
5,2001,M,"Rate per 100,000 males","Rate per 100,000 persons",17,13,20,deaths,All,N92000002
6,2002,T,Deaths,Count,194,,,deaths,All,N92000002
7,2002,T,"Rate per 100,000 people","Rate per 100,000 persons",13,11,15,deaths,All,N92000002
8,2002,F,Deaths,Count,53,,,deaths,All,N92000002
9,2002,F,"Rate per 100,000 females","Rate per 100,000 persons",7,5,9,deaths,All,N92000002
