Table 2: Alcohol-specific age-specific death rates per 100,000 population, deaths registered in the United Kingdom, 2001 to 2016

In [1]:
from gssutils import *

# When is_interactive() is true, build a Scraper for the ONS dataset page and load
# its distribution as databaker tabs into `sheet`.
# NOTE(review): when is_interactive() is false nothing is assigned here, so `sheet`
# must already exist by the time the next cell runs — presumably supplied by a
# parent notebook that runs this one; confirm.
if is_interactive():
    scraper = Scraper('https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset')
    sheet = scraper.distribution().as_databaker()



In [2]:
# Select the worksheet this notebook transforms.
# Passing a default to next() lets a missing tab fail with a readable message
# instead of a bare StopIteration (e.g. if the workbook tabs are renamed upstream).
tab = next((candidate for candidate in sheet if candidate.name == 'Table 2'), None)
if tab is None:
    raise ValueError("No tab named 'Table 2' was found in the loaded workbook")

In [3]:
# Extract 'Table 2' into a tidy DataFrame (`new_table`) using databaker selections.
# Every selection is positioned relative to the single cell containing 'Year'.
# NOTE(review): the offsets below assume databaker's shift(columns, rows) convention
# and the current sheet layout — confirm against the workbook if the layout changes.
cell = tab.filter('Year')
# Fail early if the sheet has zero or several 'Year' cells.
cell.assert_one()
# Non-blank cells starting one row above the anchor and running to the right.
Sex = cell.shift(0,-1).expand(RIGHT).is_not_whitespace().is_not_blank()
# Numeric, non-blank cells from the anchor downwards.
year = cell.expand(DOWN).is_not_whitespace().is_not_blank().is_number()
# Non-blank cells starting two columns right of the anchor, on the anchor's row.
# These header texts become the 'Alcohol Specific Deaths' dimension; later cells
# filter on the values 'Lower 95% confidence limit' / 'Upper 95% confidence limit'
# and compare against 'Deaths'.
deaths = cell.shift(2,0).expand(RIGHT).is_not_whitespace().is_not_blank()
# Non-blank cells starting one column right and one row below the anchor, downwards.
age = cell.shift(1,1).expand(DOWN).is_not_whitespace().is_not_blank()
# Observations: every non-blank cell from the row below the `deaths` headers downwards.
observations = deaths.shift(0,1).expand(DOWN).is_not_whitespace().is_not_blank()
# How each observation looks up its dimension values.
# 'Measure Type' is a constant here; a later cell recomputes it per row.
Dimensions = [
            HDim(year,'Year',CLOSEST,ABOVE),
            HDim(Sex,'Sex',CLOSEST,LEFT),
            HDim(age,'Age',DIRECTLY,LEFT),
            HDim(deaths, 'Alcohol Specific Deaths',DIRECTLY,ABOVE),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','deaths')
            ]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Only render the HTML preview when is_interactive() is true.
if is_interactive():
    savepreviewhtml(c1)
    
# The observation column of this frame is named 'OBS' (renamed to 'Value' in the next cell).
new_table = c1.topandas()

0,1,2,3,4
OBS,Year,Sex,Age,Alcohol Specific Deaths

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
,Back to contents,,,,,,,,,,,,,,,,,,,
,"Table 2: Alcohol-specific age-specific death rates per 100,000 people, deaths registered in the United Kingdom, 2001 to 20171,2,3,4,5",,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,
,Notes:,,,,,,,,,,,,,,,,,,,
,"1 Age-specific rates are expressed per 100,000 population.",,,,,,,,,,,,,,,,,,,
,2 Deaths of non-residents are included in figures for the UK.,,,,,,,,,,,,,,,,,,,
,3 Figures are for deaths registered in each calendar year.,,,,,,,,,,,,,,,,,,,
,"4 Age-specific rates based on fewer that 3 deaths are not presented due to low reliability and are marked 'z'; when rates are presented for fewer than 20 deaths, these are marked 'u' to show low reliability.",,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,





In [4]:
import numpy as np
def _parse_year(raw_year):
    """Parse one extracted year label with pd.to_numeric, downcasting to an integer type."""
    return pd.to_numeric(raw_year, downcast='integer')


def _blank_if_z(observation):
    """Return '' for the marker 'z'; return any other observation unchanged."""
    if observation == 'z':
        return ''
    return observation


# The extracted observation column is called 'OBS'; the rest of the notebook uses 'Value'.
new_table = new_table.rename(columns={'OBS': 'Value'})
new_table['Year'] = new_table['Year'].apply(_parse_year)
new_table['Value'] = new_table['Value'].map(_blank_if_z)

In [5]:
# Rows whose 'Alcohol Specific Deaths' label is the lower confidence limit.
is_lower_limit = new_table['Alcohol Specific Deaths'] == 'Lower 95% confidence limit'
nt1 = new_table[is_lower_limit]

In [6]:
# Relabel 'Value' so the lower limits arrive under their own column name when merged.
nt1 = nt1.rename(columns={'Value': 'Lower 95% confidence limit'})

In [7]:
# Rows whose 'Alcohol Specific Deaths' label is the upper confidence limit.
is_upper_limit = new_table['Alcohol Specific Deaths'] == 'Upper 95% confidence limit'
nt2 = new_table[is_upper_limit]

In [8]:
# Relabel 'Value' so the upper limits arrive under their own column name when merged.
nt2 = nt2.rename(columns={'Value': 'Upper 95% confidence limit'})

In [9]:
# Drop the upper-limit rows from the main table (they were copied into nt2 above).
not_upper_limit = new_table['Alcohol Specific Deaths'] != 'Upper 95% confidence limit'
new_table = new_table[not_upper_limit]

In [10]:
# Drop the lower-limit rows from the main table (they were copied into nt1 above).
not_lower_limit = new_table['Alcohol Specific Deaths'] != 'Lower 95% confidence limit'
new_table = new_table[not_lower_limit]

In [11]:
# Attach the lower limit to every remaining row with the same Year/Sex/Age.
# Columns present on both sides come back with '_x' (left) / '_y' (right) suffixes.
Final_table = new_table.merge(nt1, how='outer', on=['Year', 'Sex', 'Age'])

In [12]:
# Keep the left-hand ('_x') copies of the shared columns plus the new lower limit.
columns_after_lower_merge = [
    'Value',
    'Year',
    'Age',
    'Sex',
    'Alcohol Specific Deaths_x',
    'Measure Type_x',
    'Unit_x',
    'Lower 95% confidence limit',
]
Final_table = Final_table[columns_after_lower_merge]

In [13]:
# Attach the upper limit to every row with the same Year/Sex/Age.
Final_table = Final_table.merge(nt2, how='outer', on=['Year', 'Sex', 'Age'])

In [14]:
# Final column order; 'Measure Type' and 'Unit' here are the copies brought in by nt2.
output_columns = [
    'Year',
    'Sex',
    'Age',
    'Alcohol Specific Deaths_x',
    'Measure Type',
    'Value',
    'Lower 95% confidence limit',
    'Upper 95% confidence limit',
    'Unit',
]
Final_table = Final_table[output_columns]

In [15]:
# Drop the merge suffix from the label column.
Final_table = Final_table.rename(columns={'Alcohol Specific Deaths_x': 'Alcohol Specific Deaths'})

In [16]:
def user_perc(x,y):
    """Return 0 when ``str(x)`` is exactly 'Deaths'; otherwise return ``y`` unchanged.

    x: the row's 'Alcohol Specific Deaths' label.
    y: the value to keep for every label other than 'Deaths'.
    """
    return 0 if str(x) == 'Deaths' else y
    
# Rows labelled 'Deaths' get a lower limit of 0; every other row keeps its merged value.
lower_limits = Final_table.apply(
    lambda record: user_perc(record['Alcohol Specific Deaths'], record['Lower 95% confidence limit']),
    axis=1,
)
Final_table['Lower 95% confidence limit'] = lower_limits

In [17]:
# NOTE(review): same behaviour as the user_perc defined in the previous cell —
# a candidate for one shared definition.
def user_perc(x,y):
    """Return ``y`` unless ``str(x)`` is exactly 'Deaths', in which case return 0."""
    label = str(x)
    if label != 'Deaths':
        return y
    return 0
    
# Rows labelled 'Deaths' get an upper limit of 0; every other row keeps its merged value.
upper_limits = Final_table.apply(
    lambda record: user_perc(record['Alcohol Specific Deaths'], record['Upper 95% confidence limit']),
    axis=1,
)
Final_table['Upper 95% confidence limit'] = upper_limits

In [18]:
# Strip every trailing '1' character from the labels
# (presumably a footnote marker carried over from the sheet headers — confirm).
measure_labels = Final_table['Alcohol Specific Deaths']
Final_table['Alcohol Specific Deaths'] = measure_labels.str.rstrip('1')

In [19]:
# Sheet wording -> single-letter sex code; any label not listed is left as it is.
sex_codes = {'All persons': 'T', 'Males': 'M', 'Females': 'F'}
Final_table['Sex'] = Final_table['Sex'].map(lambda label: sex_codes.get(label, label))

In [20]:
# Every row gets the same geography code
# (presumably the ONS code for the United Kingdom, matching the table title — confirm).
Final_table = Final_table.assign(Geography='K02000001')

In [21]:
# Swap the male and female rate labels; every other label passes through untouched.
# NOTE(review): the displayed result pairs Sex 'F' with the 'females' label only after
# this swap, so the extracted labels were the other way round — presumably a side
# effect of how the Sex header is matched during extraction; confirm against the sheet.
rate_label_swap = {
    'Rate per 100,000 males': 'Rate per 100,000 females',
    'Rate per 100,000 females': 'Rate per 100,000 males',
}
Final_table['Alcohol Specific Deaths'] = Final_table['Alcohol Specific Deaths'].map(
    lambda label: rate_label_swap.get(label, label)
)

In [22]:
# NOTE(review): this re-binds user_perc with different behaviour from the earlier
# cells' definitions (it now returns a measure-type code).
def user_perc(x,y):
    """Return 'count' when ``str(x)`` is exactly 'Deaths', else 'rate-per-100-000-persons'.

    ``y`` is accepted but not used.
    """
    return 'count' if str(x) == 'Deaths' else 'rate-per-100-000-persons'
    
# Recompute 'Measure Type' per row from its label, replacing the extracted constant.
measure_types = Final_table.apply(
    lambda record: user_perc(record['Alcohol Specific Deaths'], record['Measure Type']),
    axis=1,
)
Final_table['Measure Type'] = measure_types

In [23]:
# Show the table as it stands after the steps above (cell output only; nothing is modified).
Final_table

Unnamed: 0,Year,Sex,Age,Alcohol Specific Deaths,Measure Type,Value,Lower 95% confidence limit,Upper 95% confidence limit,Unit,Geography
0,2001,T,<1,Deaths,count,0,0,0,deaths,K02000001
1,2001,T,<1,"Rate per 100,000 people",rate-per-100-000-persons,,,,deaths,K02000001
2,2001,F,<1,Deaths,count,0,0,0,deaths,K02000001
3,2001,F,<1,"Rate per 100,000 females",rate-per-100-000-persons,,,,deaths,K02000001
4,2001,M,<1,Deaths,count,0,0,0,deaths,K02000001
5,2001,M,<1,"Rate per 100,000 males",rate-per-100-000-persons,,,,deaths,K02000001
6,2001,T,01-04,Deaths,count,0,0,0,deaths,K02000001
7,2001,T,01-04,"Rate per 100,000 people",rate-per-100-000-persons,,,,deaths,K02000001
8,2001,F,01-04,Deaths,count,0,0,0,deaths,K02000001
9,2001,F,01-04,"Rate per 100,000 females",rate-per-100-000-persons,,,,deaths,K02000001
