Table 8: Alcohol-specific age-standardised death rates per 100,000 population, deaths registered in Regions of England, 2001 to 2017

In [1]:
from gssutils import *

# Build the scraper and load the spreadsheet only when is_interactive() is true.
# NOTE(review): presumably this is the "run on its own" case and a driving
# notebook supplies `scraper`/`sheet` otherwise -- confirm.
if is_interactive():
    scraper = Scraper(
        'https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/causesofdeath/datasets/alcoholspecificdeathsintheukmaindataset'
    )
    # Load the scraped distribution as databaker tabs.
    sheet = scraper.distribution().as_databaker()



In [2]:
# Pick the first worksheet called 'Table 8'. A default is passed to next() so
# that a missing or renamed tab fails with a clear message instead of a bare
# StopIteration.
tab = next((candidate for candidate in sheet if candidate.name == 'Table 8'), None)
if tab is None:
    raise ValueError("No worksheet named 'Table 8' was found in the spreadsheet")

In [3]:
# Anchor: the single cell containing 'Year'. Every other selection below is
# expressed as an offset from this cell.
cell = tab.filter('Year')
cell.assert_one()

# Header/label selections, each restricted to non-blank, non-whitespace cells.
Sex = (
    cell.shift(0, -1)
    .expand(RIGHT)
    .is_not_whitespace()
    .is_not_blank()
)
year = (
    cell.expand(DOWN)
    .is_not_whitespace()
    .is_not_blank()
    .is_number()
)
geography = (
    cell.shift(1, 1)
    .expand(DOWN)
    .is_not_whitespace()
    .is_not_blank()
)
deaths = (
    cell.shift(3, 0)
    .expand(RIGHT)
    .is_not_whitespace()
    .is_not_blank()
)

# Observations: everything below the `deaths` header cells.
observations = (
    deaths.shift(0, 1)
    .expand(DOWN)
    .is_not_whitespace()
    .is_not_blank()
)

# How each observation looks up its dimension values.
# 'Measure Type' and 'Unit' are constants for every observation at this stage.
Dimensions = [
    HDim(year, 'Year', CLOSEST, ABOVE),
    HDim(Sex, 'Sex', CLOSEST, LEFT),
    HDim(geography, 'Geography', DIRECTLY, LEFT),
    HDim(deaths, 'Alcohol Specific Deaths', DIRECTLY, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'deaths'),
]
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)

# Preview of the selections, only when is_interactive() is true.
if is_interactive():
    savepreviewhtml(c1)

# `np` is not used in this cell; the import is kept because other cells may rely on it.
import numpy as np

# Convert to a DataFrame, rename the observation column and cast it to int.
# NOTE(review): astype(int) drops the fractional part of any non-integer value,
# so rates and confidence limits with decimals are truncated -- confirm that
# this loss of precision is intended.
new_table = (
    c1.topandas()
    .rename(columns={'OBS': 'Value'})
    .astype({'Value': int})
)

# Convert each Year value to the smallest integer type that can hold it.
new_table['Year'] = new_table['Year'].map(
    lambda raw_year: pd.to_numeric(raw_year, downcast='integer')
)

0,1,2,3,4
OBS,Year,Sex,Geography,Alcohol Specific Deaths

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
,Back to contents,,,,,,,,,,,,,,,,,
,"Table 8: Alcohol-specific age-specific death rates per 100,000 population, deaths registered in Regions of England, 2001 to 20171,2,3,4",,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,
,Notes:,,,,,,,,,,,,,,,,,
,"1 Age-standardised rates are expressed per 100,000 population, standardised to the 2013 European Standard Population.",,,,,,,,,,,,,,,,,
,"2 Deaths of non-residents are excluded in figures for England, based on boundaries as of August 2018.",,,,,,,,,,,,,,,,,
,3 Figures are for deaths registered in each calendar year.,,,,,,,,,,,,,,,,,
,"4 The lower and upper confidence limits have been provided. These form a confidence interval, which is a measure of the statistical precision of an estimate and shows the range of uncertainty around the estimated figure. Calculations based on small numbers of events are often subject to random fluctuations. As a general rule, if the confidence interval around one figure overlaps with the interval around another, we cannot say with certainty that there is more than a chance difference between the two figures.",,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,





In [4]:
# Rows whose observation is the lower 95% confidence limit.
nt1 = new_table.loc[new_table['Alcohol Specific Deaths'].eq('Lower 95% confidence limit')]

In [5]:
# Relabel the observation column so it can sit beside 'Value' after merging.
nt1 = nt1.rename(columns={'Value': 'Lower 95% confidence limit'})

In [6]:
# Rows whose observation is the upper 95% confidence limit.
nt2 = new_table.loc[new_table['Alcohol Specific Deaths'].eq('Upper 95% confidence limit')]

In [7]:
# Relabel the observation column so it can sit beside 'Value' after merging.
nt2 = nt2.rename(columns={'Value': 'Upper 95% confidence limit'})

In [8]:
# Drop the upper-confidence-limit rows from the main table.
new_table = new_table.loc[new_table['Alcohol Specific Deaths'].ne('Upper 95% confidence limit')]

In [9]:
# Drop the lower-confidence-limit rows from the main table.
new_table = new_table.loc[new_table['Alcohol Specific Deaths'].ne('Lower 95% confidence limit')]

In [10]:
# Attach the lower confidence limit to every remaining row that shares the same
# Year / Sex / Geography. Clashing non-key columns get pandas' default _x/_y suffixes.
Final_table = new_table.merge(
    nt1,
    how='inner',
    on=['Year', 'Sex', 'Geography'],
)

In [11]:
# Keep the left-hand (_x) copies of the clashing columns plus the lower limit.
Final_table = Final_table.loc[:, [
    'Value',
    'Year',
    'Geography',
    'Sex',
    'Alcohol Specific Deaths_x',
    'Measure Type_x',
    'Unit_x',
    'Lower 95% confidence limit',
]]

In [12]:
# Attach the upper confidence limit on the same Year / Sex / Geography keys.
Final_table = Final_table.merge(
    nt2,
    how='inner',
    on=['Year', 'Sex', 'Geography'],
)

In [13]:
# Select and order the output columns.
# 'Measure Type' and 'Unit' are the unsuffixed columns, i.e. the ones supplied
# by nt2 in the preceding merge.
Final_table = Final_table.loc[:, [
    'Year',
    'Sex',
    'Geography',
    'Alcohol Specific Deaths_x',
    'Measure Type',
    'Value',
    'Lower 95% confidence limit',
    'Upper 95% confidence limit',
    'Unit',
]]

In [14]:
# Remove the merge suffix from the label column.
Final_table = Final_table.rename(
    columns={'Alcohol Specific Deaths_x': 'Alcohol Specific Deaths'}
)

In [15]:
# Rows labelled 'Deaths' get a lower confidence limit of 0; every other row
# keeps its merged value.
# NOTE(review): presumably the limits on the sheet belong to the rate rather
# than the count -- confirm that 0 is the wanted placeholder.
# A vectorised mask is used instead of a row-wise apply: it needs no helper
# function and also works when Final_table has no rows.
Final_table['Lower 95% confidence limit'] = Final_table['Lower 95% confidence limit'].mask(
    Final_table['Alcohol Specific Deaths'].astype(str) == 'Deaths',
    0,
)

In [16]:
# Rows labelled 'Deaths' get an upper confidence limit of 0; every other row
# keeps its merged value.
# NOTE(review): presumably the limits on the sheet belong to the rate rather
# than the count -- confirm that 0 is the wanted placeholder.
# A vectorised mask is used instead of a row-wise apply: it needs no helper
# function and also works when Final_table has no rows.
Final_table['Upper 95% confidence limit'] = Final_table['Upper 95% confidence limit'].mask(
    Final_table['Alcohol Specific Deaths'].astype(str) == 'Deaths',
    0,
)

In [17]:
# Strip any trailing '1' characters from the labels.
# NOTE(review): presumably a footnote marker carried over from the sheet header -- confirm.
Final_table = Final_table.assign(**{
    'Alcohol Specific Deaths': Final_table['Alcohol Specific Deaths'].str.rstrip('1')
})

In [18]:
# Recode the sex labels to single-letter codes; any other value is left as it is.
Final_table['Sex'] = Final_table['Sex'].replace({
    'All Persons': 'T',
    'Males': 'M',
    'Females': 'F',
})

In [19]:
# Every row gets the constant Age value 'All'.
Final_table = Final_table.assign(Age='All')

In [20]:
def user_perc(x,y):
    """Return the measure-type code for a row label.

    x -- the row's 'Alcohol Specific Deaths' label; compared as a string.
    y -- accepted but not used.

    Returns 'count' when the label is exactly 'Deaths', otherwise
    'rate-per-100-000-persons'.
    """
    return 'count' if str(x) == 'Deaths' else 'rate-per-100-000-persons'

# Recompute 'Measure Type' row by row from the label column.
Final_table['Measure Type'] = Final_table.apply(
    lambda record: user_perc(record['Alcohol Specific Deaths'], record['Measure Type']),
    axis=1,
)

In [21]:
# Show the finished table as this cell's output.
Final_table

Unnamed: 0,Year,Sex,Geography,Alcohol Specific Deaths,Measure Type,Value,Lower 95% confidence limit,Upper 95% confidence limit,Unit,Age
0,2001,T,E12000001,Deaths,count,261,0,0,deaths,All
1,2001,T,E12000001,"Rate per 100,000 people",rate-per-100-000-persons,10,9,12,deaths,All
2,2001,F,E12000001,Deaths,count,96,0,0,deaths,All
3,2001,F,E12000001,"Rate per 100,000 females",rate-per-100-000-persons,7,6,9,deaths,All
4,2001,M,E12000001,Deaths,count,165,0,0,deaths,All
5,2001,M,E12000001,"Rate per 100,000 males",rate-per-100-000-persons,14,12,16,deaths,All
6,2001,T,E12000002,Deaths,count,795,0,0,deaths,All
7,2001,T,E12000002,"Rate per 100,000 people",rate-per-100-000-persons,12,11,13,deaths,All
8,2001,F,E12000002,Deaths,count,278,0,0,deaths,All
9,2001,F,E12000002,"Rate per 100,000 females",rate-per-100-000-persons,8,7,9,deaths,All
