In [162]:
# ONS - Children living in long-term workless households, by disability status (Table H)
# Tables a - e

# NOTE(review): the star import is kept because later cells use bare names
# (Scraper, Excel, HDim, DOWN, ...) that presumably come from gssutils - confirm
# before narrowing it to explicit imports.
from gssutils import *
import pandas as pd

# Scrape the ONS dataset landing page and load every sheet of the latest Excel
# distribution, keyed by sheet name so later cells can pick a tab by its name.
scraper = Scraper('https://www.ons.gov.uk/employmentandlabourmarket/peoplenotinwork/unemployment/datasets/hchildrenlivinginlongtermworklesshouseholdsandworklesshouseholdsbydisabilitystatus')
tabs = {tab.name: tab for tab in scraper.distribution(latest=True, mediaType=Excel).as_databaker()}

In [163]:
# Year header labels to match in the sheet.
# NOTE(review): the extra trailing digits ('20103', '20134 r') and the ' r'
# suffixes look like footnote / revision markers carried over from the
# spreadsheet - confirm against the source workbook.
year_options = [
    '2006.0',
    '2007.0',
    '2008.0',
    '2009.0',
    '20103',
    '2011.0',
    '2012 r',
    '20134 r',
    '2014 r',
    '2015 r',
    '2016 r',
    '2017 r',
    '2018.0',
]

# Column headings describing the disability status of adults in the household.
# TODO: is 'Total' required here?
household_disability_status = [
    'No disabled adults in household',
    'Some adults in household are disabled',
    'All adults in household are disabled',
    'Total',
]

# Section headings for the type of household; each label ends in a single digit.
workless_household_type = [
    'Children in long-term workless households1',
    'Children in workless households2',
    'All children5',
]

# Labels used for the unit of measure.
count_type = [
    'Thousands',
    'percent',
]

In [164]:
# Pick the sheet named 'CILTWH H' from the tabs loaded earlier.
tab = tabs['CILTWH H']
# Locate the cell(s) matching 'Table a'; later selections are anchored on it.
ref_cell = tab.filter('Table a')
# Check that exactly one anchor cell matched; as the last expression of the
# cell, its result is also what the notebook displays.
ref_cell.assert_one()

{<A3 'Table a'>}

In [165]:
# Header selections: each one starts from an anchor cell, spreads out in the
# given direction(s) and keeps only cells matching the corresponding lookup list.
year = (
    ref_cell
    .fill(DOWN)
    .one_of(year_options)
)

household_type = (
    tab.excel_ref('B2')
    .fill(DOWN)
    .one_of(workless_household_type)
)

disability_status = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .expand(DOWN)
    .one_of(household_disability_status)
)

count = (
    tab.excel_ref('E2')
    .expand(DOWN)
    .one_of(count_type)
)

# Observations: everything right of / below B6 that is non-blank,
# non-whitespace and numeric.
observations = (
    tab.excel_ref('B6')
    .expand(RIGHT)
    .expand(DOWN)
    .is_not_blank()
    .is_not_whitespace()
    .is_number()
)

In [166]:
# One HDim per output column, pairing a header selection with its column label
# and the lookup rule (strictness, direction) used to attach it to observations.
# The entries are kept in their existing order.
Dimensions = [
    HDim(year, 'Year', DIRECTLY, LEFT),
    HDim(disability_status, 'Disability Status', DIRECTLY, ABOVE),
    HDim(household_type, 'Household Type', CLOSEST, ABOVE),
    HDim(count, 'Unit', CLOSEST, ABOVE),
]

In [167]:
# Combine the observation cells with the dimension lookups into one segment.
# NOTE(review): processTIMEUNIT=True is passed although no dimension here is
# labelled as a time column - confirm it has the intended effect.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Materialise the segment as a pandas DataFrame for the tidy-up steps that follow.
new_table = c1.topandas()
# Optional debugging aid (disabled): write an HTML preview of the selections.
#savepreviewhtml(c1, fname="Preview.html")




In [168]:
# Tidy the databaker output into the final observation table.
#
# The steps are written as a single chain that builds a fresh frame instead of
# filtering and then editing the filtered slice in place. That avoids pandas'
# SettingWithCopyWarning and makes the cell safe to re-run: every step is a
# no-op when applied to an already-tidied `new_table`.
new_table = (
    new_table
    # Keep only the count rows; rows whose Unit is 'percent' are dropped.
    .loc[lambda df: ~df['Unit'].isin(['percent'])]
    # Rename the observation column; ignored if it was already renamed.
    .rename(columns={'OBS': 'Value'})
    .assign(**{
        # Integer counts; astype(int) truncates any fractional part.
        'Value': lambda df: df['Value'].astype(int),
        # Year labels carry suffixes ('2006.0', '20103', '2012 r'); the first
        # four characters are the year. astype(str) lets this also accept a
        # column that has already been converted to integers.
        'Year': lambda df: df['Year'].astype(str).str[:4].astype(int),
        # Strip the trailing digit(s) from the household type label. Unlike
        # slicing off the last character, this leaves a label without a
        # trailing digit untouched.
        'Household Type': lambda df: df['Household Type'].str.rstrip('0123456789'),
    })
)

In [169]:
# Display the tidied table as the cell output.
new_table

Unnamed: 0,Value,Year,Disability Status,Household Type,Unit
0,959098,2006,No disabled adults in household,Children in long-term workless households,Thousands
1,280765,2006,Some adults in household are disabled,Children in long-term workless households,Thousands
2,304317,2006,All adults in household are disabled,Children in long-term workless households,Thousands
3,1544180,2006,Total,Children in long-term workless households,Thousands
4,972888,2007,No disabled adults in household,Children in long-term workless households,Thousands
5,291156,2007,Some adults in household are disabled,Children in long-term workless households,Thousands
6,290042,2007,All adults in household are disabled,Children in long-term workless households,Thousands
7,1554086,2007,Total,Children in long-term workless households,Thousands
8,953774,2008,No disabled adults in household,Children in long-term workless households,Thousands
9,312141,2008,Some adults in household are disabled,Children in long-term workless households,Thousands


Each statistical dimension has a set of possible values, typically a classification scheme of some form. Each of these values should have an identifier and a label. The set of possible values for a given dimension is held in a codelist.

