Table 4 - Indices of alcohol price, retail prices, alcohol price index relative to retail prices index (all items), real household disposable income, real disposable income per adult and affordability of alcohol

In [None]:
from gssutils import *

if is_interactive():
    # When run interactively the notebook fetches the source workbook itself.
    # The imports below are only needed for that download, so they stay
    # inside the guard.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped with an on-disk cache ('.cache') and the
    # LastModified heuristic -- presumably so repeated runs can reuse a
    # previously downloaded copy.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://webarchive.nationalarchives.gov.uk/20180328130416/http://digital.nhs.uk/media/30889/Statistics-on-Alcohol-England-2017-Tables/Any/alc-eng-2017-tab'
    inputFile = sourceFolder / 'alc-eng-2017-tab'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page to disk
    # and letting the spreadsheet loader choke on it later.
    response.raise_for_status()
    inputFile.write_bytes(response.content)

In [None]:
# Ask loadxlstabs for just the 'Table 4' sheet of the source workbook and keep
# the first element of what it returns.
table4_tabs = loadxlstabs(inputFile, sheetids='Table 4')
tab = table4_tabs[0]

In [None]:
# Observation cells: every non-blank cell in the region that starts at B7 and
# is expanded downwards and then to the right.
observations = (
    tab.excel_ref('B7')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations  # bare expression: shown as the cell's output in the notebook

In [None]:
# Column headers: the non-blank cells of row 5, from column B rightwards.
# These become the 'Affordability of alcohol' dimension further down.
gr = (
    tab.excel_ref('B5')
    .expand(RIGHT)
    .is_not_blank()
)
gr  # bare expression: shown as the cell's output in the notebook

In [None]:
# Row labels used for the 'Period' dimension: non-blank cells from A7
# downwards, with cell A44 removed from the selection (presumably a note or
# source line rather than a period -- confirm against the workbook).
period_column = tab.excel_ref('A7').expand(DOWN).is_not_blank()
code = period_column - tab.excel_ref('A44')
code  # bare expression: shown as the cell's output in the notebook

In [None]:
# Dimensions looked up from the sheet: each observation takes its Period from
# the `code` cell directly to its left and its series name from the `gr`
# header cell directly above it.
lookup_dimensions = [
    HDim(code, 'Period', DIRECTLY, LEFT),
    HDim(gr, 'Affordability of alcohol', DIRECTLY, ABOVE),
]

# Dimensions that carry the same constant value on every observation.
constant_dimensions = [
    HDimConst('Geography', 'K02000001'),
    HDimConst('Unit', 'Indices'),
    HDimConst('Measure Type', '1980 = 100'),
]

Dimensions = lookup_dimensions + constant_dimensions

In [None]:
# Tie the observation cells to their dimensions; processTIMEUNIT=True is
# handed to ConversionSegment unchanged.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [None]:
# Convert the conversion segment into a pandas DataFrame; the later cells
# clean this frame up column by column.
new_table = c1.topandas()
new_table  # bare expression: shown as the cell's output in the notebook

In [None]:
# Relabel the 'OBS' column (if present) as 'Value'; all other column names
# are left as they are.
new_table.rename(columns={'OBS': 'Value'}, inplace=True)

In [None]:
# Preview the first five rows after the column rename.
new_table.head(n=5)

In [None]:
# Same five-row preview again (this cell repeats the one before it).
new_table.head(n=5)

In [None]:
# Preview the last five rows, e.g. to spot stray rows from the sheet's foot.
new_table.tail(n=5)

In [None]:
# Show each column's dtype before the type conversions in the cells below.
new_table.dtypes

In [None]:
# Store the observation values as strings.
value_as_text = new_table['Value'].astype(str)
new_table['Value'] = value_as_text

In [None]:
# Keep only the rows whose Value is not the empty string. The explicit
# .copy() makes the result an independent DataFrame, so the column
# assignments in the following cells cannot trigger pandas'
# SettingWithCopyWarning.
new_table = new_table[new_table['Value'] != ''].copy()

In [None]:
# Parse Period as a number. Values that cannot be parsed are coerced to NaN
# and then replaced by 0.
# NOTE(review): a Period of 0 therefore marks an unparseable label -- confirm
# that this is intended rather than dropping or reporting such rows.
period_numeric = pd.to_numeric(new_table['Period'], errors='coerce')
new_table['Period'] = period_numeric.fillna(0)

In [None]:
# Cast Period to int.
period_as_int = new_table['Period'].astype(int)
new_table['Period'] = period_as_int

In [None]:
# Preview the first three rows after the Period conversion.
new_table.head(n=3)

In [None]:
# Remove any trailing run of the characters '1', '2' and '3' from the series
# names (presumably footnote markers attached to the sheet's column headers).
series_labels = new_table['Affordability of alcohol']
new_table['Affordability of alcohol'] = series_labels.str.rstrip('123')

In [None]:
# Keep only the output columns, in the order they should appear in the CSV.
output_columns = [
    'Geography',
    'Period',
    'Affordability of alcohol',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [None]:
if is_interactive():
    # Write the tidy table to out/table4.csv, creating the folder if needed.
    # Path is imported by the interactive download cell at the top of the
    # notebook.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table4.csv', index=False)

In [None]:
# Final check: first five rows of the table as exported.
new_table.head(n=5)