Table 4 -  Indices of alcohol price, retail prices, alcohol price index relative to retail prices index (all items), real household disposable income, real disposable income per adult and affordability of alcohol

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://webarchive.nationalarchives.gov.uk/20180328130416/http://digital.nhs.uk/media/30889/Statistics-on-Alcohol-England-2017-Tables/Any/alc-eng-2017-tab'
    inputFile = sourceFolder / 'alc-eng-2017-tab'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4')[0]

Loading in\alc-eng-2017-tab which has size 126229 bytes
Table names: ['Table 4']


In [3]:
observations = tab.excel_ref('B7').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<G37 99.2045931847103>, <D26 124.24031437637674>, <C43 100.0>, <G33 116.7303374430856>, <F27 147.93177510156977>, <F36 112.88724181250822>, <C23 254.77189988623437>, <C22 259.2599544937429>, <C28 223.05631399317406>, <B21 329.3361746824968>, <E20 203.27952158749483>, <C11 363.0836177474402>, <F24 160.48438099393778>, <G23 136.73829594151826>, <F17 186.93798081945837>, <G19 149.82272019600467>, <B33 208.41966451284338>, <F40 100.53328988985135>, <B18 349.6198491262692>, <C12 351.8634812286689>, <B28 276.8174491517331>, <C10 374.1541524459613>, <E38 111.1752601223082>, <C16 309.07736063708757>, <F41 98.72953342257956>, <B42 116.91122640608586>, <F10 185.02899677896764>, <F16 190.2199131171028>, <D39 111.00570489903436>, <G26 123.09435361445583>, <G32 111.81795773527254>, <D37 112.39171946486208>, <D12 123.66475368311191>, <D38 111.2122207988036>, <F13 189.45049525127428>, <D13 122.93466851882597>, <B47 'See Appendix B for affordability calculations'>, <B34 190.05774898940066>, <C9 382.9

In [4]:
gr = tab.excel_ref('B5').expand(RIGHT).is_not_blank()
gr

{<D5 'Alcohol price index relative to Retail price index (all items)'>, <C5 'Retail prices index\n(all items)'>, <B5 'Alcohol price index'>, <F5 'Real disposable income per adult (18+)\n(revised)2,3'>, <E5 'Real household disposable income (revised)2,3'>, <G5 'Affordability of alcohol index on a per capita basis (revised)1,2,3'>}

In [5]:
code = tab.excel_ref('A7').expand(DOWN).is_not_blank() - tab.excel_ref('A44')
code

{<A17 2006.0>, <A33 1990.0>, <A34 1989.0>, <A54 'Alcohol Price and Retail Prices (all items) Indices: derived from Focus on Consumer Price Indices: (Codes CBAA, CBAB, CHBD, CHAW). The Office for National Statistics\n\n'>, <A12 2011.0>, <A47 1.0>, <A55 'Real Households Disposable Income: Economic Trends: (Code NRJR). The Office for National Statistics'>, <A10 2013.0>, <A27 1996.0>, <A8 2015.0>, <A46 'Footnotes'>, <A15 2008.0>, <A23 2000.0>, <A38 1985.0>, <A9 2014.0>, <A41 1982.0>, <A53 'Sources'>, <A29 1994.0>, <A37 1986.0>, <A58 'Copyright © 2017, re-used with the permission of The Office for National Statistics'>, <A11 2012.0>, <A43 1980.0>, <A36 1987.0>, <A7 2016.0>, <A39 1984.0>, <A56 'MYE2: Population Estimatesin England, mid-2013. The Office for National Statistics'>, <A18 2005.0>, <A32 1991.0>, <A48 2.0>, <A13 2010.0>, <A21 2002.0>, <A59 'Copyright © 2017. Health and Social Care Information Centre, Lifestyles Statistics. All rights reserved'>, <A31 1992.0>, <A42 1981.0>, <A24 199

In [6]:
Dimensions = [
            HDim(code,'Period',DIRECTLY,LEFT),
            HDim(gr,'Affordability of alcohol',DIRECTLY,ABOVE),
            HDimConst('Geography','K02000001'),
            HDimConst('Unit','Indices'),
            HDimConst('Measure Type','Percentage')
            ]

In [7]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [8]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Period,Affordability of alcohol,Geography,Unit,Measure Type
0,477.045,,2016.0,Alcohol price index,K02000001,Indices,Percentage
1,393.602,,2016.0,Retail prices index\n(all items),K02000001,Indices,Percentage
2,121.2,,2016.0,Alcohol price index relative to Retail price i...,K02000001,Indices,Percentage
3,239.68,,2016.0,"Real household disposable income (revised)2,3",K02000001,Indices,Percentage
4,194.375,,2016.0,Real disposable income per adult (18+)\n(revis...,K02000001,Indices,Percentage
5,160.376,,2016.0,Affordability of alcohol index on a per capita...,K02000001,Indices,Percentage
6,474.183,,2015.0,Alcohol price index,K02000001,Indices,Percentage
7,386.721,,2015.0,Retail prices index\n(all items),K02000001,Indices,Percentage
8,122.617,,2015.0,Alcohol price index relative to Retail price i...,K02000001,Indices,Percentage
9,236.137,,2015.0,"Real household disposable income (revised)2,3",K02000001,Indices,Percentage


In [9]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [10]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Period,Affordability of alcohol,Geography,Unit,Measure Type
0,477.045,,2016.0,Alcohol price index,K02000001,Indices,Percentage
1,393.602,,2016.0,Retail prices index\n(all items),K02000001,Indices,Percentage
2,121.2,,2016.0,Alcohol price index relative to Retail price i...,K02000001,Indices,Percentage
3,239.68,,2016.0,"Real household disposable income (revised)2,3",K02000001,Indices,Percentage
4,194.375,,2016.0,Real disposable income per adult (18+)\n(revis...,K02000001,Indices,Percentage


In [11]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Period,Affordability of alcohol,Geography,Unit,Measure Type
0,477.045,,2016.0,Alcohol price index,K02000001,Indices,Percentage
1,393.602,,2016.0,Retail prices index\n(all items),K02000001,Indices,Percentage
2,121.2,,2016.0,Alcohol price index relative to Retail price i...,K02000001,Indices,Percentage
3,239.68,,2016.0,"Real household disposable income (revised)2,3",K02000001,Indices,Percentage
4,194.375,,2016.0,Real disposable income per adult (18+)\n(revis...,K02000001,Indices,Percentage


In [12]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Period,Affordability of alcohol,Geography,Unit,Measure Type
221,100.0,,1980.0,Affordability of alcohol index on a per capita...,K02000001,Indices,Percentage
222,,See Appendix B for affordability calculations,1.0,Alcohol price index,K02000001,Indices,Percentage
223,,These figures have been revised since previous...,2.0,Alcohol price index,K02000001,Indices,Percentage
224,,The RHDI index was adjusted using mid-year ONS...,3.0,Alcohol price index,K02000001,Indices,Percentage
225,,Population estimates used in this calculation ...,4.0,Alcohol price index,K02000001,Indices,Percentage


In [13]:
new_table.dtypes

Value                       object
DATAMARKER                  object
Period                      object
Affordability of alcohol    object
Geography                   object
Unit                        object
Measure Type                object
dtype: object

In [14]:
new_table['Value'] = new_table['Value'].astype(str)

In [15]:
new_table = new_table[new_table['Value'] != '' ]

In [16]:
new_table['Period'] = pd.to_numeric(new_table['Period'], errors='coerce').fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [17]:
new_table['Period'] = new_table['Period'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table.head(3)

Unnamed: 0,Value,DATAMARKER,Period,Affordability of alcohol,Geography,Unit,Measure Type
0,477.0450902377694,,2016,Alcohol price index,K02000001,Indices,Percentage
1,393.6023890784983,,2016,Retail prices index\n(all items),K02000001,Indices,Percentage
2,121.1997445835192,,2016,Alcohol price index relative to Retail price i...,K02000001,Indices,Percentage


In [19]:
new_table['Affordability of alcohol'] = new_table['Affordability of alcohol'].str.rstrip('123')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [20]:
new_table['Affordability of alcohol'] = new_table['Affordability of alcohol'].str.rstrip('1,2,')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [21]:
new_table = new_table[['Geography','Period','Affordability of alcohol','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.csv'), index = False)

In [23]:
new_table.head()

Unnamed: 0,Geography,Period,Affordability of alcohol,Measure Type,Value,Unit
0,K02000001,2016,Alcohol price index,Percentage,477.0450902377694,Indices
1,K02000001,2016,Retail prices index\n(all items),Percentage,393.6023890784983,Indices
2,K02000001,2016,Alcohol price index relative to Retail price i...,Percentage,121.1997445835192,Indices
3,K02000001,2016,Real household disposable income (revised),Percentage,239.67964138384104,Indices
4,K02000001,2016,Real disposable income per adult (18+)\n(revised),Percentage,194.3749810589289,Indices
