Table A3: Perceptions of change in the level of alcohol related issues in the local area in the last 12 months by demographics (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
Value = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(Value.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A3')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A3']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<D5 0.106>, <B12 0.071>, <C32 0.807>, <D36 0.089>, <C31 0.857>, <B36 0.054>, <C26 0.814>, <C25 0.871>, <C48 0.82>, <C28 0.745>, <B47 0.055>, <E39 100.0>, <E57 561.0>, <C16 0.819>, <D27 0.063>, <D37 'n<100'>, <E10 230.0>, <C15 0.828>, <D42 0.09>, <C5 0.824>, <B44 0.053>, <E43 365.0>, <E47 322.0>, <B57 0.094>, <B19 0.072>, <Q25 'Total'>, <C57 0.778>, <B43 0.087>, <B21 'n<100'>, <D43 0.121>, <E26 187.0>, <E28 144.0>, <C8 'n<100'>, <B35 0.093>, <B24 0.115>, <C38 'n<100'>, <E16 465.0>, <E15 416.0>, <B5 0.07>, <D20 0.088>, <B31 0.062>, <E42 238.0>, <D12 0.093>, <C43 0.792>, <B37 'n<100'>, <B16 0.077>, <E51 230.0>, <B55 0.089>, <D26 0.113>, <B10 0.06>, <C20 0.85>, <B56 0.098>, <D55 0.131>, <E37 40.0>, <E32 446.0>, <D56 0.124>, <E48 559.0>, <D8 'n<100'>, <B15 0.063>, <D39 0.048>, <E38 85.0>, <E31 416.0>, <D58 0.071>, <E25 238.0>, <D16 0.104>, <E11 244.0>, <B58 0.032>, <E27 165.0>, <D10 0.108>, <Q36 'Total'>, <D51 0.132>, <E24 147.0>, <Q42 1.0>, <C47 0.83>, <D28 0.161>, <C42 0.847>, <D11 0.091

In [6]:
Alcoholissue = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Alcoholissue

{<C3 'About the same'>, <E3 'Unweighted base'>, <D3 'Worse'>, <B3 'Better'>}

In [7]:
sect = tab.excel_ref('A5').expand(DOWN).is_not_blank()
sect

{<A7 'Age of household reference person (HRP)1'>, <A27 'Southern'>, <A36 'Married and living with husband / wife'>, <A51 'Has disability / illness'>, <A50 'Disability / illness'>, <A21 'Other'>, <A24 'Belfast'>, <A10 '35-49'>, <A34 'Marital status'>, <A31 'In paid employment'>, <A43 'Secondary'>, <A37 'Married and separated from husband / wife'>, <A28 'Western'>, <A44 'Tertiary'>, <A23 'Health and Social Care Trust'>, <A32 'Not in paid employment'>, <A35 'Single, that is never married'>, <A61 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A56 'Urban, excluding Belfast'>, <A30 'Employment status'>, <A5 'All adults'>, <A54 'Area type'>, <A46 'Dependants'>, <A15 'Male'>, <A25 'Northern'>, <A8 '16-24'>, <A18 'Religion'>, <A39 'Widowed'>, <A16 'Female'>, <A12 '65 and over'>, <A41 'Level of qualifications'>, <A26 'South Eastern'>, <A42 'Primary'>, <A47 'Has dependants'>, <A14 'Gender'>, <A55 'Belfast'>, <A62 '1. Results exclude "don\'t know" and refusals.'>, <A57 'All urban'>, <A

In [8]:
category = tab.excel_ref('A').expand(DOWN).by_index([7,14,18,23,30,34,41,46,50,54])
category

{<A7 'Age of household reference person (HRP)1'>, <A54 'Area type'>, <A46 'Dependants'>, <A14 'Gender'>, <A50 'Disability / illness'>, <A18 'Religion'>, <A23 'Health and Social Care Trust'>, <A30 'Employment status'>, <A34 'Marital status'>, <A41 'Level of qualifications'>}

In [9]:
Dimensions = [
            HDim(Alcoholissue,'Alcohol related issue response',DIRECTLY,ABOVE),
            HDim(category,'Category',CLOSEST,ABOVE),
            HDim(sect,'Sect',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percent'),
            HDimConst('Unit','People'),
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol related issue response,Category,Sect,Measure Type,Unit
0,0.07,,Better,,All adults,Percent,People
1,0.824,,About the same,,All adults,Percent,People
2,0.106,,Worse,,All adults,Percent,People
3,881,,Unweighted base,,All adults,Percent,People
4,,n<100,Better,Age of household reference person (HRP)1,16-24,Percent,People
5,,n<100,About the same,Age of household reference person (HRP)1,16-24,Percent,People
6,,n<100,Worse,Age of household reference person (HRP)1,16-24,Percent,People
7,57,,Unweighted base,Age of household reference person (HRP)1,16-24,Percent,People
8,,n<100,Better,Age of household reference person (HRP)1,25-34,Percent,People
9,,n<100,About the same,Age of household reference person (HRP)1,25-34,Percent,People


In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table['Value'] = pd.to_numeric(new_table['Value'], errors='coerce')

In [14]:
new_table = new_table[new_table['Value'].notnull()]

In [15]:
new_table['Value'] = new_table['Value'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Count'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Alcohol related issue response'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
new_table['Category'] = new_table['Category'].str.rstrip('1')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table['Demographic1'] = new_table['Category'].fillna('') + ' - ' + new_table['Sect'].fillna('')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table['Demographic1'] = new_table['Demographic1'].str.lstrip(' - ')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [20]:
new_table = new_table[['Demographic1','Alcohol related issue response','Measure Type','Value','Unit']]

In [21]:
new_table.head(5)

Unnamed: 0,Demographic1,Alcohol related issue response,Measure Type,Value,Unit
0,All adults,Better,Percent,0.07,People
1,All adults,About the same,Percent,0.824,People
2,All adults,Worse,Percent,0.106,People
3,All adults,Unweighted base,Count,881.0,People
7,Age of household reference person (HRP) - 16-24,Unweighted base,Count,57.0,People


In [22]:
import urllib.request as request
import csv
r = request.urlopen('https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/demographic.csv').read().decode('utf8').split("\n")
reader = csv.reader(r)
for line in reader:
    print(line)

['Label', 'Notation', 'Parent Notation', 'Sort Priority']
['All adults', 'all-adults', '', '1']
['Age of household reference person (HRP) - 16-24', 'hrp/16-24', '', '2']
['Age of household reference person (HRP) - 25-34', 'hrp/25-34', '', '3']
['Age of household reference person (HRP) - 35-49', 'hrp/35-49', '', '4']
['Age of household reference person (HRP) - 50-64', 'hrp/50-64', '', '5']
['Age of household reference person (HRP) - 65 and over', 'hrp/65plus', '', '6']
['Gender - Male', 'male', '', '7']
['Gender - Female', 'female', '', '8']
['Religion - Catholic', 'catholic', '', '9']
['Religion - Protestant', 'protestant', '', '10']
['Religion - Other', 'other', '', '11']
['Health and Social Care Trust - Belfast', 'hsct/belfast', '', '12']
['Health and Social Care Trust - Northern', 'hsct/northern', '', '13']
['Health and Social Care Trust - South Eastern', 'hsct/south-eastern', '', '14']
['Health and Social Care Trust - Southern', 'hsct/southern', '', '15']
['Health and Social Care T

In [23]:
import io
import requests
url="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/demographic.csv"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [24]:
Final_table = pd.merge(new_table, c, how = 'left', left_on = 'Demographic1', right_on = 'Label')

In [25]:
Final_table.columns = ['Demographic' if x=='Notation' else x for x in Final_table.columns]

In [26]:
Final_table['Alcohol related issue response'] = Final_table['Alcohol related issue response'].map(
    lambda x: {
        'Better' : 'better', 
        'About the same' : 'same',
        'Worse': 'worse' ,
        'Unweighted base': 'unweighted-base'
        }.get(x, x))

In [27]:
Final_table = Final_table[Final_table['Alcohol related issue response'].notnull()]

In [28]:
Final_table = Final_table[['Demographic','Alcohol related issue response','Measure Type','Value','Unit']]

In [29]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

Final_table.to_csv(destinationFolder / ('table3.csv'), index = False)

In [30]:
Final_table['Alcohol related issue response'].unique()

array(['better', 'same', 'worse', 'unweighted-base'], dtype=object)