Table A3: Perceptions of change in the level of alcohol related issues in the local area in the last 12 months by demographics (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A3')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A3']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<C24 0.763>, <B32 0.075>, <C16 0.819>, <C35 0.755>, <E5 881.0>, <C15 0.828>, <C39 0.887>, <C8 'n<100'>, <B37 'n<100'>, <B28 0.094>, <E26 187.0>, <D21 'n<100'>, <C25 0.871>, <D52 0.098>, <C19 0.791>, <C28 0.745>, <C9 'n<100'>, <C21 'n<100'>, <E12 260.0>, <E57 561.0>, <B43 0.087>, <C37 'n<100'>, <E42 238.0>, <B8 'n<100'>, <E10 230.0>, <Q36 'Total'>, <B12 0.071>, <D36 0.089>, <B24 0.115>, <E16 465.0>, <E32 446.0>, <B44 0.053>, <B57 0.094>, <E43 365.0>, <B47 0.055>, <E39 100.0>, <E25 238.0>, <B58 0.032>, <D27 0.063>, <D12 0.093>, <C43 0.792>, <B10 0.06>, <C20 0.85>, <B55 0.089>, <D42 0.09>, <B56 0.098>, <C47 0.83>, <E37 40.0>, <C42 0.847>, <B35 0.093>, <E48 559.0>, <E15 416.0>, <B19 0.072>, <D16 0.104>, <E11 244.0>, <C27 0.891>, <E27 165.0>, <B39 0.065>, <C57 0.778>, <D20 0.088>, <B31 0.062>, <D57 0.129>, <D51 0.132>, <D43 0.121>, <E28 144.0>, <D37 'n<100'>, <D28 0.161>, <B9 'n<100'>, <E51 230.0>, <D26 0.113>, <C11 0.854>, <B42 0.063>, <D44 0.096>, <E44 278.0>, <D48 0.099>, <B15 0.063>, <

In [6]:
Alcoholissue = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Alcoholissue

{<E3 'Unweighted base'>, <C3 'About the same'>, <B3 'Better'>, <D3 'Worse'>}

In [7]:
sect = tab.excel_ref('A5').expand(DOWN).is_not_blank()
sect

{<A50 'Disability / illness'>, <A14 'Gender'>, <A46 'Dependants'>, <A44 'Tertiary'>, <A20 'Protestant'>, <A41 'Level of qualifications'>, <A28 'Western'>, <A24 'Belfast'>, <A10 '35-49'>, <A18 'Religion'>, <A48 'No dependants'>, <A21 'Other'>, <A30 'Employment status'>, <A62 '1. Results exclude "don\'t know" and refusals.'>, <A54 'Area type'>, <A32 'Not in paid employment'>, <A52 'No disability / illness'>, <A34 'Marital status'>, <A8 '16-24'>, <A58 'Rural'>, <A37 'Married and separated from husband / wife'>, <A57 'All urban'>, <A11 '50-64'>, <A5 'All adults'>, <A23 'Health and Social Care Trust'>, <A25 'Northern'>, <A19 'Catholic'>, <A7 'Age of household reference person (HRP)1'>, <A9 '25-34'>, <A35 'Single, that is never married'>, <A15 'Male'>, <A38 'Divorced'>, <A12 '65 and over'>, <A47 'Has dependants'>, <A61 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A51 'Has disability / illness'>, <A27 'Southern'>, <A26 'South Eastern'>, <A55 'Belfast'>, <A31 'In paid employment'

In [8]:
category = tab.excel_ref('A').expand(DOWN).by_index([7,14,18,23,30,34,41,46,50,54])
category

{<A50 'Disability / illness'>, <A14 'Gender'>, <A46 'Dependants'>, <A54 'Area type'>, <A41 'Level of qualifications'>, <A23 'Health and Social Care Trust'>, <A34 'Marital status'>, <A7 'Age of household reference person (HRP)1'>, <A18 'Religion'>, <A30 'Employment status'>}

In [9]:
Dimensions = [
            HDim(Alcoholissue,'Alcohol issue',DIRECTLY,ABOVE),
            HDim(category,'Category',CLOSEST,ABOVE),
            HDim(sect,'Sect',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
0,0.07,,Better,,All adults,Percentage,People
1,0.824,,About the same,,All adults,Percentage,People
2,0.106,,Worse,,All adults,Percentage,People
3,881,,Unweighted base,,All adults,Percentage,People
4,,n<100,Better,Age of household reference person (HRP)1,16-24,Percentage,People
5,,n<100,About the same,Age of household reference person (HRP)1,16-24,Percentage,People
6,,n<100,Worse,Age of household reference person (HRP)1,16-24,Percentage,People
7,57,,Unweighted base,Age of household reference person (HRP)1,16-24,Percentage,People
8,,n<100,Better,Age of household reference person (HRP)1,25-34,Percentage,People
9,,n<100,About the same,Age of household reference person (HRP)1,25-34,Percentage,People


In [12]:
new_table.columns = ['Response' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table = new_table[new_table['Response'] != ' ']

In [14]:
new_table.dtypes

Response         object
DATAMARKER       object
Alcohol issue    object
Category         object
Sect             object
Measure Type     object
Unit             object
dtype: object

In [15]:
new_table.tail(5)

Unnamed: 0,Response,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
134,561.0,,Unweighted base,Area type,All urban,Percentage,People
135,0.032,,Better,Area type,Rural,Percentage,People
136,0.897,,About the same,Area type,Rural,Percentage,People
137,0.071,,Worse,Area type,Rural,Percentage,People
138,320.0,,Unweighted base,Area type,Rural,Percentage,People


In [16]:
new_table.head()

Unnamed: 0,Response,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
0,0.07,,Better,,All adults,Percentage,People
1,0.824,,About the same,,All adults,Percentage,People
2,0.106,,Worse,,All adults,Percentage,People
3,881.0,,Unweighted base,,All adults,Percentage,People
4,,n<100,Better,Age of household reference person (HRP)1,16-24,Percentage,People


In [17]:
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

In [18]:
new_table['Response'] = new_table['Response'].apply(str)

In [19]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Number'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Alcohol issue'],row['Measure Type']), axis = 1)

In [20]:
new_table['Category'].fillna('All', inplace = True)

In [21]:
new_table['Demographic'] = new_table['Category'] + '/' + new_table['Sect']

In [22]:
new_table = new_table[['Demographic','Alcohol issue','Measure Type','Response','Unit']]

In [23]:
new_table.head(5)

Unnamed: 0,Demographic,Alcohol issue,Measure Type,Response,Unit
0,All/All adults,Better,Percentage,0.07,People
1,All/All adults,About the same,Percentage,0.824,People
2,All/All adults,Worse,Percentage,0.106,People
3,All/All adults,Unweighted base,Number,881.0,People
4,Age of household reference person (HRP)1/16-24,Better,Percentage,,People


In [24]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('taba3.csv'), index = False)