Table A1: Concern about alcohol related issues in the local area by demographics (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session backed by an on-disk cache in '.cache'; the LastModified
# heuristic is handed to CacheControl to govern reuse of cached responses.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source spreadsheet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Fail here on an HTTP error (404, 500, ...) rather than saving the error page
# as the .xlsx, which would only surface later as a confusing parse failure.
response.raise_for_status()
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A1' worksheet from the downloaded workbook; the result
# is indexed with [0] to take the first (here the only) matching tab.
tab = loadxlstabs(inputFile, sheetids='Table A1')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A1']


In [4]:
# Observation cells: every non-blank cell from B5 extending down and to the right.
# This sweeps in the 'Unweighted base' counts (column E) and the 'n<100'
# suppression markers alongside the percentages.
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected cells so the selection can be checked by eye.
observations

{<E11 259.0>, <D15 0.382>, <C19 0.133>, <D27 0.44>, <D9 0.521>, <D35 0.415>, <E58 339.0>, <D10 0.373>, <D11 0.368>, <C26 0.161>, <D58 0.486>, <E9 100.0>, <D48 0.42>, <D55 0.42>, <C37 'n<100'>, <E35 271.0>, <B8 'n<100'>, <D56 0.294>, <C5 0.164>, <B15 0.458>, <D39 0.493>, <E8 66.0>, <E20 454.0>, <B27 0.399>, <B28 0.551>, <B25 0.405>, <C32 0.138>, <B9 0.313>, <C43 0.164>, <E27 181.0>, <E26 195.0>, <E28 152.0>, <B58 0.304>, <B51 0.5>, <D8 'n<100'>, <D32 0.378>, <D44 0.428>, <D24 0.442>, <D5 0.403>, <E32 479.0>, <C51 0.096>, <C15 0.16>, <B21 'n<100'>, <C36 0.172>, <E37 46.0>, <C57 0.136>, <D37 'n<100'>, <B19 0.471>, <C31 0.197>, <B5 0.433>, <C8 'n<100'>, <B37 'n<100'>, <E19 360.0>, <C52 0.183>, <D21 'n<100'>, <B57 0.514>, <B32 0.484>, <B52 0.414>, <D25 0.409>, <C58 0.21>, <C21 'n<100'>, <D26 0.414>, <C48 0.156>, <E12 272.0>, <E51 243.0>, <D28 0.295>, <C55 0.16>, <E25 251.0>, <D57 0.351>, <D19 0.396>, <E38 87.0>, <C28 0.154>, <E56 325.0>, <C27 0.161>, <C10 0.217>, <C38 'n<100'>, <D42 0.417>,

In [6]:
# Column headings in row 3, from B3 rightwards: the three agreement levels plus
# the 'Unweighted base' column.
Alcoholissue = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Alcoholissue

{<C3 'Neither agree nor disagree'>, <D3 'Disagree / strongly disagree'>, <E3 'Unweighted base'>, <B3 'Agree / strongly agree'>}

In [7]:
# Row labels: every non-blank cell in column A from A5 downwards.
# Besides the demographic groups this also picks up the category heading rows
# (e.g. 'Religion') and the source/footnote rows at the bottom (A61, A62).
sect = tab.excel_ref('A5').expand(DOWN).is_not_blank()
sect

{<A46 'Dependants'>, <A19 'Catholic'>, <A56 'Urban, excluding Belfast'>, <A16 'Female'>, <A57 'All urban'>, <A52 'No disability / illness'>, <A54 'Area type'>, <A55 'Belfast'>, <A8 '16-24'>, <A39 'Widowed'>, <A30 'Employment status'>, <A12 '65 and over'>, <A62 '1. Results exclude "don\'t know" and refusals.'>, <A23 'Health and Social Care Trust'>, <A32 'Not in paid employment'>, <A48 'No dependants'>, <A61 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A28 'Western'>, <A41 'Level of qualifications'>, <A58 'Rural'>, <A20 'Protestant'>, <A50 'Disability / illness'>, <A5 'All adults'>, <A7 'Age of household reference person (HRP)1'>, <A37 'Married and separated from husband / wife'>, <A25 'Northern'>, <A34 'Marital status'>, <A9 '25-34'>, <A42 'Primary'>, <A11 '50-64'>, <A43 'Secondary'>, <A18 'Religion'>, <A27 'Southern'>, <A36 'Married and living with husband / wife'>, <A15 'Male'>, <A35 'Single, that is never married'>, <A10 '35-49'>, <A44 'Tertiary'>, <A38 'Divorced'>, <A2

In [8]:
# Category heading cells in column A, picked by hard-coded positions.
# The displayed result shows these positions line up with spreadsheet rows
# 7, 14, 18, ... (A7, A14 'Gender', A18 'Religion', ...), so the list has to be
# updated by hand if the layout of the sheet ever changes.
category = tab.excel_ref('A').expand(DOWN).by_index([7,14,18,23,30,34,41,46,50,54])
category

{<A46 'Dependants'>, <A34 'Marital status'>, <A23 'Health and Social Care Trust'>, <A54 'Area type'>, <A18 'Religion'>, <A41 'Level of qualifications'>, <A14 'Gender'>, <A50 'Disability / illness'>, <A30 'Employment status'>, <A7 'Age of household reference person (HRP)1'>}

In [9]:
# Dimensions attached to every observation cell. The resulting DataFrame shows
# its columns in this same order.
Dimensions = [
            # Column heading directly above the observation.
            HDim(Alcoholissue,'Alcohol issue',DIRECTLY,ABOVE),
            # Nearest category heading above the row; the 'All adults' row has
            # no heading above it and so ends up with a missing Category.
            HDim(category,'Category',CLOSEST,ABOVE),
            # Row label in column A on the same row as the observation.
            HDim(sect,'Sect',DIRECTLY,LEFT),
            # Constant columns applied to every observation.
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [10]:
# Combine the observation cells with their dimensions into a conversion segment.
# NOTE(review): processTIMEUNIT=True is passed although Dimensions defines no
# time dimension - confirm whether the flag is needed here.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the selection (left disabled):
# savepreviewhtml(c1)

In [11]:
# Convert the segment to a pandas DataFrame and display it. The frame carries
# OBS and DATAMARKER columns followed by one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
0,0.433,,Agree / strongly agree,,All adults,Percentage,People
1,0.164,,Neither agree nor disagree,,All adults,Percentage,People
2,0.403,,Disagree / strongly disagree,,All adults,Percentage,People
3,945,,Unweighted base,,All adults,Percentage,People
4,,n<100,Agree / strongly agree,Age of household reference person (HRP)1,16-24,Percentage,People
5,,n<100,Neither agree nor disagree,Age of household reference person (HRP)1,16-24,Percentage,People
6,,n<100,Disagree / strongly disagree,Age of household reference person (HRP)1,16-24,Percentage,People
7,66,,Unweighted base,Age of household reference person (HRP)1,16-24,Percentage,People
8,0.313,,Agree / strongly agree,Age of household reference person (HRP)1,25-34,Percentage,People
9,0.167,,Neither agree nor disagree,Age of household reference person (HRP)1,25-34,Percentage,People


In [12]:
# Rename the 'OBS' column to 'Response'; every other column keeps its name.
new_table.columns = [{'OBS': 'Response'}.get(name, name) for name in new_table.columns]

In [13]:
# Drop rows whose Response is a single space, and take an explicit copy so the
# column assignments in later cells modify an independent frame rather than a
# slice of the original (avoids SettingWithCopyWarning and unreliable writes).
new_table = new_table[new_table['Response'] != ' '].copy()

In [14]:
# Inspect column dtypes; at this point every column, including Response, is
# still of object dtype.
new_table.dtypes

Response         object
DATAMARKER       object
Alcohol issue    object
Category         object
Sect             object
Measure Type     object
Unit             object
dtype: object

In [15]:
# Peek at the last five rows.
new_table.tail(n=5)

Unnamed: 0,Response,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
131,606.0,,Unweighted base,Area type,All urban,Percentage,People
132,0.304,,Agree / strongly agree,Area type,Rural,Percentage,People
133,0.21,,Neither agree nor disagree,Area type,Rural,Percentage,People
134,0.486,,Disagree / strongly disagree,Area type,Rural,Percentage,People
135,339.0,,Unweighted base,Area type,Rural,Percentage,People


In [16]:
# Peek at the first five rows.
new_table.head(n=5)

Unnamed: 0,Response,DATAMARKER,Alcohol issue,Category,Sect,Measure Type,Unit
0,0.433,,Agree / strongly agree,,All adults,Percentage,People
1,0.164,,Neither agree nor disagree,,All adults,Percentage,People
2,0.403,,Disagree / strongly disagree,,All adults,Percentage,People
3,945.0,,Unweighted base,,All adults,Percentage,People
4,,n<100,Agree / strongly agree,Age of household reference person (HRP)1,16-24,Percentage,People


In [17]:
# Parse Response as numbers; entries that cannot be parsed become NaN.
new_table = new_table.assign(
    Response=pd.to_numeric(new_table['Response'], errors='coerce')
)

In [18]:
# Store Response as text, but write missing values (e.g. the suppressed
# 'n<100' cells) as an empty string instead of the literal 'nan' that
# str(NaN) would put into the exported CSV.
new_table['Response'] = new_table['Response'].apply(
    lambda value: '' if pd.isna(value) else str(value)
)

In [19]:
def user_perc(x,y):
    """Pick the measure type for one row.

    x: the row's 'Alcohol issue' label; it is compared through its str() form.
    y: the row's current measure type.

    Returns 'Number' when x is 'Unweighted base', otherwise y unchanged.
    """
    is_base_count = str(x) == 'Unweighted base'
    return 'Number' if is_base_count else y
    
# Re-label the measure type row by row: 'Unweighted base' rows are counts, so
# user_perc turns their measure type into 'Number'; other rows keep theirs.
new_table['Measure Type'] = [
    user_perc(issue, measure)
    for issue, measure in zip(new_table['Alcohol issue'], new_table['Measure Type'])
]

In [20]:
# Rows with no category heading above them (the 'All adults' total) have a
# missing Category; label them 'All'. The result is assigned back instead of
# calling fillna(inplace=True) on the selected column, because that chained
# in-place form does not reliably update the frame and never does under
# pandas Copy-on-Write.
new_table['Category'] = new_table['Category'].fillna('All')

In [21]:
# Build the combined demographic label as '<Category>/<Sect>'.
new_table['Demographic'] = new_table['Category'].str.cat(new_table['Sect'], sep='/')

In [22]:
# Keep only the columns that go into the output file, in output order.
# NOTE(review): DATAMARKER is dropped here, so the 'n<100' suppression marker
# does not reach the CSV - confirm that this is intended.
new_table = new_table.loc[
    :, ['Demographic', 'Alcohol issue', 'Measure Type', 'Response', 'Unit']
]

In [23]:
# Check the first five rows of the final layout.
new_table.head()

Unnamed: 0,Demographic,Alcohol issue,Measure Type,Response,Unit
0,All/All adults,Agree / strongly agree,Percentage,0.433,People
1,All/All adults,Neither agree nor disagree,Percentage,0.164,People
2,All/All adults,Disagree / strongly disagree,Percentage,0.403,People
3,All/All adults,Unweighted base,Number,945.0,People
4,Age of household reference person (HRP)1/16-24,Agree / strongly agree,Percentage,,People


In [24]:
# Write the tidied table to out/taba1.csv, creating the folder if needed and
# leaving the DataFrame index out of the file.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder / 'taba1.csv', index=False)