Table A14: Overall level of confidence that enough is being done to tackle alcohol and/or drug related issues in Northern Ireland by demographics (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A14')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A14']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<E48 578.0>, <C19 0.383>, <B11 0.086>, <D35 0.449>, <C39 'n<100'>, <B19 0.097>, <D11 0.515>, <E5 917.0>, <B20 0.096>, <E11 256.0>, <D19 0.52>, <E9 100.0>, <E51 236.0>, <B58 0.093>, <E21 13.0>, <B55 0.052>, <E10 240.0>, <E28 150.0>, <C11 0.398>, <E57 588.0>, <B48 0.11>, <B21 'n<100'>, <E20 442.0>, <D8 'n<100'>, <E58 329.0>, <C44 0.473>, <E19 349.0>, <E37 45.0>, <C25 0.369>, <C16 0.435>, <B56 0.128>, <E52 681.0>, <B39 'n<100'>, <C24 0.41>, <C9 0.458>, <C8 'n<100'>, <B37 'n<100'>, <E42 246.0>, <D21 'n<100'>, <C20 0.466>, <B28 0.098>, <D27 0.351>, <D16 0.467>, <D28 0.529>, <D5 0.472>, <C21 'n<100'>, <E55 269.0>, <C43 0.389>, <D58 0.467>, <D20 0.438>, <E31 439.0>, <D37 'n<100'>, <D55 0.488>, <B43 0.092>, <C36 0.412>, <C35 0.449>, <D39 'n<100'>, <D43 0.519>, <B52 0.092>, <E56 319.0>, <C55 0.46>, <E26 190.0>, <C31 0.423>, <D12 0.423>, <B35 0.102>, <E44 297.0>, <E35 265.0>, <D32 0.454>, <B16 0.098>, <D26 0.412>, <D15 0.478>, <E43 374.0>, <C38 'n<100'>, <D9 0.5>, <D24 0.54>, <D10 0.483>, <B12 

In [6]:
Alcoholissue = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Alcoholissue

{<E3 'Unweighted base'>, <D3 'Little or no confidence'>, <C3 'Some confidence'>, <B3 'Total or a lot of confidence'>}

In [7]:
sect = tab.excel_ref('A5').expand(DOWN).is_not_blank()
sect

{<A28 'Western'>, <A26 'South Eastern'>, <A62 '1. Results exclude "don\'t know" and refusals.'>, <A24 'Belfast'>, <A42 'Primary'>, <A12 '65 and over'>, <A10 '35-49'>, <A8 '16-24'>, <A57 'All urban'>, <A43 'Secondary'>, <A7 'Age of household reference person (HRP)1'>, <A23 'Health and Social Care Trust'>, <A9 '25-34'>, <A35 'Single, that is never married'>, <A52 'No disability / illness'>, <A41 'Level of qualifications'>, <A16 'Female'>, <A21 'Other'>, <A55 'Belfast'>, <A50 'Disability / illness'>, <A19 'Catholic'>, <A30 'Employment status'>, <A5 'All adults'>, <A39 'Widowed'>, <A20 'Protestant'>, <A38 'Divorced'>, <A11 '50-64'>, <A48 'No dependants'>, <A15 'Male'>, <A34 'Marital status'>, <A37 'Married and separated from husband / wife'>, <A56 'Urban, excluding Belfast'>, <A46 'Dependants'>, <A25 'Northern'>, <A44 'Tertiary'>, <A54 'Area type'>, <A31 'In paid employment'>, <A14 'Gender'>, <A32 'Not in paid employment'>, <A18 'Religion'>, <A51 'Has disability / illness'>, <A36 'Married 

In [8]:
category = tab.excel_ref('A').expand(DOWN).by_index([7,14,18,23,30,34,41,46,50,54])
category

{<A54 'Area type'>, <A23 'Health and Social Care Trust'>, <A14 'Gender'>, <A18 'Religion'>, <A41 'Level of qualifications'>, <A34 'Marital status'>, <A46 'Dependants'>, <A7 'Age of household reference person (HRP)1'>, <A30 'Employment status'>, <A50 'Disability / illness'>}

In [9]:
Dimensions = [
            HDim(Alcoholissue,'Level of Confidence',DIRECTLY,ABOVE),
            HDim(category,'Category',CLOSEST,ABOVE),
            HDim(sect,'Sect',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Level of Confidence,Category,Sect,Measure Type,Unit
0,0.094,,Total or a lot of confidence,,All adults,Percentage,People
1,0.434,,Some confidence,,All adults,Percentage,People
2,0.472,,Little or no confidence,,All adults,Percentage,People
3,917,,Unweighted base,,All adults,Percentage,People
4,,n<100,Total or a lot of confidence,Age of household reference person (HRP)1,16-24,Percentage,People
5,,n<100,Some confidence,Age of household reference person (HRP)1,16-24,Percentage,People
6,,n<100,Little or no confidence,Age of household reference person (HRP)1,16-24,Percentage,People
7,65,,Unweighted base,Age of household reference person (HRP)1,16-24,Percentage,People
8,0.042,,Total or a lot of confidence,Age of household reference person (HRP)1,25-34,Percentage,People
9,0.458,,Some confidence,Age of household reference person (HRP)1,25-34,Percentage,People


In [12]:
new_table.columns = ['Response' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table = new_table[new_table['Response'] != '']

In [14]:
new_table.dtypes

Response               object
DATAMARKER             object
Level of Confidence    object
Category               object
Sect                   object
Measure Type           object
Unit                   object
dtype: object

In [15]:
new_table.tail(5)

Unnamed: 0,Response,DATAMARKER,Level of Confidence,Category,Sect,Measure Type,Unit
131,588.0,,Unweighted base,Area type,All urban,Percentage,People
132,0.093,,Total or a lot of confidence,Area type,Rural,Percentage,People
133,0.439,,Some confidence,Area type,Rural,Percentage,People
134,0.467,,Little or no confidence,Area type,Rural,Percentage,People
135,329.0,,Unweighted base,Area type,Rural,Percentage,People


In [16]:
new_table.head()

Unnamed: 0,Response,DATAMARKER,Level of Confidence,Category,Sect,Measure Type,Unit
0,0.094,,Total or a lot of confidence,,All adults,Percentage,People
1,0.434,,Some confidence,,All adults,Percentage,People
2,0.472,,Little or no confidence,,All adults,Percentage,People
3,917.0,,Unweighted base,,All adults,Percentage,People
7,65.0,,Unweighted base,Age of household reference person (HRP)1,16-24,Percentage,People


In [17]:
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table['Response'] = new_table['Response'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Number'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Level of Confidence'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table['Category'].fillna('All', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


In [21]:
new_table['Demographic'] = new_table['Category'] + '/' + new_table['Sect']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
new_table = new_table[['Demographic','Level of Confidence','Measure Type','Response','Unit']]

In [23]:
new_table.head(5)

Unnamed: 0,Demographic,Level of Confidence,Measure Type,Response,Unit
0,All/All adults,Total or a lot of confidence,Percentage,0.094,People
1,All/All adults,Some confidence,Percentage,0.434,People
2,All/All adults,Little or no confidence,Percentage,0.472,People
3,All/All adults,Unweighted base,Number,917.0,People
7,Age of household reference person (HRP)1/16-24,Unweighted base,Number,65.0,People


In [24]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('taba14.csv'), index = False)