Table A13: Confidence in work to tackle alcohol and/or drug related issues in Northern Ireland

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl with an on-disk cache in '.cache' and
# the LastModified heuristic.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Fail here on an HTTP error (4xx/5xx) rather than saving the error page under
# an .xlsx name and failing later when the workbook is parsed.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A13' sheet from the downloaded workbook; the first
# element of the returned result is kept as the working tab.
tab = loadxlstabs(inputFile, sheetids='Table A13')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A13']


In [4]:
# Observation cells: every non-blank cell from B5 downwards and rightwards.
# This also takes in the 'Unweighted base' column, whose values are counts
# rather than proportions (see the Measure Type fix-up later on).
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected observation cells for a visual check.
observations

{<E9 924.0>, <C11 0.497>, <B10 0.113>, <D10 0.446>, <D11 0.326>, <C10 0.441>, <E10 887.0>, <C9 0.499>, <C12 0.343>, <E5 917.0>, <B11 0.177>, <D5 0.472>, <E8 914.0>, <B9 0.259>, <B5 0.094>, <D8 0.422>, <C5 0.434>, <D12 0.583>, <B12 0.074>, <C8 0.461>, <D9 0.241>, <E11 823.0>, <B8 0.118>, <E12 786.0>}

In [6]:
# Column headers: non-blank cells from B3 rightwards. They become the
# 'Level of Confidence' dimension.
Confidenceinwork = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Confidenceinwork

{<D3 'Little or no confidence'>, <E3 'Unweighted base'>, <C3 'Some confidence'>, <B3 'Total or a lot of confidence'>}

In [7]:
# Row labels: non-blank cells from A5 downwards. They become the 'Public Body'
# dimension.
# NOTE(review): the selection also picks up the source/footnote cells under the
# table (A15 and A16 in the recorded output). They do not show up in the final
# table, presumably because no observation shares their rows - confirm, or
# bound the selection to the data rows.
publicbody = tab.excel_ref('A5').expand(DOWN).is_not_blank()
publicbody

{<A8 'Gvt Depts'>, <A5 'Overall'>, <A16 '1. Results exclude "don\'t know" and refusals.'>, <A9 'PSNI'>, <A12 'NIHE / HA'>, <A10 'Local Council'>, <A11 'PCSP'>, <A15 'Source: Northern Ireland Omnibus Survey, October 2016'>}

In [8]:
# Descriptors attached to every observation: the header directly above gives
# the confidence level, the label directly to the left gives the public body.
# Measure type and unit start out as the same constant for all rows.
Dimensions = [
    HDim(Confidenceinwork, 'Level of Confidence', DIRECTLY, ABOVE),
    HDim(publicbody, 'Public Body', DIRECTLY, LEFT),
    HDimConst('Measure Type', 'Percentage'),
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with their dimensions into one segment.
# NOTE(review): Dimensions defines no time dimension, so processTIMEUNIT=True
# may have nothing to act on - confirm whether the flag is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional debugging aid, left disabled (presumably renders an HTML preview of
# the segment):
# savepreviewhtml(c1)

In [10]:
# Convert the segment into a pandas DataFrame (an 'OBS' value column plus one
# column per dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Level of Confidence,Public Body,Measure Type,Unit
0,0.094,Total or a lot of confidence,Overall,Percentage,People
1,0.434,Some confidence,Overall,Percentage,People
2,0.472,Little or no confidence,Overall,Percentage,People
3,917.0,Unweighted base,Overall,Percentage,People
4,0.118,Total or a lot of confidence,Gvt Depts,Percentage,People
5,0.461,Some confidence,Gvt Depts,Percentage,People
6,0.422,Little or no confidence,Gvt Depts,Percentage,People
7,914.0,Unweighted base,Gvt Depts,Percentage,People
8,0.259,Total or a lot of confidence,PSNI,Percentage,People
9,0.499,Some confidence,PSNI,Percentage,People


In [11]:
# Call the value column 'Response' instead of databaker's 'OBS'.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [12]:
# Inspect the column dtypes after the rename.
new_table.dtypes

Response               float64
Level of Confidence     object
Public Body             object
Measure Type            object
Unit                    object
dtype: object

In [13]:
# Spot-check the last five rows.
new_table.tail(5)

Unnamed: 0,Response,Level of Confidence,Public Body,Measure Type,Unit
19,823.0,Unweighted base,PCSP,Percentage,People
20,0.074,Total or a lot of confidence,NIHE / HA,Percentage,People
21,0.343,Some confidence,NIHE / HA,Percentage,People
22,0.583,Little or no confidence,NIHE / HA,Percentage,People
23,786.0,Unweighted base,NIHE / HA,Percentage,People


In [14]:
# Spot-check the first rows.
new_table.head()

Unnamed: 0,Response,Level of Confidence,Public Body,Measure Type,Unit
0,0.094,Total or a lot of confidence,Overall,Percentage,People
1,0.434,Some confidence,Overall,Percentage,People
2,0.472,Little or no confidence,Overall,Percentage,People
3,917.0,Unweighted base,Overall,Percentage,People
4,0.118,Total or a lot of confidence,Gvt Depts,Percentage,People


In [15]:
# Force 'Response' to a numeric dtype; errors='coerce' replaces any value that
# cannot be parsed with NaN instead of raising.
# NOTE(review): the recorded dtypes output already shows float64 for this
# column, so this looks like a no-op here - confirm before relying on it.
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

In [16]:
# Store each response as its string form (0.094 -> '0.094', 917.0 -> '917.0').
new_table['Response'] = new_table['Response'].map(str)

In [17]:
def user_perc(x,y):
    """Choose the measure type for one row.

    x is compared, via its string form, with the 'Unweighted base' column
    label; y is the measure type to keep otherwise.

    Returns 'Number' when str(x) equals 'Unweighted base', else y unchanged.
    """
    return 'Number' if str(x) == 'Unweighted base' else y
    
# Rows under the 'Unweighted base' header hold counts, not percentages, so
# relabel their measure type as 'Number'. A plain comprehension over the two
# columns replaces DataFrame.apply(axis=1): it avoids building a Series per
# row, and it still yields a valid (empty) column when the table has no rows,
# where apply() would hand back a DataFrame that cannot be assigned to a
# single column.
new_table['Measure Type'] = [
    user_perc(level, measure)
    for level, measure in zip(new_table['Level of Confidence'], new_table['Measure Type'])
]

In [18]:
# Put the columns in the order used for the output file.
new_table = new_table.loc[:, [
    'Public Body',
    'Level of Confidence',
    'Measure Type',
    'Response',
    'Unit',
]]

In [19]:
# Preview the first five rows of the reordered table before export.
new_table.head(5)

Unnamed: 0,Public Body,Level of Confidence,Measure Type,Response,Unit
0,Overall,Total or a lot of confidence,Percentage,0.094,People
1,Overall,Some confidence,Percentage,0.434,People
2,Overall,Little or no confidence,Percentage,0.472,People
3,Overall,Unweighted base,Number,917.0,People
4,Gvt Depts,Total or a lot of confidence,Percentage,0.118,People


In [20]:
# Write the tidy table to out/taba13.csv without the index column, creating
# the output folder (and any missing parents) first.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder / 'taba13.csv', index=False)