Table A5: Proportion of respondents affected as a result of alcohol related issues in the local area

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl with an on-disk FileCache ('.cache')
# and the LastModified heuristic, so repeated runs can reuse a cached response.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Fail loudly on HTTP errors (404, 500, ...) instead of silently saving an
# error page under an .xlsx name, which would only surface later as a
# confusing failure when the workbook is parsed.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A5' sheet from the downloaded workbook; loadxlstabs
# returns a list of tabs, so take the first (and only requested) one.
tab = loadxlstabs(inputFile, sheetids='Table A5')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A5']


In [4]:
# Observation cells: start at B5, expand down and then right, and keep only
# the non-blank cells (the saved output shows B5, B6 and B8).
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected observation cells as a visual check.
observations

{<B5 0.086>, <B8 946.0>, <B6 0.914>}

In [6]:
# Row labels: every non-blank cell in column A from A5 downwards.
# NOTE(review): per the saved output this selection also picks up the source
# line (A10) and footnote (A11) below the table; they have no matching
# observation in column B — confirm they are harmless to the HDim lookup.
representatives = tab.excel_ref('A5').expand(DOWN).is_not_blank()
representatives

{<A6 'No'>, <A5 'Yes'>, <A11 '1. Results exclude "don\'t know" and refusals.'>, <A10 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A8 'Unweighted base'>}

In [7]:
# Dimensions attached to each observation when building the conversion segment.
Dimensions = [
            # Label looked up from the `representatives` cells to the left of each
            # observation (saved output: 'Yes', 'No', 'Unweighted base').
            HDim(representatives,'Respondents affected',DIRECTLY,LEFT),
            # Constant columns applied to every row; 'Measure Type' is corrected
            # later in the notebook for the 'Unweighted base' row.
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People')
            ]

In [8]:
# Combine the observation cells with their dimensions into a conversion segment.
# NOTE(review): processTIMEUNIT=True is passed although no TIME dimension is
# defined in Dimensions — presumably it has no effect here; confirm.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional preview of the segment (presumably rendered as HTML); left disabled.
# savepreviewhtml(c1)

In [9]:
# Convert the conversion segment to a pandas DataFrame; per the saved output
# the observation values arrive in a column named 'OBS'.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Respondents affected,Measure Type,Unit
0,0.086,Yes,Percentage,People
1,0.914,No,Percentage,People
2,946.0,Unweighted base,Percentage,People


In [10]:
# Rename the observation column 'OBS' to 'Response'; all other column names
# are left untouched.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [11]:
# Inspect the column dtypes.
# NOTE(review): the saved outputs in this notebook list a 'Value' column, while
# the code renames 'OBS' to 'Response' — the stored outputs look stale;
# restart the kernel and re-run all cells to refresh them.
new_table.dtypes

Value                   float64
Respondents affected     object
Measure Type             object
Unit                     object
dtype: object

In [12]:
# Show up to the last five rows (the saved output has only three rows in total).
new_table.tail(5)

Unnamed: 0,Value,Respondents affected,Measure Type,Unit
0,0.086,Yes,Percentage,People
1,0.914,No,Percentage,People
2,946.0,Unweighted base,Percentage,People


In [13]:
# Show the first rows of the table as a visual check.
new_table.head()

Unnamed: 0,Value,Respondents affected,Measure Type,Unit
0,0.086,Yes,Percentage,People
1,0.914,No,Percentage,People
2,946.0,Unweighted base,Percentage,People


In [14]:
# Parse the 'Response' column as numbers; errors='coerce' turns anything that
# cannot be parsed into NaN instead of raising.
response_numeric = pd.to_numeric(new_table['Response'], errors='coerce')
new_table['Response'] = response_numeric

In [15]:
# Convert each 'Response' value to its string form, element by element.
new_table['Response'] = new_table['Response'].map(str)

In [16]:
def user_perc(x,y):
    """Return the measure type to use for a row.

    Parameters:
        x: the row label; compared via its string form.
        y: the measure type currently assigned to the row.

    Returns:
        'Number' when the label is exactly 'Unweighted base' (the base count
        is not a percentage); otherwise ``y`` unchanged.
    """
    return 'Number' if str(x) == 'Unweighted base' else y
    
def _measure_type_for_row(row):
    """Pick the measure type for one DataFrame row via user_perc."""
    return user_perc(row['Respondents affected'], row['Measure Type'])

# Recompute 'Measure Type' row by row so the 'Unweighted base' row is
# labelled 'Number' rather than the constant 'Percentage'.
new_table['Measure Type'] = new_table.apply(_measure_type_for_row, axis = 1)

In [17]:
# Keep only the output columns, in the order they should appear in the CSV.
output_columns = ['Respondents affected', 'Measure Type', 'Response', 'Unit']
new_table = new_table[output_columns]

In [18]:
# Preview the final table before it is written out.
new_table.head(5)

Unnamed: 0,Respondents affected,Measure Type,Value,Unit
0,Yes,Percentage,0.086,People
1,No,Percentage,0.914,People
2,Unweighted base,Number,946.0,People


In [19]:
# Make sure the output folder exists (creating parents if needed), then write
# the table as CSV without the DataFrame index.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

output_path = destinationFolder / 'taba5.csv'
new_table.to_csv(output_path, index=False)