Table A4: Bodies/Representatives approached in the last year about an alcohol related issue in the local area

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl with a file-backed cache kept in '.cache',
# so re-running the notebook does not necessarily re-download the workbook.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Stop here on an HTTP error (4xx/5xx) instead of saving the error page as
# the .xlsx file and failing later, less clearly, when the workbook is loaded.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A4' worksheet and keep the first tab that is returned.
matching_tabs = loadxlstabs(inputFile, sheetids='Table A4')
tab = matching_tabs[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A4']


In [4]:
# Observations: the non-blank cells in the region reached by expanding
# down and then right from B5.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)

In [5]:
# Display the selected observation cells to check the selection.
observations

{<B13 0.063>, <B11 0.069>, <B7 0.144>, <B8 0.126>, <B9 0.121>, <B15 95.0>, <B12 0.029>, <B5 0.649>, <B10 0.109>, <B6 0.184>}

In [6]:
# Row labels: the non-blank cells of column A from A5 downwards.
# The selection also captures the source/footnote text beneath the table
# (cells A17-A21 in the saved output of this cell).
representatives = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
representatives

{<A21 '3. Caution should be exercised when interpreting percentages in this table, due to small numbers or respondents.'>, <A11 'Borough/City/District Council'>, <A15 'Unweighted base'>, <A13 'Other'>, <A9 'NIHE / Housing Association'>, <A12 'Government Department'>, <A7 'Local community representative'>, <A18 '1. Results exclude "don\'t know", "none" and refusals.'>, <A10 'Policing and Community Safety Partnership'>, <A17 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A8 'Local MLA'>, <A20 'their local area, more than one option could be selected.'>, <A5 'Police'>, <A6 'Local councillor'>, <A19 '2. Respondents were asked to select from a list of bodies/representatives they approached about an alcohol related issue in '>}

In [7]:
# Dimensions attached to each observation:
#   - 'Representatives' comes from the label cell directly to the left;
#   - 'Measure Type' and 'Unit' are constant values.
Dimensions = [
    HDim(representatives, 'Representatives', DIRECTLY, LEFT),
    HDimConst('Measure Type', 'Percentage'),
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with their dimensions into a conversion segment.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the selection (left disabled): savepreviewhtml(c1)

In [9]:
# Convert the segment to a DataFrame: observations land in the 'OBS' column,
# with one further column per dimension (see the output of this cell).
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Representatives,Measure Type,Unit
0,0.649,Police,Percentage,People
1,0.184,Local councillor,Percentage,People
2,0.144,Local community representative,Percentage,People
3,0.126,Local MLA,Percentage,People
4,0.121,NIHE / Housing Association,Percentage,People
5,0.109,Policing and Community Safety Partnership,Percentage,People
6,0.069,Borough/City/District Council,Percentage,People
7,0.029,Government Department,Percentage,People
8,0.063,Other,Percentage,People
9,95.0,Unweighted base,Percentage,People


In [10]:
# Rename the observation column from 'OBS' to 'Response'; other columns keep their names.
# NOTE(review): the saved cell outputs in this notebook label this column 'Value',
# so they appear to predate this rename - re-run the notebook to refresh them.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [11]:
# Check the column dtypes after the rename.
new_table.dtypes

Value              float64
Representatives     object
Measure Type        object
Unit                object
dtype: object

In [12]:
# Inspect the last five rows (this includes the 'Unweighted base' row).
new_table.tail(5)

Unnamed: 0,Value,Representatives,Measure Type,Unit
5,0.109,Policing and Community Safety Partnership,Percentage,People
6,0.069,Borough/City/District Council,Percentage,People
7,0.029,Government Department,Percentage,People
8,0.063,Other,Percentage,People
9,95.0,Unweighted base,Percentage,People


In [13]:
# Inspect the first rows of the table.
new_table.head()

Unnamed: 0,Value,Representatives,Measure Type,Unit
0,0.649,Police,Percentage,People
1,0.184,Local councillor,Percentage,People
2,0.144,Local community representative,Percentage,People
3,0.126,Local MLA,Percentage,People
4,0.121,NIHE / Housing Association,Percentage,People


In [14]:
# Force 'Response' to a numeric dtype; with errors='coerce', values that
# cannot be parsed become NaN instead of raising.
new_table = new_table.assign(
    Response=pd.to_numeric(new_table['Response'], errors='coerce')
)

In [15]:
# Store each response as its string representation.
new_table['Response'] = new_table['Response'].map(str)

In [16]:
def user_perc(x, y):
    """Return the measure type for a row.

    x is the row label and y its current measure type. The row whose label
    reads 'Unweighted base' is reported as 'Number'; every other row keeps y.
    """
    return 'Number' if str(x) == 'Unweighted base' else y
    
def _row_measure_type(row):
    """Look up the measure type for one table row via user_perc."""
    return user_perc(row['Representatives'], row['Measure Type'])

# Recompute 'Measure Type' row by row from the row label and its current value.
new_table['Measure Type'] = new_table.apply(_row_measure_type, axis=1)

In [17]:
# Keep the four output columns, in their final order.
new_table = new_table.loc[
    :, ['Representatives', 'Measure Type', 'Response', 'Unit']
]

In [18]:
# Preview the first five rows of the reordered table before export.
new_table.head(5)

Unnamed: 0,Representatives,Measure Type,Value,Unit
0,Police,Percentage,0.649,People
1,Local councillor,Percentage,0.184,People
2,Local community representative,Percentage,0.144,People
3,Local MLA,Percentage,0.126,People
4,NIHE / Housing Association,Percentage,0.121,People


In [19]:
# Write the table to out/taba4.csv without the index column,
# creating the output folder first if it does not exist yet.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder / 'taba4.csv', index=False)