Table A4: Bodies/Representatives<sup>1</sup> approached in the last year about an alcohol-related issue in the local area

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl, with a file cache in '.cache' and the
# LastModified heuristic.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Stop on an HTTP error instead of saving the error body as the .xlsx, which
# would only surface later as a confusing spreadsheet-loading failure.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A4' sheet from the downloaded workbook and keep the
# first (and only requested) tab of the result.
tab = loadxlstabs(inputFile, sheetids='Table A4')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A4']


In [4]:
# Observation cells: every non-blank cell from B5 downwards and to the right.
# In this sheet that is B5:B13 plus B15 (the 'Unweighted base' row).
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected cells to check the selection by eye.
observations

{<B15 95.0>, <B12 0.029>, <B10 0.109>, <B13 0.063>, <B9 0.121>, <B5 0.649>, <B11 0.069>, <B6 0.184>, <B7 0.144>, <B8 0.126>}

In [6]:
# Row labels: every non-blank cell in column A from A5 downwards.
# The selection also picks up the source line and footnotes below the table
# (A17-A21). NOTE(review): presumably harmless because the DIRECTLY/LEFT lookup
# in the Dimensions cell only reads labels on rows that hold observations - confirm.
Representatives1 = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Representatives1

{<A18 '1. Results exclude "don\'t know", "none" and refusals.'>, <A21 '3. Caution should be exercised when interpreting percentages in this table, due to small numbers or respondents.'>, <A10 'Policing and Community Safety Partnership'>, <A7 'Local community representative'>, <A20 'their local area, more than one option could be selected.'>, <A11 'Borough/City/District Council'>, <A19 '2. Respondents were asked to select from a list of bodies/representatives they approached about an alcohol related issue in '>, <A8 'Local MLA'>, <A9 'NIHE / Housing Association'>, <A5 'Police'>, <A12 'Government Department'>, <A6 'Local councillor'>, <A17 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A15 'Unweighted base'>, <A13 'Other'>}

In [7]:
# Dimensions attached to every observation cell.
Dimensions = [
            # Row label looked up directly to the left of the observation.
            HDim(Representatives1,'Representatives1',DIRECTLY,LEFT),
            # Constant for every row; the 'Unweighted base' row is switched to
            # 'Count' in a later cell (see user_perc).
            HDimConst('Measure Type', 'Percent'),
            # Constant unit for every row.
            HDimConst('Unit','People')
            ]

In [8]:
# Combine the observation cells with their dimensions into a conversion segment.
# NOTE(review): processTIMEUNIT=True is passed although no TIME dimension is
# defined in Dimensions - confirm it is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Debug aid, left disabled:
# savepreviewhtml(c1)

In [9]:
# Convert the segment to a DataFrame: one row per observation (column OBS)
# plus one column per dimension, then display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Representatives1,Measure Type,Unit
0,0.649,Police,Percent,People
1,0.184,Local councillor,Percent,People
2,0.144,Local community representative,Percent,People
3,0.126,Local MLA,Percent,People
4,0.121,NIHE / Housing Association,Percent,People
5,0.109,Policing and Community Safety Partnership,Percent,People
6,0.069,Borough/City/District Council,Percent,People
7,0.029,Government Department,Percent,People
8,0.063,Other,Percent,People
9,95.0,Unweighted base,Percent,People


In [10]:
# The observation column comes back as 'OBS'; call it 'Response' instead.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [11]:
# Check the column dtypes after the rename.
new_table.dtypes

Response            float64
Representatives1     object
Measure Type         object
Unit                 object
dtype: object

In [12]:
# Inspect the last rows; the final one is the 'Unweighted base' row.
new_table.tail(5)

Unnamed: 0,Response,Representatives1,Measure Type,Unit
5,0.109,Policing and Community Safety Partnership,Percent,People
6,0.069,Borough/City/District Council,Percent,People
7,0.029,Government Department,Percent,People
8,0.063,Other,Percent,People
9,95.0,Unweighted base,Percent,People


In [13]:
# Inspect the first rows.
new_table.head()

Unnamed: 0,Response,Representatives1,Measure Type,Unit
0,0.649,Police,Percent,People
1,0.184,Local councillor,Percent,People
2,0.144,Local community representative,Percent,People
3,0.126,Local MLA,Percent,People
4,0.121,NIHE / Housing Association,Percent,People


In [14]:
# Force 'Response' to a numeric dtype; values that cannot be parsed become NaN.
new_table['Response'] = new_table['Response'].pipe(pd.to_numeric, errors='coerce')

In [15]:
# Store every 'Response' value as its string representation.
new_table['Response'] = new_table['Response'].map(str)

In [16]:
def user_perc(x, y):
    """Choose the measure type for a single row.

    x: the row's representative label (compared via its string form).
    y: the row's current measure type.

    Returns 'Count' when the label is 'Unweighted base'; otherwise returns
    y unchanged.
    """
    return 'Count' if str(x) == 'Unweighted base' else y
    
# Re-label the measure type row by row: the 'Unweighted base' row becomes
# 'Count', every other row keeps its existing value.
new_table['Measure Type'] = new_table.apply(
    lambda record: user_perc(record['Representatives1'], record['Measure Type']),
    axis=1,
)

In [17]:
import urllib.request as request
import csv
# Print the raw rows of the representatives codelist for a quick visual check.
# The response is opened as a context manager so the connection is closed as
# soon as the body has been read (it was previously never closed).
with request.urlopen('https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/representatives.csv') as raw_codelist:
    r = raw_codelist.read().decode('utf8').split("\n")
reader = csv.reader(r)
for line in reader:
    print(line)

['Label', 'Notation', 'Parent Notation', 'Sort Priority']
['Overall', 'overall', '', '1']
['Police', 'police', '', '2']
['Local councillor', 'councillor', '', '3']
['Local community representative', 'community', '', '4']
['Local MLA', 'mla', '', '5']
['NIHE / Housing Association', 'nihe', '', '6']
['Policing and Community Safety Partnership', 'pcsp', '', '7']
['Borough/City/District Council', 'borough', '', '8']
['Government Department', 'govt-dept', '', '9']
['Other', 'other', '', '10']
['Unweighted base', 'unweighted-base', '', '11']


In [18]:
import io
import requests
# Download the representatives codelist and parse it into a DataFrame.
url="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/representatives.csv"
codelist_response = requests.get(url)
# Stop on an HTTP error instead of parsing an error body as if it were the
# codelist, which would make the later label lookup fail in a confusing way.
codelist_response.raise_for_status()
s=codelist_response.content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [19]:
# Attach the codelist columns to every observation by matching the spreadsheet
# label ('Representatives1') against the codelist 'Label' column.
# validate='many_to_one' makes pandas raise if a label occurs more than once in
# the codelist, which would otherwise silently duplicate observation rows.
Final_table = pd.merge(new_table, c, how = 'left', left_on = 'Representatives1',
                       right_on = 'Label', validate = 'many_to_one')

# A left join keeps rows whose label is missing from the codelist and leaves
# their 'Notation' empty; stop here rather than exporting rows without a code.
_unmatched_labels = Final_table.loc[Final_table['Notation'].isnull(), 'Representatives1'].unique().tolist()
if _unmatched_labels:
    raise ValueError('Labels missing from the representatives codelist: {}'.format(_unmatched_labels))

In [20]:
# The codelist 'Notation' column is the coded 'Representatives' dimension.
Final_table = Final_table.rename(columns={'Notation': 'Representatives'})

In [21]:
# Keep only the output columns, in their final order.
Final_table = Final_table.loc[:, ['Representatives', 'Measure Type', 'Response', 'Unit']]

In [22]:
# Make sure the output folder exists, then write the final table to
# out/table4.csv without the DataFrame index.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

Final_table.to_csv(destinationFolder.joinpath('table4.csv'), index=False)

In [23]:
# Display the table that was written to out/table4.csv.
Final_table

Unnamed: 0,Representatives,Measure Type,Response,Unit
0,police,Percent,0.649,People
1,councillor,Percent,0.184,People
2,community,Percent,0.144,People
3,mla,Percent,0.126,People
4,nihe,Percent,0.121,People
5,pcsp,Percent,0.109,People
6,borough,Percent,0.069,People
7,govt-dept,Percent,0.029,People
8,other,Percent,0.063,People
9,unweighted-base,Count,95.0,People
