Table A6: Alcohol related issues in the local area affecting the respondent or their family

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session whose responses are cached on disk under '.cache'.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that receives the downloaded source spreadsheet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of saving an error page as the workbook.
Value = session.get(inputURL, timeout=60)
Value.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(Value.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Only the 'Table A6' sheet is requested; the loader's result is indexed, so
# take its first entry as the working tab.
matching_tabs = loadxlstabs(inputFile, sheetids='Table A6')
tab = matching_tabs[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A6']


In [4]:
# Observations: every non-blank cell in the region that starts at B5 and is
# expanded downwards and to the right.
first_observation_cell = tab.excel_ref('B5')
observations = first_observation_cell.expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected observation cells as a visual check of the selection.
observations

{<B13 0.122>, <B18 0.034>, <B14 0.108>, <B22 85.0>, <B11 0.176>, <B7 0.324>, <B12 0.142>, <B8 0.291>, <B9 0.236>, <B15 0.081>, <B20 0.034>, <B5 0.493>, <B10 0.236>, <B19 0.02>, <B16 0.061>, <B17 0.047>, <B6 0.392>}

In [6]:
# Row labels: the non-blank cells of column A from A5 downwards.
# NOTE(review): the displayed selection also contains the source/footnote cells
# (A24-A28); they appear to be harmless because no observation shares their
# rows, but confirm if the sheet layout changes.
label_column = tab.excel_ref('A5').expand(DOWN)
representatives = label_column.is_not_blank()
representatives

{<A27 'affected by in their local area, more than one option could be selected.'>, <A9 'Drinking in public places'>, <A8 'Vandalism'>, <A17 'Spiking of drinks'>, <A12 'Litter'>, <A25 '1. Results exclude "don\'t know" and refusals.'>, <A22 'Unweighted base'>, <A6 'General disturbance'>, <A24 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A14 'Theft'>, <A11 'Alcohol use/abuse'>, <A18 'Drink-driving'>, <A10 'Underage drinking'>, <A7 'Personal safety'>, <A20 'Other'>, <A16 'Joy riding'>, <A19 'Unwanted sexual contact'>, <A28 '3. Caution should be exercised when interpreting percentages in this table, due to small numbers or respondents.'>, <A5 'Rowdy and drunken behaviour'>, <A13 'Indecent exposure/urinating in street'>, <A15 'Underage sales of alcohol'>, <A26 '2. Respondents were asked to select from a list of alcohol related issues respondent or their family have been'>}

In [7]:
# Each observation takes its issue label from the cell directly to its left;
# measure type and unit are constants for the whole table.
issue_dimension = HDim(representatives, 'Alcohol related issue1', DIRECTLY, LEFT)
measure_type_dimension = HDimConst('Measure Type', 'Percent')
unit_dimension = HDimConst('Unit', 'People')

Dimensions = [issue_dimension, measure_type_dimension, unit_dimension]

In [8]:
# Combine the observation cells with their dimensions into a conversion segment.
# NOTE(review): no time dimension is defined for this table, so
# processTIMEUNIT=True looks unnecessary here — confirm against the databaker
# documentation before changing it.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the segment (left disabled):
# savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a pandas DataFrame (observation values in
# 'OBS' plus one column per dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Alcohol related issue1,Measure Type,Unit
0,0.493,Rowdy and drunken behaviour,Percent,People
1,0.392,General disturbance,Percent,People
2,0.324,Personal safety,Percent,People
3,0.291,Vandalism,Percent,People
4,0.236,Drinking in public places,Percent,People
5,0.236,Underage drinking,Percent,People
6,0.176,Alcohol use/abuse,Percent,People
7,0.142,Litter,Percent,People
8,0.122,Indecent exposure/urinating in street,Percent,People
9,0.108,Theft,Percent,People


In [10]:
# The output schema calls the observation column 'Value' rather than 'OBS'.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [11]:
# Coerce the observation values to numbers; anything unparseable becomes NaN.
numeric_values = pd.to_numeric(new_table['Value'], errors='coerce')
new_table['Value'] = numeric_values

In [12]:
# Store each value as its string representation.
new_table['Value'] = new_table['Value'].map(str)

In [13]:
def user_perc(x,y):
    """Choose the measure type for a single row.

    Returns 'Count' when the label `x`, compared as a string, is exactly
    'Unweighted base'; otherwise returns `y` unchanged.
    """
    return 'Count' if str(x) == 'Unweighted base' else y
    
# Re-derive the measure type row by row: user_perc turns the
# 'Unweighted base' row into a 'Count' and leaves every other row as it was.
def _row_measure_type(row):
    return user_perc(row['Alcohol related issue1'], row['Measure Type'])

new_table['Measure Type'] = new_table.apply(_row_measure_type, axis = 1)

In [14]:
import urllib.request as request
import csv
# Print the reference codelist row by row so its labels can be compared with
# the row labels taken from the spreadsheet.
# The response is opened in a `with` block so the connection is closed as soon
# as the body has been read, instead of being left open.
with request.urlopen('https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/alcohol-related-issue.csv') as response:
    r = response.read().decode('utf8').split("\n")
reader = csv.reader(r)
for line in reader:
    print(line)

['Label', 'Notation', 'Parent Notation', 'Sort Priority']
['Underage drinking', 'underage-drinking', '', '1']
['Rowdy and drunken behaviour', 'rowdy-behaviour', '', '2']
['Drink-driving', 'drink-driving', '', '3']
['Drinking in public places', 'drinking-in-public', '', '4']
['Alcohol use/abuse', 'alcohol-use', '', '5']
['Underage sales of alcohol', 'underage-alcohol-sales', '', '6']
['Personal safety', 'personal-safety', '', '7']
['Theft', 'theft', '', '8']
['Vandalism', 'vandalism', '', '9']
['General disturbance', 'disturbance', '', '10']
['Joy riding', 'joy-riding', '', '11']
['Litter', 'litter', '', '12']
['Spiking of drinks', 'spiking-drinks', '', '13']
['Indecent exposure/urinating in street', 'indecent-exposure', '', '14']
['Unwanted sexual contact', 'unwanted-sexual-contact', '', '15']
['Other', 'other', '', '16']
['Unweighted base', 'unweighted-base', '', '17']


In [15]:
import io
import requests
# Load the reference codelist (Label -> Notation) into a DataFrame.
url="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/alcohol-related-issue.csv"
# Bound the wait and raise on an HTTP error, so an error page is never parsed
# as if it were the codelist CSV.
codelist_response = requests.get(url, timeout=60)
codelist_response.raise_for_status()
s=codelist_response.content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [16]:
# Attach the codelist columns to every observation by matching the row label
# against the codelist's 'Label'; the left join keeps every observation row.
Final_table = new_table.merge(
    c,
    how='left',
    left_on='Alcohol related issue1',
    right_on='Label',
)

In [17]:
# The codelist notation becomes the published 'Alcohol related issue' column.
Final_table = Final_table.rename(columns={'Notation': 'Alcohol related issue'})

In [18]:
# Keep only the output columns, in their published order.
output_columns = ['Alcohol related issue', 'Measure Type', 'Value', 'Unit']
Final_table = Final_table.loc[:, output_columns]

In [19]:
# Write the tidy table to out/table6.csv, creating the folder if needed.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

output_path = destinationFolder / 'table6.csv'
Final_table.to_csv(output_path, index=False)