Table A2: Main alcohol related issues in the local area (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# Wrap the requests session with a file-backed HTTP cache stored in '.cache',
# so repeated notebook runs can reuse a previously downloaded response.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source spreadsheet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Fail loudly on an HTTP error (4xx/5xx) instead of silently saving an error
# page to disk under the .xlsx name, which would only break later on load.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A2' sheet from the downloaded workbook; the result is
# indexed with [0] to take the single matching tab.
tab = loadxlstabs(inputFile, sheetids='Table A2')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A2']


In [4]:
# Observation cells: every non-blank cell in the region that starts at B5 and
# extends downwards and to the right.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)

In [5]:
# Display the selected observation cells to sanity-check the selection.
observations

{<F19 0.035>, <E15 0.042>, <E7 0.134>, <B13 0.011>, <D10 0.133>, <E17 0.038>, <E10 0.119>, <D9 0.104>, <B17 0.007>, <C9 0.088>, <E9 0.116>, <D5 0.029>, <D15 0.011>, <C11 0.019>, <F13 0.122>, <D7 0.156>, <B5 0.591>, <E19 0.006>, <F9 0.104>, <C19 0.003>, <B9 0.049>, <E11 0.078>, <E8 0.03>, <F10 0.097>, <C10 0.044>, <D14 0.04>, <B7 0.091>, <E14 0.083>, <E5 0.041>, <B11 0.015>, <F7 0.037>, <D17 0.046>, <B8 0.076>, <F14 0.107>, <C15 0.015>, <E18 0.047>, <F17 0.07>, <B14 0.008>, <B20 0.0>, <F12 0.05>, <F5 0.029>, <D11 0.039>, <F8 0.03>, <D12 0.04>, <F15 0.045>, <C22 405.0>, <C5 0.066>, <D18 0.017>, <C8 0.331>, <C20 0.0>, <F16 0.054>, <D22 388.0>, <C14 0.011>, <E20 0.0>, <C18 0.008>, <B16 0.007>, <B10 0.021>, <B22 415.0>, <F20 0.0>, <F22 329.0>, <D13 0.079>, <F11 0.111>, <D19 0.009>, <C7 0.126>, <B15 0.008>, <D20 0.003>, <D8 0.023>, <E12 0.059>, <C13 0.034>, <B6 0.099>, <F6 0.057>, <E16 0.063>, <E6 0.047>, <B12 0.015>, <C12 0.033>, <E13 0.101>, <C16 0.015>, <E22 369.0>, <B18 0.003>, <B19 0.0>

In [6]:
# Column headers: the non-blank cells on row 3, from B3 rightwards.
Alcoholissue = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
Alcoholissue

{<E3 'Quaternary issue'>, <C3 'Secondary issue'>, <B3 'Primary issue'>, <F3 'Quinary issue'>, <D3 'Tertiary issue'>}

In [7]:
# Row labels: the non-blank cells in column A, from A5 downwards.
Alcoholrelatedissue = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
Alcoholrelatedissue

{<A15 'Joy riding'>, <A7 'Drink-driving'>, <A20 'Other'>, <A13 'Vandalism'>, <A11 'Personal safety'>, <A8 'Drinking in public places'>, <A22 'Unweighted base'>, <A10 'Underage sales of alcohol'>, <A5 'Underage drinking'>, <A17 'Spiking of drinks'>, <A14 'General disturbance'>, <A16 'Litter'>, <A25 '1. Results exclude "don\'t know" and refusals.'>, <A6 'Rowdy and drunken behaviour'>, <A18 'Indecent exposure/urinating in street'>, <A24 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A9 'Alcohol use/abuse'>, <A19 'Unwanted sexual contact'>, <A12 'Theft'>}

In [8]:
# Dimensions attached to every observation cell.
Dimensions = [
    # Header cell directly above the observation.
    HDim(Alcoholissue, 'Alcohol issue', DIRECTLY, ABOVE),
    # Row label directly to the left of the observation.
    HDim(Alcoholrelatedissue, 'Alcohol related issue', DIRECTLY, LEFT),
    # Constant columns applied to all rows.
    HDimConst('Measure Type', 'Percentage'),
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with their dimensions into a conversion segment.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional preview of the segment, left disabled (presumably writes an HTML
# preview for visual checking — confirm against databaker docs).
# savepreviewhtml(c1)

In [10]:
# Convert the conversion segment into a pandas DataFrame and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Alcohol issue,Alcohol related issue,Measure Type,Unit
0,0.591,Primary issue,Underage drinking,Percentage,People
1,0.066,Secondary issue,Underage drinking,Percentage,People
2,0.029,Tertiary issue,Underage drinking,Percentage,People
3,0.041,Quaternary issue,Underage drinking,Percentage,People
4,0.029,Quinary issue,Underage drinking,Percentage,People
5,0.099,Primary issue,Rowdy and drunken behaviour,Percentage,People
6,0.187,Secondary issue,Rowdy and drunken behaviour,Percentage,People
7,0.230,Tertiary issue,Rowdy and drunken behaviour,Percentage,People
8,0.047,Quaternary issue,Rowdy and drunken behaviour,Percentage,People
9,0.057,Quinary issue,Rowdy and drunken behaviour,Percentage,People


In [11]:
# Rename the observation column 'OBS' to 'Response'; other columns keep their names.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [12]:
# Check the column dtypes after the rename.
new_table.dtypes

Response                 float64
Alcohol issue             object
Alcohol related issue     object
Measure Type              object
Unit                      object
dtype: object

In [13]:
# Inspect the last five rows of the table.
new_table.tail(5)

Unnamed: 0,Response,Alcohol issue,Alcohol related issue,Measure Type,Unit
80,415.0,Primary issue,Unweighted base,Percentage,People
81,405.0,Secondary issue,Unweighted base,Percentage,People
82,388.0,Tertiary issue,Unweighted base,Percentage,People
83,369.0,Quaternary issue,Unweighted base,Percentage,People
84,329.0,Quinary issue,Unweighted base,Percentage,People


In [14]:
# Inspect the first rows of the table.
new_table.head()

Unnamed: 0,Response,Alcohol issue,Alcohol related issue,Measure Type,Unit
0,0.591,Primary issue,Underage drinking,Percentage,People
1,0.066,Secondary issue,Underage drinking,Percentage,People
2,0.029,Tertiary issue,Underage drinking,Percentage,People
3,0.041,Quaternary issue,Underage drinking,Percentage,People
4,0.029,Quinary issue,Underage drinking,Percentage,People


In [15]:
# Coerce 'Response' to numeric; values that cannot be parsed become NaN.
new_table = new_table.assign(
    Response=pd.to_numeric(new_table['Response'], errors='coerce')
)

In [16]:
# Store each 'Response' value as its string representation.
new_table['Response'] = new_table['Response'].map(str)

In [17]:
def user_perc(x,y):
    """Pick the measure type for a row.

    x: the row's 'Alcohol related issue' label (compared via its str() form).
    y: the row's current measure type.

    Returns 'Number' when the label is exactly 'Unweighted base'; otherwise
    returns y unchanged.
    """
    return 'Number' if str(x) == 'Unweighted base' else y
    
# Recompute 'Measure Type' row by row: 'Unweighted base' rows become 'Number',
# every other row keeps its existing measure type.
new_table['Measure Type'] = [
    user_perc(issue, measure)
    for issue, measure in zip(new_table['Alcohol related issue'],
                              new_table['Measure Type'])
]

In [18]:
# Keep only the output columns, in the order they should appear in the CSV.
new_table = new_table.loc[:, [
    'Alcohol related issue',
    'Alcohol issue',
    'Measure Type',
    'Response',
    'Unit',
]]

In [19]:
# Preview the first five rows of the final table before writing it out.
new_table.head(5)

Unnamed: 0,Alcohol related issue,Alcohol issue,Measure Type,Response,Unit
0,Underage drinking,Primary issue,Percentage,0.591,People
1,Underage drinking,Secondary issue,Percentage,0.066,People
2,Underage drinking,Tertiary issue,Percentage,0.029,People
3,Underage drinking,Quaternary issue,Percentage,0.041,People
4,Underage drinking,Quinary issue,Percentage,0.029,People


In [20]:
# Make sure the output folder exists (creating parent folders if needed),
# then write the tidy table as CSV without the DataFrame index.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)
new_table.to_csv(destinationFolder / 'taba2.csv', index=False)