Table A6: Alcohol related issues in the local area affecting the respondent or their family

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session with an on-disk cache ('.cache'); the LastModified heuristic
# is passed so responses can be reused between notebook runs.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
# Fail loudly on 4xx/5xx: otherwise an HTML error page would be saved under
# the .xlsx name and only surface later as an obscure spreadsheet parse error.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
# Load only the 'Table A6' sheet from the downloaded workbook; the result is
# indexed with [0] to take the single requested tab.
tab = loadxlstabs(inputFile, sheetids='Table A6')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A6']


In [4]:
# Observations: every non-blank cell reached by expanding down and right from B5.
# In this sheet that is B5:B22, so the 'Unweighted base' count in B22 is
# selected together with the proportions.
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
# Display the selected cells as a visual check of the selection.
observations

{<B20 0.034>, <B10 0.236>, <B19 0.02>, <B22 85.0>, <B17 0.047>, <B12 0.142>, <B13 0.122>, <B9 0.236>, <B14 0.108>, <B15 0.081>, <B5 0.493>, <B11 0.176>, <B16 0.061>, <B6 0.392>, <B7 0.324>, <B18 0.034>, <B8 0.291>}

In [6]:
# Row labels: every non-blank cell in column A from A5 downwards.
# This also captures the source line and footnotes below the table (A24-A28).
# NOTE(review): presumably harmless because no observation sits on those rows
# and the dimension is looked up per observation row - confirm.
representatives = tab.excel_ref('A5').expand(DOWN).is_not_blank()
representatives

{<A18 'Drink-driving'>, <A28 '3. Caution should be exercised when interpreting percentages in this table, due to small numbers or respondents.'>, <A26 '2. Respondents were asked to select from a list of alcohol related issues respondent or their family have been'>, <A13 'Indecent exposure/urinating in street'>, <A14 'Theft'>, <A15 'Underage sales of alcohol'>, <A22 'Unweighted base'>, <A16 'Joy riding'>, <A20 'Other'>, <A5 'Rowdy and drunken behaviour'>, <A11 'Alcohol use/abuse'>, <A7 'Personal safety'>, <A8 'Vandalism'>, <A6 'General disturbance'>, <A9 'Drinking in public places'>, <A24 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A12 'Litter'>, <A27 'affected by in their local area, more than one option could be selected.'>, <A10 'Underage drinking'>, <A17 'Spiking of drinks'>, <A25 '1. Results exclude "don\'t know" and refusals.'>, <A19 'Unwanted sexual contact'>}

In [7]:
# Dimensions attached to each observation:
#   - 'Alcohol related issue' comes from the label cells in column A (DIRECTLY, LEFT);
#   - 'Measure Type' and 'Unit' are constants applied to every row. The
#     'Unweighted base' row has its Measure Type changed to 'Number' further down.
# NOTE(review): the observations are stored as fractions (e.g. 0.493) while the
# measure type says 'Percentage' - confirm downstream consumers expect fractions.
Dimensions = [
            HDim(representatives,'Alcohol related issue',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People')
            ]

In [8]:
# Bind the observation cells to their dimensions.
# NOTE(review): processTIMEUNIT=True is passed although no TIME dimension is
# defined in Dimensions - confirm whether this flag is needed here.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the segment, left disabled:
# savepreviewhtml(c1)

In [9]:
# Convert the segment to a DataFrame (an 'OBS' column plus one column per
# dimension) and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Alcohol related issue,Measure Type,Unit
0,0.493,Rowdy and drunken behaviour,Percentage,People
1,0.392,General disturbance,Percentage,People
2,0.324,Personal safety,Percentage,People
3,0.291,Vandalism,Percentage,People
4,0.236,Drinking in public places,Percentage,People
5,0.236,Underage drinking,Percentage,People
6,0.176,Alcohol use/abuse,Percentage,People
7,0.142,Litter,Percentage,People
8,0.122,Indecent exposure/urinating in street,Percentage,People
9,0.108,Theft,Percentage,People


In [10]:
# Rename the 'OBS' value column to 'Response'; all other columns keep their names.
new_table = new_table.rename(columns={'OBS': 'Response'})

In [11]:
# Check the column dtypes after the rename.
new_table.dtypes

Response                 float64
Alcohol related issue     object
Measure Type              object
Unit                      object
dtype: object

In [12]:
# Inspect the last rows, which include the 'Unweighted base' row.
new_table.tail(5)

Unnamed: 0,Response,Alcohol related issue,Measure Type,Unit
12,0.047,Spiking of drinks,Percentage,People
13,0.034,Drink-driving,Percentage,People
14,0.02,Unwanted sexual contact,Percentage,People
15,0.034,Other,Percentage,People
16,85.0,Unweighted base,Percentage,People


In [13]:
# Inspect the first rows.
new_table.head()

Unnamed: 0,Response,Alcohol related issue,Measure Type,Unit
0,0.493,Rowdy and drunken behaviour,Percentage,People
1,0.392,General disturbance,Percentage,People
2,0.324,Personal safety,Percentage,People
3,0.291,Vandalism,Percentage,People
4,0.236,Drinking in public places,Percentage,People


In [14]:
# Force 'Response' to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
response_numeric = pd.to_numeric(new_table['Response'], errors='coerce')
new_table = new_table.assign(Response=response_numeric)

In [15]:
# Store each 'Response' value as its string representation.
new_table['Response'] = new_table['Response'].map(str)

In [16]:
def user_perc(x,y):
    """Pick the measure type for one row.

    x -- the row's 'Alcohol related issue' label (compared via str(x)).
    y -- the row's current measure type.

    Returns 'Number' when the label is exactly 'Unweighted base', otherwise
    returns y unchanged.
    """
    is_base_row = str(x) == 'Unweighted base'
    return 'Number' if is_base_row else y
    
def _measure_type_for_row(row):
    """Apply user_perc to one DataFrame row."""
    return user_perc(row['Alcohol related issue'], row['Measure Type'])

# Recompute 'Measure Type' row by row, so the 'Unweighted base' row is
# labelled 'Number' and every other row keeps its existing value.
new_table['Measure Type'] = new_table.apply(_measure_type_for_row, axis=1)

In [17]:
# Put the columns in the order used for the output file.
output_columns = ['Alcohol related issue', 'Measure Type', 'Response', 'Unit']
new_table = new_table.loc[:, output_columns]

In [18]:
# Preview the final column layout before it is written out.
new_table.head(5)

Unnamed: 0,Alcohol related issue,Measure Type,Response,Unit
0,Rowdy and drunken behaviour,Percentage,0.493,People
1,General disturbance,Percentage,0.392,People
2,Personal safety,Percentage,0.324,People
3,Vandalism,Percentage,0.291,People
4,Drinking in public places,Percentage,0.236,People


In [19]:
# Write the tidy table to out/taba6.csv without the index column, creating
# the output folder (and any missing parents) if needed.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

outputFile = destinationFolder / 'taba6.csv'
new_table.to_csv(outputFile, index=False)