Table A15: Top three harms caused by alcohol and/or drug related issues in the local area (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A15')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A15']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<C8 0.128>, <D9 0.196>, <C14 0.002>, <C7 0.332>, <D13 0.018>, <D16 658.0>, <B10 0.027>, <D6 0.175>, <B12 0.005>, <C13 0.023>, <C12 0.026>, <B11 0.013>, <C19 ' '>, <C11 0.026>, <C5 0.159>, <B9 0.036>, <B14 0.002>, <C10 0.042>, <B16 773.0>, <D12 0.054>, <D5 0.096>, <B7 0.124>, <C16 720.0>, <B13 0.003>, <C9 0.079>, <D11 0.035>, <D7 0.142>, <D14 0.007>, <B5 0.571>, <D8 0.206>, <C6 0.185>, <B6 0.173>, <B8 0.046>, <D10 0.072>}

In [6]:
Lvh = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Lvh

{<C3 'Secondary harm'>, <D3 'Tertiary harm'>, <B3 'Primary harm'>}

In [7]:
tph = tab.excel_ref('A5').expand(DOWN).is_not_blank()
tph

{<A6 "Damage to people's health">, <A13 'Loss of trade for businesses'>, <A10 'Less money available for public services'>, <A9 'Violence in my local area'>, <A12 'Loss of confidence in law enforcement agencies'>, <A19 '1. Results exclude "don\'t know" and refusals.'>, <A11 'Lack of investment by businesses'>, <A14 'Other'>, <A8 'Fear in my local area'>, <A16 'Unweighted base'>, <A5 'Anti-social behaviour'>, <A7 'Crime in my local area'>, <A18 'Source: Northern Ireland Omnibus Survey, October 2016'>}

In [8]:
Dimensions = [
            HDim(Lvh,'Level of harm',DIRECTLY,ABOVE),
            HDim(tph,'Type of harm',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Level of harm,Type of harm,Measure Type,Unit
0,0.571,,Primary harm,Anti-social behaviour,Percentage,People
1,0.159,,Secondary harm,Anti-social behaviour,Percentage,People
2,0.096,,Tertiary harm,Anti-social behaviour,Percentage,People
3,0.173,,Primary harm,Damage to people's health,Percentage,People
4,0.185,,Secondary harm,Damage to people's health,Percentage,People
5,0.175,,Tertiary harm,Damage to people's health,Percentage,People
6,0.124,,Primary harm,Crime in my local area,Percentage,People
7,0.332,,Secondary harm,Crime in my local area,Percentage,People
8,0.142,,Tertiary harm,Crime in my local area,Percentage,People
9,0.046,,Primary harm,Fear in my local area,Percentage,People


In [11]:
new_table.columns = ['Response' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.dtypes

Response         object
DATAMARKER       object
Level of harm    object
Type of harm     object
Measure Type     object
Unit             object
dtype: object

In [13]:
new_table.tail(5)

Unnamed: 0,Response,DATAMARKER,Level of harm,Type of harm,Measure Type,Unit
29,0.007,,Tertiary harm,Other,Percentage,People
30,773.0,,Primary harm,Unweighted base,Percentage,People
31,720.0,,Secondary harm,Unweighted base,Percentage,People
32,658.0,,Tertiary harm,Unweighted base,Percentage,People
33,,,Secondary harm,"1. Results exclude ""don't know"" and refusals.",Percentage,People


In [14]:
new_table.head()

Unnamed: 0,Response,DATAMARKER,Level of harm,Type of harm,Measure Type,Unit
0,0.571,,Primary harm,Anti-social behaviour,Percentage,People
1,0.159,,Secondary harm,Anti-social behaviour,Percentage,People
2,0.096,,Tertiary harm,Anti-social behaviour,Percentage,People
3,0.173,,Primary harm,Damage to people's health,Percentage,People
4,0.185,,Secondary harm,Damage to people's health,Percentage,People


In [15]:
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

In [16]:
new_table['Response'] = new_table['Response'].apply(str)

In [17]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Number'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Type of harm'],row['Measure Type']), axis = 1)

In [18]:
new_table = new_table[['Level of harm','Type of harm','Measure Type','Response','Unit']]

In [19]:
new_table.head(5)

Unnamed: 0,Level of harm,Type of harm,Measure Type,Response,Unit
0,Primary harm,Anti-social behaviour,Percentage,0.571,People
1,Secondary harm,Anti-social behaviour,Percentage,0.159,People
2,Tertiary harm,Anti-social behaviour,Percentage,0.096,People
3,Primary harm,Damage to people's health,Percentage,0.173,People
4,Secondary harm,Damage to people's health,Percentage,0.185,People


In [20]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('taba15.csv'), index = False)