Table A15: Top three harms caused by alcohol and/or drug related issues in the local area (%)

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
Value = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(Value.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A15')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A15']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<D12 0.054>, <D5 0.096>, <B7 0.124>, <C16 720.0>, <B13 0.003>, <C19 ' '>, <C9 0.079>, <D11 0.035>, <D7 0.142>, <D14 0.007>, <B5 0.571>, <D8 0.206>, <C6 0.185>, <B6 0.173>, <B8 0.046>, <D10 0.072>, <C8 0.128>, <C14 0.002>, <D9 0.196>, <C7 0.332>, <D13 0.018>, <D16 658.0>, <B10 0.027>, <D6 0.175>, <B12 0.005>, <C13 0.023>, <C12 0.026>, <B11 0.013>, <C11 0.026>, <C5 0.159>, <B9 0.036>, <B14 0.002>, <C10 0.042>, <B16 773.0>}

In [6]:
Lvh = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Lvh

{<C3 'Secondary harm'>, <B3 'Primary harm'>, <D3 'Tertiary harm'>}

In [7]:
tph = tab.excel_ref('A5').expand(DOWN).is_not_blank()
tph

{<A12 'Loss of confidence in law enforcement agencies'>, <A19 '1. Results exclude "don\'t know" and refusals.'>, <A5 'Anti-social behaviour'>, <A14 'Other'>, <A13 'Loss of trade for businesses'>, <A10 'Less money available for public services'>, <A7 'Crime in my local area'>, <A18 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A16 'Unweighted base'>, <A9 'Violence in my local area'>, <A6 "Damage to people's health">, <A8 'Fear in my local area'>, <A11 'Lack of investment by businesses'>}

In [8]:
Dimensions = [
            HDim(Lvh,'Harm level',DIRECTLY,ABOVE),
            HDim(tph,'Harm type1',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percent'),
            HDimConst('Unit','People'),
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Harm level,Harm type1,Measure Type,Unit
0,0.571,,Primary harm,Anti-social behaviour,Percent,People
1,0.159,,Secondary harm,Anti-social behaviour,Percent,People
2,0.096,,Tertiary harm,Anti-social behaviour,Percent,People
3,0.173,,Primary harm,Damage to people's health,Percent,People
4,0.185,,Secondary harm,Damage to people's health,Percent,People
5,0.175,,Tertiary harm,Damage to people's health,Percent,People
6,0.124,,Primary harm,Crime in my local area,Percent,People
7,0.332,,Secondary harm,Crime in my local area,Percent,People
8,0.142,,Tertiary harm,Crime in my local area,Percent,People
9,0.046,,Primary harm,Fear in my local area,Percent,People


In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.dtypes

Value           object
DATAMARKER      object
Harm level      object
Harm type1      object
Measure Type    object
Unit            object
dtype: object

In [13]:
new_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Harm level,Harm type1,Measure Type,Unit
29,0.007,,Tertiary harm,Other,Percent,People
30,773.0,,Primary harm,Unweighted base,Percent,People
31,720.0,,Secondary harm,Unweighted base,Percent,People
32,658.0,,Tertiary harm,Unweighted base,Percent,People
33,,,Secondary harm,"1. Results exclude ""don't know"" and refusals.",Percent,People


In [14]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Harm level,Harm type1,Measure Type,Unit
0,0.571,,Primary harm,Anti-social behaviour,Percent,People
1,0.159,,Secondary harm,Anti-social behaviour,Percent,People
2,0.096,,Tertiary harm,Anti-social behaviour,Percent,People
3,0.173,,Primary harm,Damage to people's health,Percent,People
4,0.185,,Secondary harm,Damage to people's health,Percent,People


In [15]:
new_table['Value'] = pd.to_numeric(new_table['Value'], errors='coerce')

In [16]:
new_table = new_table[new_table['Value'].notnull() ]

In [17]:
new_table['Value'] = new_table['Value'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Count'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Harm type1'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [19]:
import urllib.request as request
import csv
r = request.urlopen('https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/harm-type.csv').read().decode('utf8').split("\n")
reader = csv.reader(r)
for line in reader:
    print(line)

['Label', 'Notation', 'Parent Notation', 'Sort Priority']
['Anti-social behaviour', 'asb', '', '1']
["Damage to people's health", 'damage-to-health', '', '2']
['Crime in my local area', 'crime', '', '3']
['Fear in my local area', 'fear', '', '4']
['Violence in my local area', 'violence', '', '5']
['Less money available for public services', 'less-money', '', '6']
['Lack of investment by businesses', 'lack-of-investment', '', '7']
['Loss of confidence in law enforcement agencies', 'loss-of-confidence', '', '8']
['Loss of trade for businesses', 'loss-of-trade', '', '9']
['Other', 'other', '', '10']
['Unweighted base', 'unweighted-base', '', '11']


In [20]:
import io
import requests
url="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/harm-type.csv"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [21]:
Final_table = pd.merge(new_table, c, how = 'left', left_on = 'Harm type1', right_on = 'Label')

In [22]:
Final_table.columns = ['Harm type' if x=='Notation' else x for x in Final_table.columns]

In [23]:
Final_table['Harm level'] = Final_table['Harm level'].map(
    lambda x: {
        'Primary harm' : 'primary', 
        'Secondary harm' : 'secondary',
        'Tertiary harm': 'tertiary'       
        }.get(x, x))

In [24]:
Final_table = Final_table[['Harm level','Harm type','Measure Type','Value','Unit']]

In [25]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

Final_table.to_csv(destinationFolder / ('table15.csv'), index = False)