Table A16: Comparison of main results from 2015 to 2016

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A16')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A16']


In [4]:
observations = tab.excel_ref('C5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<E13 0.106>, <C9 0.355>, <C16 0.047>, <E5 0.403>, <D17 0.806>, <C21 0.094>, <E14 0.09>, <D16 0.8>, <C14 0.09>, <F16 856.0>, <D9 0.202>, <C12 'Better'>, <D6 0.196>, <E12 'Worse'>, <D12 'About the same'>, <F22 1078.0>, <F5 945.0>, <E17 0.138>, <C22 0.084>, <F6 1105.0>, <F12 'Unweighted base'>, <E9 0.442>, <E20 'Little or no confidence'>, <F20 'Unweighted base'>, <D8 0.196>, <F13 881.0>, <C20 'Total or a lot of confidence'>, <F21 917.0>, <E22 0.472>, <F14 1028.0>, <D20 'Some confidence'>, <C8 '42.9%**'>, <F17 1002.0>, <C13 0.07>, <D5 0.164>, <E21 0.472>, <C5 '43.3%**'>, <F9 1096.0>, <F8 935.0>, <D21 0.434>, <D22 0.443>, <D13 0.824>, <C17 0.056>, <E16 0.153>, <D14 0.82>, <E6 0.439>, <C6 0.366>, <E8 '37.5%**'>}

In [6]:
year = tab.excel_ref('B5').expand(DOWN).is_not_blank()
year

{<B21 2016.0>, <B12 'Year'>, <B14 2015.0>, <B9 2015.0>, <B13 2016.0>, <B22 2015.0>, <B17 2015.0>, <B5 2016.0>, <B16 2016.0>, <B20 'Year'>, <B8 2016.0>, <B6 2015.0>}

In [7]:
jh = tab.excel_ref('C4').expand(RIGHT).is_not_blank() -  tab.excel_ref('C11').expand(DOWN).is_not_blank()
jh

{<E4 '% Disagree / strongly disagree'>, <C4 '% agree'>, <D4 '% neither agree nor disagree'>, <F4 'Unweighted base'>}

In [8]:
gh = tab.excel_ref('C12').expand(RIGHT).is_not_blank() - tab.excel_ref('C20').expand(DOWN).is_not_blank()
gh

{<D12 'About the same'>, <F12 'Unweighted base'>, <E12 'Worse'>, <C12 'Better'>}

In [9]:
jk = tab.excel_ref('C20').expand(RIGHT).is_not_blank() - tab.excel_ref('C22').expand(DOWN).is_not_blank()
jk

{<D20 'Some confidence'>, <C20 'Total or a lot of confidence'>, <E20 'Little or no confidence'>, <F20 'Unweighted base'>}

In [10]:
tph = tab.excel_ref('A').expand(DOWN).by_index([5,13,21])
tph

{<A13 'Alcohol related issues in the local area % '>, <A5 'I am concerned about alcohol related issues in my local area %'>, <A21 ''>}

In [11]:
Dimensions = [
            HDim(jh,'Alcohol1',DIRECTLY,ABOVE),
            HDim(gh,'Alcohol2',DIRECTLY,ABOVE),
            HDim(jk,'Alcohol issue',DIRECTLY,ABOVE),
            HDim(year,'Year',DIRECTLY,LEFT),
            HDim(tph,'Statement',CLOSEST,ABOVE),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [12]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [13]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
0,43.3,%**,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
3,945.0,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
5,0.196,,% neither agree nor disagree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
6,0.439,,% Disagree / strongly disagree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
7,1105.0,,Unweighted base,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
8,42.9,%**,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
9,0.196,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People


In [14]:
new_table['Statement'] = new_table['Statement'].map(
    lambda x: {
        '' : 'Overall level of confidence'
        }.get(x, x))

In [15]:
new_table.columns = ['Response' if x=='OBS' else x for x in new_table.columns]

In [16]:
new_table = new_table[new_table['Response'] != '']

In [17]:
new_table.dtypes

Response         object
DATAMARKER       object
Alcohol1         object
Alcohol2         object
Alcohol issue    object
Year             object
Statement        object
Measure Type     object
Unit             object
dtype: object

In [18]:
new_table.tail(5)

Unnamed: 0,Response,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
43,917.0,,Unweighted base,Unweighted base,Unweighted base,2016.0,Overall level of confidence,Percentage,People
44,0.084,,% agree,Better,Total or a lot of confidence,2015.0,Overall level of confidence,Percentage,People
45,0.443,,% neither agree nor disagree,About the same,Some confidence,2015.0,Overall level of confidence,Percentage,People
46,0.472,,% Disagree / strongly disagree,Worse,Little or no confidence,2015.0,Overall level of confidence,Percentage,People
47,1078.0,,Unweighted base,Unweighted base,Unweighted base,2015.0,Overall level of confidence,Percentage,People


In [19]:
new_table.head()

Unnamed: 0,Response,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
0,43.3,%**,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
3,945.0,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People


In [20]:
def user_perc(x,y,z,p):
    
    if str(x) == 'I am concerned about alcohol related issues in my local area %':
        return y
    if str(x) == 'Alcohol related issues in the local area % ':
        return z 
    else:
        return p
    
new_table['Alcohol issue'] = new_table.apply(lambda row: user_perc(row['Statement'],row['Alcohol1'],row['Alcohol2'],row['Alcohol issue']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [21]:
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
new_table['Year'] = pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
new_table['Year'] = new_table['Year'].apply(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [24]:
new_table['Response'] = new_table['Response'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [25]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Number'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Alcohol issue'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [26]:
new_table['Statement'] = new_table['Statement'].str.rstrip('%')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [27]:
new_table = new_table[['Year','Alcohol issue','Statement','Measure Type','Response','Unit']]

In [28]:
new_table.head(5)

Unnamed: 0,Year,Alcohol issue,Statement,Measure Type,Response,Unit
0,2016,% agree,I am concerned about alcohol related issues in...,Percentage,43.3,People
1,2016,% neither agree nor disagree,I am concerned about alcohol related issues in...,Percentage,0.164,People
2,2016,% Disagree / strongly disagree,I am concerned about alcohol related issues in...,Percentage,0.403,People
3,2016,Unweighted base,I am concerned about alcohol related issues in...,Number,945.0,People
4,2015,% agree,I am concerned about alcohol related issues in...,Percentage,0.366,People


In [29]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('taba16.csv'), index = False)