Table A17: Comparison of main results from 2012 to 2016

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A17')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A17']


In [4]:
observations = tab.excel_ref('C5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<E12 0.431>, <C33 0.1>, <D16 'About the same'>, <E11 0.442>, <E23 0.138>, <F16 'Unweighted base'>, <C8 0.418>, <C28 'Total or a lot of confidence'>, <D17 0.824>, <C14 0.382>, <E28 'Little or no confidence'>, <F28 'Unweighted base'>, <F30 1078.0>, <D33 0.47>, <E8 0.41>, <E17 0.106>, <E5 0.403>, <F32 1125.0>, <F19 1013.0>, <D8 0.172>, <E21 0.104>, <E13 0.435>, <D21 0.786>, <C21 0.109>, <C19 0.1>, <F24 975.0>, <D18 0.82>, <D12 0.202>, <C29 0.094>, <C20 0.104>, <D10 0.196>, <D31 0.454>, <E30 0.472>, <D29 0.434>, <C24 0.06>, <C26 0.05>, <D30 0.443>, <E22 0.153>, <C9 0.461>, <E7 0.4>, <C23 0.056>, <F18 1028.0>, <E10 0.375>, <F7 1078.0>, <E33 0.43>, <C6 0.366>, <F31 1046.0>, <C22 0.047>, <F6 1105.0>, <E14 0.414>, <C7 0.44>, <D5 0.164>, <E26 0.12>, <F17 881.0>, <F25 1048.0>, <D23 0.806>, <D24 0.799>, <C16 'Better'>, <E29 0.472>, <E31 0.444>, <D20 0.791>, <F20 1077.0>, <C31 0.103>, <D25 0.835>, <D28 'Some confidence'>, <F23 1002.0>, <F14 1112.0>, <E24 0.141>, <C12 0.367>, <D13 0.223>, <D9 0.16

In [6]:
year = tab.excel_ref('B5').expand(DOWN).is_not_blank()
year

{<B31 2014.0>, <B6 2015.0>, <B26 2012.0>, <B20 2013.0>, <B10 2016.0>, <B17 2016.0>, <B22 2016.0>, <B13 2013.0>, <B24 2014.0>, <B33 2012.0>, <B30 2015.0>, <B29 2016.0>, <B25 2013.0>, <B18 2015.0>, <B11 2015.0>, <B19 2014.0>, <B7 2014.0>, <B12 2014.0>, <B32 2013.0>, <B23 2015.0>, <B9 2012.0>, <B28 'Year'>, <B5 2016.0>, <B8 2013.0>, <B14 2012.0>, <B16 'Year'>, <B21 2012.0>}

In [7]:
jh = tab.excel_ref('C4').expand(RIGHT).is_not_blank() -  tab.excel_ref('C15').expand(DOWN).is_not_blank()
jh

{<D4 '% neither agree nor disagree'>, <E4 '% Disagree / strongly disagree'>, <F4 'Unweighted base'>, <C4 '% agree'>}

In [8]:
gh = tab.excel_ref('C16').expand(RIGHT).is_not_blank() - tab.excel_ref('C27').expand(DOWN).is_not_blank()
gh

{<F16 'Unweighted base'>, <C16 'Better'>, <D16 'About the same'>, <E16 'Worse'>}

In [9]:
jk = tab.excel_ref('C28').expand(RIGHT).is_not_blank() - tab.excel_ref('C33').expand(DOWN).is_not_blank()
jk

{<C28 'Total or a lot of confidence'>, <E28 'Little or no confidence'>, <D28 'Some confidence'>, <F28 'Unweighted base'>}

In [10]:
tph = tab.excel_ref('A').expand(DOWN).by_index([5,17,29])
tph

{<A5 'I am concerned about alcohol related issues in my local area %'>, <A17 'Alcohol related issues in the local area % '>, <A29 ''>}

In [11]:
Dimensions = [
            HDim(jh,'Alcohol1',DIRECTLY,ABOVE),
            HDim(gh,'Alcohol2',DIRECTLY,ABOVE),
            HDim(jk,'Alcohol issue',DIRECTLY,ABOVE),
            HDim(year,'Year',DIRECTLY,LEFT),
            HDim(tph,'Statement',CLOSEST,ABOVE),
            HDimConst('Measure Type', 'Percentage'),
            HDimConst('Unit','People'),
            ]

In [12]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [13]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
0,0.433,,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
3,945,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
5,0.196,,% neither agree nor disagree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
6,0.439,,% Disagree / strongly disagree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
7,1105,,Unweighted base,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People
8,0.44,,% agree,,,2014.0,I am concerned about alcohol related issues in...,Percentage,People
9,0.16,,% neither agree nor disagree,,,2014.0,I am concerned about alcohol related issues in...,Percentage,People


In [14]:
new_table['Statement'] = new_table['Statement'].map(
    lambda x: {
        '' : 'Overall level of confidence'
        }.get(x, x))

In [15]:
new_table.columns = ['Response' if x=='OBS' else x for x in new_table.columns]

In [16]:
new_table = new_table[new_table['Response'] != '']

In [17]:
new_table.dtypes

Response         object
DATAMARKER       object
Alcohol1         object
Alcohol2         object
Alcohol issue    object
Year             object
Statement        object
Measure Type     object
Unit             object
dtype: object

In [18]:
new_table.tail(5)

Unnamed: 0,Response,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
103,1125.0,,Unweighted base,Unweighted base,Unweighted base,2013.0,Overall level of confidence,Percentage,People
104,0.1,,% agree,Better,Total or a lot of confidence,2012.0,Overall level of confidence,Percentage,People
105,0.47,,% neither agree nor disagree,About the same,Some confidence,2012.0,Overall level of confidence,Percentage,People
106,0.43,,% Disagree / strongly disagree,Worse,Little or no confidence,2012.0,Overall level of confidence,Percentage,People
107,1096.0,,Unweighted base,Unweighted base,Unweighted base,2012.0,Overall level of confidence,Percentage,People


In [19]:
new_table.head()

Unnamed: 0,Response,DATAMARKER,Alcohol1,Alcohol2,Alcohol issue,Year,Statement,Measure Type,Unit
0,0.433,,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
3,945.0,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percentage,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percentage,People


In [20]:
def user_perc(x,y,z,p):
    
    if str(x) == 'I am concerned about alcohol related issues in my local area %':
        return y
    if str(x) == 'Alcohol related issues in the local area % ':
        return z 
    else:
        return p
    
new_table['Alcohol issue'] = new_table.apply(lambda row: user_perc(row['Statement'],row['Alcohol1'],row['Alcohol2'],row['Alcohol issue']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [21]:
new_table['Response'] = pd.to_numeric(new_table['Response'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
new_table['Year'] = pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
new_table['Year'] = new_table['Year'].apply(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [24]:
new_table['Response'] = new_table['Response'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [25]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Number'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Alcohol issue'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [26]:
new_table['Statement'] = new_table['Statement'].str.rstrip('%')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [27]:
new_table = new_table[['Year','Alcohol issue','Statement','Measure Type','Response','Unit']]

In [28]:
new_table.head(5)

Unnamed: 0,Year,Alcohol issue,Statement,Measure Type,Response,Unit
0,2016,% agree,I am concerned about alcohol related issues in...,Percentage,0.433,People
1,2016,% neither agree nor disagree,I am concerned about alcohol related issues in...,Percentage,0.164,People
2,2016,% Disagree / strongly disagree,I am concerned about alcohol related issues in...,Percentage,0.403,People
3,2016,Unweighted base,I am concerned about alcohol related issues in...,Number,945.0,People
4,2015,% agree,I am concerned about alcohol related issues in...,Percentage,0.366,People


In [29]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('taba17.csv'), index = False)