Table A17: Comparison of main results from 2012 to 2016

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
inputFile = sourceFolder / 'ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx'
Value = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(Value.content)

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table A17')[0]

Loading in\ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx which has size 191159 bytes
Table names: ['Table A17']


In [4]:
observations = tab.excel_ref('C5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<D31 0.454>, <E24 0.141>, <D28 'Some confidence'>, <E28 'Little or no confidence'>, <F5 945.0>, <C11 0.355>, <C26 0.05>, <D19 0.797>, <E20 0.105>, <E19 0.102>, <E17 0.106>, <E5 0.403>, <C12 0.367>, <C28 'Total or a lot of confidence'>, <F16 'Unweighted base'>, <D33 0.47>, <E8 0.41>, <F7 1078.0>, <D6 0.196>, <F32 1125.0>, <D8 0.172>, <E10 0.375>, <C30 0.084>, <C25 0.054>, <C13 0.341>, <E30 0.472>, <C24 0.06>, <D10 0.196>, <E29 0.472>, <E6 0.439>, <C9 0.461>, <E12 0.431>, <E11 0.442>, <E16 'Worse'>, <F31 1046.0>, <F9 1126.0>, <F12 1067.0>, <C10 '42.9%**'>, <D22 0.8>, <F28 'Unweighted base'>, <F29 917.0>, <D17 0.824>, <C5 0.433>, <E14 0.414>, <C8 0.418>, <E32 0.452>, <F11 1096.0>, <F8 1159.0>, <F21 1043.0>, <F22 856.0>, <F14 1112.0>, <D32 0.466>, <D13 0.223>, <D18 0.82>, <D14 0.204>, <C20 0.104>, <F30 1078.0>, <D20 0.791>, <E7 0.4>, <F26 981.0>, <F18 1028.0>, <E22 0.153>, <C33 0.1>, <C18 0.09>, <C16 'Better'>, <C22 0.047>, <E21 0.104>, <E9 0.379>, <F13 1151.0>, <C6 0.366>, <E26 0.12>, <E

In [6]:
Period = tab.excel_ref('B5').expand(DOWN).is_not_blank()
Period

{<B7 2014.0>, <B19 2014.0>, <B18 2015.0>, <B12 2014.0>, <B8 2013.0>, <B23 2015.0>, <B5 2016.0>, <B14 2012.0>, <B16 'Year'>, <B9 2012.0>, <B6 2015.0>, <B20 2013.0>, <B22 2016.0>, <B26 2012.0>, <B31 2014.0>, <B10 2016.0>, <B21 2012.0>, <B30 2015.0>, <B13 2013.0>, <B29 2016.0>, <B28 'Year'>, <B33 2012.0>, <B17 2016.0>, <B11 2015.0>, <B32 2013.0>, <B24 2014.0>, <B25 2013.0>}

In [7]:
jh = tab.excel_ref('C4').expand(RIGHT).is_not_blank() -  tab.excel_ref('C15').expand(DOWN).is_not_blank()
jh

{<C4 '% agree'>, <E4 '% Disagree / strongly disagree'>, <D4 '% neither agree nor disagree'>, <F4 'Unweighted base'>}

In [8]:
gh = tab.excel_ref('C16').expand(RIGHT).is_not_blank() - tab.excel_ref('C27').expand(DOWN).is_not_blank()
gh

{<D16 'About the same'>, <C16 'Better'>, <E16 'Worse'>, <F16 'Unweighted base'>}

In [9]:
jk = tab.excel_ref('C28').expand(RIGHT).is_not_blank() - tab.excel_ref('C33').expand(DOWN).is_not_blank()
jk

{<E28 'Little or no confidence'>, <D28 'Some confidence'>, <F28 'Unweighted base'>, <C28 'Total or a lot of confidence'>}

In [10]:
tph = tab.excel_ref('A').expand(DOWN).by_index([5,17,29])
tph

{<A5 'I am concerned about alcohol related issues in my local area %'>, <A17 'Alcohol related issues in the local area % '>, <A29 ''>}

In [11]:
Dimensions = [
            HDim(jh,'Alcohol1',DIRECTLY,ABOVE),
            HDim(gh,'Alcohol2',DIRECTLY,ABOVE),
            HDim(jk,'Alcohol related issue response',DIRECTLY,ABOVE),
            HDim(Period,'Period',DIRECTLY,LEFT),
            HDim(tph,'Statement',CLOSEST,ABOVE),
            HDimConst('Measure Type', 'Percent'),
            HDimConst('Unit','People'),
            ]

In [12]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [13]:
Final_table = c1.topandas()
Final_table




Unnamed: 0,OBS,DATAMARKER,Alcohol1,Alcohol2,Alcohol related issue response,Period,Statement,Measure Type,Unit
0,0.433,,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
3,945,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percent,People
5,0.196,,% neither agree nor disagree,,,2015.0,I am concerned about alcohol related issues in...,Percent,People
6,0.439,,% Disagree / strongly disagree,,,2015.0,I am concerned about alcohol related issues in...,Percent,People
7,1105,,Unweighted base,,,2015.0,I am concerned about alcohol related issues in...,Percent,People
8,0.44,,% agree,,,2014.0,I am concerned about alcohol related issues in...,Percent,People
9,0.16,,% neither agree nor disagree,,,2014.0,I am concerned about alcohol related issues in...,Percent,People


In [14]:
Final_table['Statement'] = Final_table['Statement'].map(
    lambda x: {
        '' : 'Overall level of confidence'
        }.get(x, x))

In [15]:
Final_table.columns = ['Value' if x=='OBS' else x for x in Final_table.columns]

In [16]:
Final_table = Final_table[Final_table['Value'] != '']

In [17]:
Final_table.dtypes

Value                             object
DATAMARKER                        object
Alcohol1                          object
Alcohol2                          object
Alcohol related issue response    object
Period                            object
Statement                         object
Measure Type                      object
Unit                              object
dtype: object

In [18]:
Final_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Alcohol1,Alcohol2,Alcohol related issue response,Period,Statement,Measure Type,Unit
103,1125.0,,Unweighted base,Unweighted base,Unweighted base,2013.0,Overall level of confidence,Percent,People
104,0.1,,% agree,Better,Total or a lot of confidence,2012.0,Overall level of confidence,Percent,People
105,0.47,,% neither agree nor disagree,About the same,Some confidence,2012.0,Overall level of confidence,Percent,People
106,0.43,,% Disagree / strongly disagree,Worse,Little or no confidence,2012.0,Overall level of confidence,Percent,People
107,1096.0,,Unweighted base,Unweighted base,Unweighted base,2012.0,Overall level of confidence,Percent,People


In [19]:
Final_table.head()

Unnamed: 0,Value,DATAMARKER,Alcohol1,Alcohol2,Alcohol related issue response,Period,Statement,Measure Type,Unit
0,0.433,,% agree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
1,0.164,,% neither agree nor disagree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
2,0.403,,% Disagree / strongly disagree,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
3,945.0,,Unweighted base,,,2016.0,I am concerned about alcohol related issues in...,Percent,People
4,0.366,,% agree,,,2015.0,I am concerned about alcohol related issues in...,Percent,People


In [20]:
def user_perc(x,y,z,p):
    
    if str(x) == 'I am concerned about alcohol related issues in my local area %':
        return y
    if str(x) == 'Alcohol related issues in the local area % ':
        return z 
    else:
        return p
    
Final_table['Alcohol related issue response'] = Final_table.apply(lambda row: user_perc(row['Statement'],row['Alcohol1'],row['Alcohol2'],row['Alcohol related issue response']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [21]:
Final_table['Value'] = pd.to_numeric(Final_table['Value'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
Final_table['Period'] = pd.to_numeric(Final_table['Period'], errors='coerce').fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
Final_table['Period'] = Final_table['Period'].apply(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [24]:
Final_table['Period'] = Final_table['Period'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [25]:
Final_table['Period'] = 'year/' + Final_table['Period']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [26]:
Final_table['Value'] = Final_table['Value'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [27]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Count'
    else:
        return y
    
Final_table['Measure Type'] = Final_table.apply(lambda row: user_perc(row['Alcohol related issue response'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [28]:
Final_table['Statement'] = Final_table['Statement'].str.rstrip('%')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [29]:
Final_table['Alcohol related issue response'] = Final_table['Alcohol related issue response'].map(
    lambda x: {
        '% agree' : 'agree', 
        '% neither agree nor disagree' : 'neither',
        '% Disagree / strongly disagree': 'disagree' ,
        'Unweighted base': 'unweighted-base'
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [30]:
Final_table['Statement'] = Final_table['Statement'].map(
    lambda x: {
        'I am concerned about alcohol related issues in my local area ' : 'concerned', 
        'Alcohol related issues in the local area % ' : 'change-perception',
        'Overall level of confidence': 'confidence-level' ,
       }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [31]:
Final_table = Final_table[['Period','Alcohol related issue response','Statement','Measure Type','Value','Unit']]

In [32]:
Final_table.head(5)

Unnamed: 0,Period,Alcohol related issue response,Statement,Measure Type,Value,Unit
0,year/2016,agree,concerned,Percent,0.433,People
1,year/2016,neither,concerned,Percent,0.164,People
2,year/2016,disagree,concerned,Percent,0.403,People
3,year/2016,unweighted-base,concerned,Count,945.0,People
4,year/2015,agree,concerned,Percent,0.366,People


In [33]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

Final_table.to_csv(destinationFolder / ('table17.csv'), index = False)