Table A1: Concern about alcohol related issues in the local area by demographics (%)

In [1]:
from gssutils import *

if is_interactive():
    scraper = Scraper(
        "https://www.justice-ni.gov.uk/publications/"
        "research-and-statistical-bulletin-82017-views-alcohol-and-drug-related-issues-findings-october-2016")
scraper

## Research and Statistical Bulletin 8/2017 ‘Views on Alcohol and Drug Related Issues: Findings from the October 2016 Northern Ireland Omnibus Survey’

### Distributions

1. Bulletin 8-3028 omnibus survey views on alcohol and drug related issues: Findings from October 2016 ([application/pdf](https://www.justice-ni.gov.uk/sites/default/files/publications/justice/oct-2016-omnibus-survey-drugs-alcohol-bulletin.pdf))
1. October 2016 alcohol and drugs findings data table ([MS Excel Spreadsheet](https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx))
1. Drugs and alcohol infographics 2016 ([application/pdf](https://www.justice-ni.gov.uk/sites/default/files/publications/justice/drugs-and-alcohol-infographics-2016.pdf))


In [2]:
if is_interactive():
    tabs = {tab.name: tab for tab in scraper.distribution(
        title='October 2016 alcohol and drugs findings data table').as_databaker()}
tabs.keys()

dict_keys(['Metadata', 'Index', 'Table A1', 'Table A2', 'Table A3', 'Table A4', 'Table A5', 'Table A6', 'Table A7', 'Table A8', 'Table A9', 'Table A10', 'Table A11', 'Table A12', 'Table A13', 'Table A14', 'Table A15', 'Table A16', 'Table A17', 'Table B1', 'Table B2', 'Table B3'])

https://www.justice-ni.gov.uk/sites/default/files/publications/justice/ni-omnibus-survey-oct-2016-alcohol-drugs-tabular.xlsx

In [3]:
tab = tabs['Table A1']

In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<C8 'n<100'>, <C57 0.136>, <B19 0.471>, <B56 0.589>, <B5 0.433>, <C38 'n<100'>, <E19 360.0>, <E24 166.0>, <B8 'n<100'>, <B57 0.514>, <B32 0.484>, <C20 0.183>, <D16 0.42>, <D26 0.414>, <E12 272.0>, <E51 243.0>, <B10 0.41>, <E16 510.0>, <C55 0.16>, <C16 0.168>, <D47 0.377>, <C42 0.112>, <E38 87.0>, <C28 0.154>, <B42 0.471>, <D51 0.404>, <D42 0.417>, <E31 446.0>, <E5 945.0>, <E39 108.0>, <C47 0.176>, <B36 0.444>, <B26 0.425>, <B11 0.491>, <B16 0.412>, <B37 'n<100'>, <E57 606.0>, <B20 0.406>, <D12 0.416>, <B55 0.42>, <B12 0.438>, <E52 702.0>, <E48 602.0>, <E10 248.0>, <B43 0.463>, <B39 0.388>, <D31 0.424>, <C11 0.141>, <E11 259.0>, <B44 0.366>, <D15 0.382>, <E47 343.0>, <D9 0.521>, <D35 0.415>, <B35 0.407>, <C26 0.161>, <E36 433.0>, <D36 0.384>, <C44 0.206>, <D48 0.42>, <E35 271.0>, <D20 0.411>, <E15 435.0>, <C5 0.164>, <E55 281.0>, <C56 0.117>, <C24 0.143>, <B27 0.399>, <B38 'n<100'>, <E43 382.0>, <E42 259.0>, <C9 0.167>, <C35 0.178>, <D43 0.373>, <C12 0.146>, <C32 0.138>, <D52 0.403>, <

In [6]:
Alcoholissue = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Alcoholissue

{<D3 'Disagree / strongly disagree'>, <C3 'Neither agree nor disagree'>, <B3 'Agree / strongly agree'>, <E3 'Unweighted base'>}

In [7]:
sect = tab.excel_ref('A5').expand(DOWN).is_not_blank()
sect

{<A23 'Health and Social Care Trust'>, <A55 'Belfast'>, <A19 'Catholic'>, <A51 'Has disability / illness'>, <A5 'All adults'>, <A25 'Northern'>, <A58 'Rural'>, <A57 'All urban'>, <A43 'Secondary'>, <A61 'Source: Northern Ireland Omnibus Survey, October 2016'>, <A12 '65 and over'>, <A8 '16-24'>, <A37 'Married and separated from husband / wife'>, <A9 '25-34'>, <A31 'In paid employment'>, <A47 'Has dependants'>, <A7 'Age of household reference person (HRP)1'>, <A27 'Southern'>, <A34 'Marital status'>, <A16 'Female'>, <A14 'Gender'>, <A46 'Dependants'>, <A62 '1. Results exclude "don\'t know" and refusals.'>, <A42 'Primary'>, <A56 'Urban, excluding Belfast'>, <A30 'Employment status'>, <A11 '50-64'>, <A32 'Not in paid employment'>, <A28 'Western'>, <A39 'Widowed'>, <A10 '35-49'>, <A20 'Protestant'>, <A21 'Other'>, <A41 'Level of qualifications'>, <A38 'Divorced'>, <A24 'Belfast'>, <A15 'Male'>, <A35 'Single, that is never married'>, <A48 'No dependants'>, <A52 'No disability / illness'>, <A

In [8]:
category = tab.excel_ref('A').expand(DOWN).by_index([7,14,18,23,30,34,41,46,50,54])
category

{<A14 'Gender'>, <A23 'Health and Social Care Trust'>, <A34 'Marital status'>, <A54 'Area type'>, <A30 'Employment status'>, <A50 'Disability / illness'>, <A18 'Religion'>, <A7 'Age of household reference person (HRP)1'>, <A41 'Level of qualifications'>, <A46 'Dependants'>}

In [9]:
Dimensions = [
            HDim(Alcoholissue,'Alcohol related issue response',DIRECTLY,ABOVE),
            HDim(category,'Category',CLOSEST,ABOVE),
            HDim(sect,'Sect',DIRECTLY,LEFT),
            HDimConst('Measure Type', 'Percent'),
            HDimConst('Unit','People'),
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Alcohol related issue response,Category,Sect,Measure Type,Unit
0,0.433,,Agree / strongly agree,,All adults,Percent,People
1,0.164,,Neither agree nor disagree,,All adults,Percent,People
2,0.403,,Disagree / strongly disagree,,All adults,Percent,People
3,945,,Unweighted base,,All adults,Percent,People
4,,n<100,Agree / strongly agree,Age of household reference person (HRP)1,16-24,Percent,People
5,,n<100,Neither agree nor disagree,Age of household reference person (HRP)1,16-24,Percent,People
6,,n<100,Disagree / strongly disagree,Age of household reference person (HRP)1,16-24,Percent,People
7,66,,Unweighted base,Age of household reference person (HRP)1,16-24,Percent,People
8,0.313,,Agree / strongly agree,Age of household reference person (HRP)1,25-34,Percent,People
9,0.167,,Neither agree nor disagree,Age of household reference person (HRP)1,25-34,Percent,People


In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table['Value'] = pd.to_numeric(new_table['Value'], errors='coerce')

In [14]:
new_table = new_table[new_table['Value'].notnull() ]

In [15]:
new_table['Value'] = new_table['Value'].apply(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
def user_perc(x,y):
    
    if str(x) == 'Unweighted base':
        return 'Count'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Alcohol related issue response'],row['Measure Type']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
new_table['Category'] = new_table['Category'].str.rstrip('1')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table['Demographic1'] = new_table['Category'].fillna('') + ' - ' + new_table['Sect'].fillna('')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table['Demographic1'] = new_table['Demographic1'].str.lstrip(' - ')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [20]:
new_table = new_table[['Demographic1','Alcohol related issue response','Measure Type','Value','Unit']]

In [21]:
import urllib.request as request
import csv
r = request.urlopen('https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/demographic.csv').read().decode('utf8').split("\n")
reader = csv.reader(r)
for line in reader:
    print(line)

['Label', 'Notation', 'Parent Notation', 'Sort Priority']
['All adults', 'all-adults', '', '1']
['Age of household reference person (HRP) - 16-24', 'hrp/16-24', '', '2']
['Age of household reference person (HRP) - 25-34', 'hrp/25-34', '', '3']
['Age of household reference person (HRP) - 35-49', 'hrp/35-49', '', '4']
['Age of household reference person (HRP) - 50-64', 'hrp/50-64', '', '5']
['Age of household reference person (HRP) - 65 and over', 'hrp/65plus', '', '6']
['Gender - Male', 'male', '', '7']
['Gender - Female', 'female', '', '8']
['Religion - Catholic', 'catholic', '', '9']
['Religion - Protestant', 'protestant', '', '10']
['Religion - Other', 'other', '', '11']
['Health and Social Care Trust - Belfast', 'hsct/belfast', '', '12']
['Health and Social Care Trust - Northern', 'hsct/northern', '', '13']
['Health and Social Care Trust - South Eastern', 'hsct/south-eastern', '', '14']
['Health and Social Care Trust - Southern', 'hsct/southern', '', '15']
['Health and Social Care T

In [22]:
import io
import requests
url="https://raw.githubusercontent.com/ONS-OpenData/ref_alcohol/master/codelists/demographic.csv"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

In [23]:
Final_table = pd.merge(new_table, c, how = 'left', left_on = 'Demographic1', right_on = 'Label')

In [24]:
Final_table.columns = ['Demographic' if x=='Notation' else x for x in Final_table.columns]

In [25]:
Final_table['Alcohol related issue response'] = Final_table['Alcohol related issue response'].map(
    lambda x: {
        'Agree / strongly agree' : 'agree', 
        'Neither agree nor disagree' : 'neither',
        'Disagree / strongly disagree': 'disagree' ,
        'Unweighted base': 'unweighted-base'
        }.get(x, x))

In [26]:
Final_table = Final_table[['Demographic','Alcohol related issue response','Measure Type','Value','Unit']]

In [28]:
from pathlib import Path
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

Final_table.to_csv(destinationFolder / ('taba1.csv'), index = False)