Census of Drug and Alcohol Treatment Services in Northern Ireland

In [1]:
from gssutils import *
# Landing page of the 2017 census publication; the scraper discovers the
# downloadable distributions listed on it.
CENSUS_LANDING_PAGE = 'https://www.health-ni.gov.uk/publications/census-drug-and-alcohol-treatment-services-northern-ireland-2017'
scraper = Scraper(CENSUS_LANDING_PAGE)
scraper

## Census of drug and alcohol treatment services in Northern Ireland 2017

### Distributions

1. Drug and Alcohol Census 2017 ([application/pdf](https://www.health-ni.gov.uk/sites/default/files/publications/health/drug-alcohol-census-2017.pdf))
1. Data from Census of Drug and Alcohol Treatment Services ([MS Excel Spreadsheet](https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx))
1. Pre-release Access List Drug and Alcohol Census ([application/pdf](https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/pre-release-drug-alcohol-census.pdf))


In [2]:
# Pick the Excel distribution by its title, load it through databaker and
# index the resulting sheets by sheet name for lookup in later cells.
census_workbook = scraper.distribution(
    title='Data from Census of Drug and Alcohol Treatment Services')
tabs = {sheet.name: sheet for sheet in census_workbook.as_databaker()}
tabs.keys()

dict_keys(['Contents & Notes to tables', 'Table 1', 'Table 2', 'Table 3', 'Table 4', 'Table 5', 'Metadata'])

In [3]:
# Build one tidy table per (sheet, transformation notebook) pair and collect
# them in `tables` for concatenation in the next cell.
tables = []
# Each pair is (sheet name as keyed in `tabs`, notebook to run against it).
# Tables 2-4 are each processed twice: once by an "(Age)" notebook and once
# by a "(sex)" notebook.
for tab_name, script in [
    ('Table 1', 'Treatment Services by Age and Gender.ipynb'),
    ('Table 2', 'Treatment Services by Service Type(Age).ipynb'),
    ('Table 2', 'Treatment Services by Service Type(sex).ipynb'),
    ('Table 3', 'Treatment Services by Residential status(Age).ipynb'),
    ('Table 3', 'Treatment Services by Residential Status(sex).ipynb'),
    ('Table 4', 'Treatment Services by Trust(Age).ipynb'),
    ('Table 4', 'Treatment Services by Trust(sex).ipynb'),
    ('Table 5', 'Treatment Services by Comparison table.ipynb')
]:
    # `tab` is set immediately before the sub-notebook runs; presumably the
    # sub-notebook reads it as its input sheet -- confirm in those notebooks.
    tab = tabs[tab_name]
    # IPython magic: execute the notebook whose file name is held in `script`.
    %run "$script"
    # NOTE(review): `new_table` is not defined in this notebook; it is
    # presumably left behind by the notebook that was just run. If a run fails
    # to define it, a stale value from the previous iteration would be appended.
    tables.append(new_table)











In [4]:
# Stack the per-table frames into one. ignore_index=True gives the combined
# frame a fresh 0..n-1 index; without it every table keeps its own row labels,
# so labels repeat across tables and any later label-based operation
# (e.g. DataFrame.drop(index_labels)) would hit rows from unrelated tables.
tidy = pd.concat(tables, ignore_index=True)
# Non-null count per column: a quick check for gaps in any dimension.
tidy.count()

Period            834
Category          834
Age               834
Sex               834
Treatment Type    827
Measure Type      834
Value             834
Unit              834
dtype: int64

In [5]:
# Rows without a treatment type are labelled 'Total'. Assign the result back
# instead of calling fillna(inplace=True) on the column selection: the chained
# in-place form is deprecated and does not update `tidy` under pandas
# Copy-on-Write.
tidy['Treatment Type'] = tidy['Treatment Type'].fillna('Total')

In [6]:
# Re-check the non-null count per column after filling 'Treatment Type'.
tidy.count()

Period            834
Category          834
Age               834
Sex               834
Treatment Type    834
Measure Type      834
Value             834
Unit              834
dtype: int64

In [7]:
# List the distinct 'Treatment Type' labels in the combined data.
# NOTE(review): the recorded output also contains labels such as 'Male',
# 'Female  ', 'Under 18 ', 'Statutory' and 'Belfast', which look like values of
# other dimensions -- confirm the per-table notebooks populate this column as
# intended.
tidy['Treatment Type'].unique()

array(['Alcohol Only', 'Drugs Only', 'Drugs & Alcohol', 'Total', 'All',
       'Under 18s', '18 and over', 'Male', 'Female  ', 'Under 18 ',
       'Statutory', 'Non-statutory', 'Prison', 'Residential',
       'Non-residential / Mixed', 'Belfast', 'Northern', 'South Eastern',
       'Southern', 'Western', 'Emergency admissions (HIS)'], dtype=object)

In [8]:
# Preview the first rows of the combined table.
tidy.head()

Unnamed: 0,Period,Category,Age,Sex,Treatment Type,Measure Type,Value,Unit
0,1 March 2017,All,Under 18,Persons,Alcohol Only,Count,95,People
1,1 March 2017,All,Under 18,Persons,Drugs Only,Count,324,People
2,1 March 2017,All,Under 18,Persons,Drugs & Alcohol,Count,294,People
3,1 March 2017,All,Under 18,Persons,Total,Count,713,People
4,1 March 2017,All,18 and over,Persons,Alcohol Only,Count,2482,People


In [9]:
# Keep only the 'Alcohol Only' observations; every other treatment type is
# discarded from here on. .copy() makes the result an independent frame so the
# column assignments in later cells do not operate on a slice of the original
# (which triggers SettingWithCopyWarning).
tidy = tidy[tidy['Treatment Type'] == 'Alcohol Only'].copy()

In [10]:
# Preview the rows that remain after the 'Alcohol Only' filter.
tidy.head()

Unnamed: 0,Period,Category,Age,Sex,Treatment Type,Measure Type,Value,Unit
0,1 March 2017,All,Under 18,Persons,Alcohol Only,Count,95,People
4,1 March 2017,All,18 and over,Persons,Alcohol Only,Count,2482,People
8,1 March 2017,All,All years,Persons,Alcohol Only,Count,2577,People
12,1 March 2017,All,Under 18,Male,Alcohol Only,Count,31,People
16,1 March 2017,All,18 and over,Male,Alcohol Only,Count,1536,People


In [11]:
# List the distinct 'Sex' labels.
# NOTE(review): the recorded output includes 'Female  ' (trailing spaces) next
# to 'Female', plus 'Male (%)' / 'Female (%)'; no cell visible in this notebook
# normalises these, so they appear to reach the output unchanged -- confirm
# whether that is intended.
tidy['Sex'].unique()

array(['Persons', 'Male', 'Female', 'Male (%)', 'Female (%)',
       '% of Total', '% of all Males ', '% of all Females', 'Female  '],
      dtype=object)

In [12]:
# Remove the rows whose 'Sex' label is one of the '% of ...' breakdowns.
# Filter with a boolean mask rather than DataFrame.drop(index_labels): the
# frame comes from pd.concat of several tables, so row labels can repeat, and
# dropping by label would also remove unrelated rows that share a label.
# The literals are matched exactly, including the trailing space in
# '% of all Males '.
percentage_sex_labels = ['% of Total', '% of all Males ', '% of all Females']
tidy = tidy[~tidy['Sex'].isin(percentage_sex_labels)].copy()

In [13]:
# Confirm which 'Sex' labels remain after removing the '% of ...' rows.
tidy['Sex'].unique()

array(['Persons', 'Male', 'Female', 'Male (%)', 'Female (%)', 'Female  '],
      dtype=object)

In [14]:
# List the distinct 'Period' strings. The recorded output shows inconsistent
# spellings ('1 March 2017', '1st March 2017', '1st September2014') that the
# date parsing in the following cells has to cope with.
tidy['Period'].unique()

array(['1 March 2017', '1st March 2007', '1st March 2010',
       '1st March 2012', '1st September2014', '1st March 2017'],
      dtype=object)

In [15]:
from datetime import datetime

In [16]:
# Convert the free-text census dates to 'DD-Mon-YY' strings.
# The source spells dates inconsistently ('1 March 2017', '1st March 2007',
# '1st September2014'). Rather than relying on pandas guessing a format per
# value (pandas >= 2.0 infers a single format from the first value and rejects
# the others), normalise the text first and parse with one explicit format.
period_text = (
    tidy['Period']
    .str.strip()
    # '1st' / '2nd' / '3rd' / '4th' -> '1' / '2' / '3' / '4'
    .str.replace(r'(?<=\d)(st|nd|rd|th)\b', '', regex=True)
    # 'September2014' -> 'September 2014'
    .str.replace(r'(?<=[A-Za-z])(?=\d{4}$)', ' ', regex=True)
)
# An unexpected spelling now raises here instead of being silently guessed.
tidy['Period'] = pd.to_datetime(period_text, format='%d %B %Y').dt.strftime('%d-%b-%y')

In [17]:
# Prefix every formatted date with 'day/'.
# Not idempotent: re-running this cell alone prepends the prefix again.
tidy['Period'] = 'day/' + tidy['Period']

In [18]:
# List the distinct 'Age' labels before they are mapped to codes.
tidy['Age'].unique()

array(['Under 18 ', '18 and over', 'All years', 'All'], dtype=object)

In [19]:
# Translate the spreadsheet's age-band labels into short codes. Keys are
# matched exactly (note the trailing space in 'Under 18 '); any label without
# an entry is passed through unchanged.
age_codes = {
    'Under 18 ': 'under-18',
    '18 and over': '18-plus',
    'All years': 'all',
    'All': 'all',
}
tidy['Age'] = tidy['Age'].map(lambda label: age_codes.get(label, label))

In [20]:
# Overwrite 'Treatment Type' with the single value 'alcohol' for every row
# (the frame was earlier filtered down to 'Alcohol Only' observations).
tidy['Treatment Type'] = 'alcohol'

In [21]:
# Lower-case the 'Category' labels; the mapping cells below match on the
# lower-cased text.
tidy['Category'] = tidy['Category'].str.lower()

In [22]:
# List the distinct lower-cased 'Category' labels. The recorded output still
# contains '(%)' variants and labels with stray whitespace
# (e.g. 'non-residential ').
tidy['Category'].unique()

array(['all', 'total', 'statutory', 'non-statutory', 'prison',
       'statutory (%)', 'non-statutory (%)', 'prison (%)', 'residential',
       'non-residential ', 'mixed', 'residential (%)',
       'non-residential (%)', 'mixed (%)', 'belfast', 'northern',
       'south eastern', 'southern', 'belfast (%)', 'northern (%)',
       'south eastern (%)', 'southern (%)', 'western (%)',
       'emergency admissions (his) (%)', 'western',
       'emergency admissions (his)', 'emergency admissions (his)  (%)',
       'treatment type'], dtype=object)

In [23]:
# Replace the two multi-word category labels with hyphenated codes. Matching
# is exact, so '(%)' variants and every other label pass through unchanged.
category_codes = {
    'south eastern': 'south-eastern',
    'emergency admissions (his)': 'emergency-admissions',
}
tidy['Category'] = tidy['Category'].map(lambda label: category_codes.get(label, label))

In [24]:
# NOTE(review): this cell applies a mapping keyed on 'Headcount' /
# 'Percentage of Headcount' to the 'Category' column. 'Category' was lower-cased
# in an earlier cell, so these capitalised keys can never match and every value
# falls through .get(x, x) unchanged -- the cell is effectively a no-op. It
# looks like a copy of the 'Measure Type' mapping in the next cell applied to
# the wrong column; confirm and remove, or replace with the intended Category
# clean-up.
tidy['Category'] = tidy['Category'].map(
    lambda x: {
        'Headcount' : 'Count', 
        'Percentage of Headcount' : 'Percentage',
        }.get(x, x))

In [25]:
# Shorten the measure-type labels; any label without an entry is passed
# through unchanged.
measure_type_names = {
    'Headcount': 'Count',
    'Percentage of Headcount': 'Percentage',
}
tidy['Measure Type'] = tidy['Measure Type'].map(
    lambda label: measure_type_names.get(label, label))

In [26]:
# Preview the cleaned table as it will be written out.
tidy.head()

Unnamed: 0,Period,Category,Age,Sex,Treatment Type,Measure Type,Value,Unit
0,day/01-Mar-17,all,under-18,Persons,alcohol,Count,95,People
4,day/01-Mar-17,all,18-plus,Persons,alcohol,Count,2482,People
8,day/01-Mar-17,all,all,Persons,alcohol,Count,2577,People
12,day/01-Mar-17,all,under-18,Male,alcohol,Count,31,People
16,day/01-Mar-17,all,18-plus,Male,alcohol,Count,1536,People


In [27]:
from pathlib import Path
# Write the tidy observations to out/observations.csv, creating the output
# directory if it does not exist yet. The DataFrame index is not written.
out = Path('out')
out.mkdir(exist_ok=True)
observations_path = out / 'observations.csv'
tidy.to_csv(observations_path, index=False)

The spreadsheet has a 'Metadata' tab containing the abstract and contact details. **TODO: extract these, and confirm which licence actually applies to this dataset.**

In [28]:
# Attach the dataset family and licence to the scraped metadata, then write
# the generated TriG document next to the observations file.
scraper.dataset.family = 'health'
scraper.dataset.license = 'http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/'

trig_path = out / 'dataset.trig'
# Opened in binary mode: the result of generate_trig() is written as-is.
with open(trig_path, 'wb') as trig_file:
    trig_file.write(scraper.generate_trig())