Census of Drug and Alcohol Treatment Services in Northern Ireland: Breakdown by Service Type

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped with a file-backed cache ('.cache') and the LastModified heuristic.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
response = session.get(inputURL)
# Fail here on a 4xx/5xx response; otherwise the body of an error page would be
# saved under the .xlsx name and only surface later as a confusing load failure.
response.raise_for_status()
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx

In [3]:
# Load only the 'Table 4' worksheet from the downloaded workbook and keep the first match.
matching_tabs = loadxlstabs(inputFile, sheetids='Table 4')
tab = matching_tabs[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 4']


In [4]:
# Non-blank cells in the rectangle that starts at B23 and extends down and right...
data_region = tab.excel_ref('B23').expand(DOWN).expand(RIGHT).is_not_blank()
# ...minus everything from row 38 downwards (from column B rightwards).
below_table = tab.excel_ref('B38').expand(DOWN).expand(RIGHT)
observations = data_region - below_table


In [5]:
# Display the selected observation cells as a visual check of the selection.
observations

{<K26 329.0>, <H28 249.0>, <D25 52.0>, <M32 17.1>, <K36 0.0>, <D23 294.0>, <D34 8.8>, <K32 24.3>, <L35 21.9>, <B34 3.2>, <G31 28.9>, <I36 '*'>, <B36 '-'>, <E26 122.0>, <L23 1356.0>, <F23 2482.0>, <J32 17.2>, <H26 124.0>, <H23 1062.0>, <G26 260.0>, <F36 '*'>, <H25 33.0>, <K25 495.0>, <F37 '*'>, <D33 12.9>, <E29 '-'>, <G23 1712.0>, <H24 257.0>, <E32 10.8>, <E25 77.0>, <F25 434.0>, <M37 2.6>, <K33 16.2>, <E30 '*'>, <L34 17.7>, <H37 '*'>, <I33 13.7>, <I31 22.4>, <F28 588.0>, <C33 21.3>, <H35 23.4>, <H33 11.7>, <F32 17.5>, <D32 17.7>, <B25 9.0>, <F27 556.0>, <F30 '*'>, <I23 5256.0>, <I26 719.0>, <L36 13.1>, <L31 28.4>, <D27 26.0>, <H34 20.2>, <C35 23.1>, <C32 4.9>, <B24 39.0>, <K24 649.0>, <I27 1022.0>, <F24 425.0>, <E27 36.0>, <K28 295.0>, <G30 '*'>, <L25 85.0>, <I34 19.4>, <E35 21.2>, <K31 31.9>, <H30 '*'>, <L28 297.0>, <K37 0.4>, <G33 15.2>, <L27 240.0>, <I32 18.0>, <J24 464.0>, <I35 20.1>, <E37 '*'>, <J27 559.0>, <D35 16.3>, <B28 28.0>, <J36 0.2>, <E23 713.0>, <G35 12.9>, <H27 214.0>, <

In [6]:
# Row labels: the non-blank cells of column A from row 23 downwards.
service_column = tab.excel_ref('A23').expand(DOWN)
Service = service_column.is_not_blank()
Service

{<A35 'Western (%)'>, <A23 'Total'>, <A36 'Prison (%)'>, <A25 'Northern'>, <A29 'Prison'>, <A32 'Northern (%)'>, <A33 'South Eastern (%)'>, <A27 'Southern'>, <A26 'South Eastern'>, <A34 'Southern (%)'>, <A37 'Emergency admissions (HIS) (%)'>, <A30 'Emergency admissions (HIS)'>, <A28 'Western'>, <A31 'Belfast (%)'>, <A24 'Belfast'>}

In [7]:
# Treatment-type headers: the non-blank cells of row 22 from column B rightwards.
treatment_row = tab.excel_ref('B22').expand(RIGHT)
Treatment = treatment_row.is_not_blank()
Treatment

{<E22 'Total'>, <I22 'Total'>, <J22 'Alcohol Only'>, <G22 'Drugs Only'>, <D22 'Drugs & Alcohol'>, <L22 'Drugs & Alcohol'>, <K22 'Drugs Only'>, <H22 'Drugs & Alcohol'>, <F22 'Alcohol Only'>, <C22 'Drugs Only'>, <B22 'Alcohol Only'>}

In [8]:
# Age-band headers: the non-blank cells of row 21 from column B rightwards.
# The selection also picks up the 'Treatment Type' heading in J21, which is
# therefore treated as an age label by the Age dimension.
age_row = tab.excel_ref('B21').expand(RIGHT)
age = age_row.is_not_blank()
age

{<B21 'Under 18 '>, <F21 '18 and over'>, <M21 'Overall Total'>, <J21 'Treatment Type'>}

In [9]:
# Dimensions resolved from header cells relative to each observation.
header_dimensions = [
    HDim(Treatment, 'Treatment Type', DIRECTLY, ABOVE),
    HDim(Service, 'Category', DIRECTLY, LEFT),
    HDim(age, 'Age', CLOSEST, LEFT),
]
# Dimensions that carry the same fixed value on every observation.
# NOTE(review): the '(%)' category rows receive 'Count' / 'People' as well;
# confirm whether percentages should carry a different measure type and unit.
constant_dimensions = [
    HDimConst(label, value)
    for label, value in (
        ('Measure Type', 'Count'),
        ('Unit', 'People'),
        ('Period', '2006-2016'),
        ('Sex', 'Persons'),
    )
]
Dimensions = header_dimensions + constant_dimensions

In [10]:
# Combine the observation cells with their dimensions into a conversion segment.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Uncomment to render an HTML preview of the segment for visual checking.
# savepreviewhtml(c1)

In [11]:
# Flatten the conversion segment into a pandas DataFrame and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Treatment Type,Category,Age,Measure Type,Unit,Period,Sex
0,95,,Alcohol Only,Total,Under 18,Count,People,2006-2016,Persons
1,324,,Drugs Only,Total,Under 18,Count,People,2006-2016,Persons
2,294,,Drugs & Alcohol,Total,Under 18,Count,People,2006-2016,Persons
3,713,,Total,Total,Under 18,Count,People,2006-2016,Persons
4,2482,,Alcohol Only,Total,18 and over,Count,People,2006-2016,Persons
5,1712,,Drugs Only,Total,18 and over,Count,People,2006-2016,Persons
6,1062,,Drugs & Alcohol,Total,18 and over,Count,People,2006-2016,Persons
7,5256,,Total,Total,18 and over,Count,People,2006-2016,Persons
8,2577,,Alcohol Only,Total,Treatment Type,Count,People,2006-2016,Persons
9,2036,,Drugs Only,Total,Treatment Type,Count,People,2006-2016,Persons


In [12]:
# Rename the 'OBS' column to 'Value'; every other column name is kept as-is.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Drop rows whose value equals zero.
# NOTE(review): this also discards genuine zero observations (the sheet contains
# cells holding 0.0) — confirm that removing them is intended.
is_zero = new_table['Value'] == 0
new_table = new_table[~is_zero]

In [14]:
# Drop rows whose value is the empty string.
# Presumably these are the cells that held a marker such as '*' or '-' rather
# than a number — TODO confirm against the DATAMARKER column.
has_value = new_table['Value'] != ''
new_table = new_table.loc[has_value]

In [15]:
# Inspect the column dtypes after the filters above.
new_table.dtypes

Value             object
DATAMARKER        object
Treatment Type    object
Category          object
Age               object
Measure Type      object
Unit              object
Period            object
Sex               object
dtype: object

In [16]:
# Spot-check the last five rows of the table.
new_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Treatment Type,Category,Age,Measure Type,Unit,Period,Sex
161,3.1,,,Prison (%),Overall Total,Count,People,2006-2016,Persons
168,5.4,,Alcohol Only,Emergency admissions (HIS) (%),Treatment Type,Count,People,2006-2016,Persons
169,0.4,,Drugs Only,Emergency admissions (HIS) (%),Treatment Type,Count,People,2006-2016,Persons
170,0.7,,Drugs & Alcohol,Emergency admissions (HIS) (%),Treatment Type,Count,People,2006-2016,Persons
171,2.6,,,Emergency admissions (HIS) (%),Overall Total,Count,People,2006-2016,Persons


In [17]:
# Non-null count per column, taken before the '-' filter that follows.
new_table.count()

Value             146
DATAMARKER          0
Treatment Type    131
Category          146
Age               146
Measure Type      146
Unit              146
Period            146
Sex               146
dtype: int64

In [18]:
# Drop rows whose value is the literal '-' placeholder.
# NOTE(review): in the recorded run the per-column counts are identical before
# and after this step, so it may be redundant — confirm.
is_dash = new_table['Value'] == '-'
new_table = new_table[~is_dash]

In [19]:
# Re-check the non-null count per column after the '-' filter.
new_table.count()

Value             146
DATAMARKER          0
Treatment Type    131
Category          146
Age               146
Measure Type      146
Unit              146
Period            146
Sex               146
dtype: int64

In [20]:
def user_perc(x):
    """Map the stray 'Treatment Type' age label to 'All years'.

    Any value whose string form is exactly 'Treatment Type' becomes
    'All years'; every other value is returned unchanged.
    """
    return 'All years' if str(x) == 'Treatment Type' else x
    
# The helper only reads the 'Age' value, so map it over that one column instead
# of applying row-wise across the whole frame (row-wise apply does not yield a
# Series when the frame has no rows, which breaks the column assignment).
# assign() builds a new frame rather than writing into a frame that was produced
# by boolean filtering, which avoids SettingWithCopy warnings.
new_table = new_table.assign(Age=new_table['Age'].map(user_perc))

In [21]:
# Observations with no treatment-type header above them (the 'Overall Total'
# column) come through as missing; label them 'Total'.
# The fill is done at frame level and rebound: an in-place fillna on
# new_table['Treatment Type'] operates on a temporary column object, which is
# deprecated and leaves the frame untouched under pandas copy-on-write.
new_table = new_table.fillna({'Treatment Type': 'Total'})

In [22]:
# Keep these columns in this order; columns not listed (e.g. DATAMARKER) are dropped.
output_columns = ['Period', 'Category', 'Age', 'Sex',
                  'Treatment Type', 'Measure Type', 'Value', 'Unit']
new_table = new_table[output_columns]

In [23]:
# Spot-check the first five rows after reordering the columns.
new_table.head(5)

Unnamed: 0,Period,Category,Age,Sex,Treatment Type,Measure Type,Value,Unit
0,2006-2016,Total,Under 18,Persons,Alcohol Only,Count,95,People
1,2006-2016,Total,Under 18,Persons,Drugs Only,Count,324,People
2,2006-2016,Total,Under 18,Persons,Drugs & Alcohol,Count,294,People
3,2006-2016,Total,Under 18,Persons,Total,Count,713,People
4,2006-2016,Total,18 and over,Persons,Alcohol Only,Count,2482,People


In [24]:
# Export step, currently disabled: when uncommented it creates an 'out' folder
# and writes the table to out/tab4.2.csv without the index.
# destinationFolder = Path('out')
# destinationFolder.mkdir(exist_ok=True, parents=True)

# new_table.to_csv(destinationFolder / ('tab4.2.csv'), index = False)

In [25]:
# Final check: non-null count per column of the finished table.
new_table.count()

Period            146
Category          146
Age               146
Sex               146
Treatment Type    146
Measure Type      146
Value             146
Unit              146
dtype: int64