Census of Drug and Alcohol Treatment Services in Northern Ireland: Breakdown by Service Type

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session whose responses are cached on disk under '.cache', so repeated
# runs of the notebook do not have to re-download the spreadsheet every time.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source spreadsheet.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
response = session.get(inputURL)
# Fail fast on an HTTP error: without this check a 404/500 error page would be
# written to disk as if it were the workbook and only fail later, when parsed.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx

In [3]:
# Load only the 'Table 2' worksheet from the downloaded workbook and keep the
# first (and, per the sheetids filter, only requested) tab.
tab = loadxlstabs(
    inputFile,
    sheetids='Table 2',
)[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 2']


In [4]:
# Observation cells: every non-blank cell in the rectangle that starts at B5
# and extends down and to the right, minus everything from row 12 downwards
# (column B onwards), which is excluded from this extract.
observations = (
    tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
    - tab.excel_ref('B12').expand(DOWN).expand(RIGHT)
)


In [5]:
# Show the selected observation cells so the selection can be checked by eye.
observations

{<E7 432.0>, <H5 1010.0>, <I5 540.0>, <J6 141.0>, <D6 428.0>, <C11 0.0>, <K9 18.9>, <H10 35.6>, <G8 172.0>, <C9 61.8>, <C6 925.0>, <J5 324.0>, <F10 30.0>, <F8 172.0>, <K10 80.0>, <H9 64.4>, <K7 148.0>, <I11 0.0>, <B5 1567.0>, <L6 1143.0>, <M6 1178.0>, <E8 0.0>, <M11 0.6>, <L7 537.0>, <I9 71.7>, <D10 42.3>, <D7 437.0>, <F11 4.8>, <F5 3567.0>, <J10 53.1>, <G9 59.1>, <I8 0.0>, <C7 571.0>, <B11 0.3>, <E11 0.0>, <E6 96.0>, <L5 1689.0>, <G7 1501.0>, <J11 3.4>, <M8 11.0>, <L9 67.7>, <E5 528.0>, <M7 685.0>, <B7 493.0>, <D9 41.5>, <D5 1032.0>, <L10 31.8>, <B10 31.5>, <G5 4095.0>, <J9 43.5>, <M5 1874.0>, <D8 167.0>, <F7 1069.0>, <J7 172.0>, <H8 0.0>, <B6 1069.0>, <F6 2326.0>, <I7 153.0>, <G6 2422.0>, <M10 36.6>, <C10 38.2>, <J8 11.0>, <H7 360.0>, <B9 68.2>, <D11 16.2>, <H6 650.0>, <M9 62.9>, <G11 4.2>, <I6 387.0>, <K8 11.0>, <E10 81.8>, <H11 0.0>, <K11 0.6>, <G10 36.7>, <K6 35.0>, <C8 0.0>, <K5 185.0>, <I10 28.3>, <C5 1496.0>, <F9 65.2>, <E9 18.2>, <B8 5.0>}

In [6]:
# Service-type row labels: the non-blank cells of column A from row 5 down.
Service = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
# Display the selection for a visual check.
Service

{<A7 'Non-statutory'>, <A19 'Statutory (%)'>, <A13 'Service Type'>, <A8 'Prison'>, <A10 'Non-statutory (%)'>, <A20 'Non-statutory (%)'>, <A21 'Prison (%)'>, <A5 'Total'>, <A6 'Statutory'>, <A18 'Prison'>, <A9 'Statutory (%)'>, <A11 'Prison (%)'>, <A17 'Non-statutory'>, <A15 'Total'>, <A16 'Statutory'>}

In [7]:
# Treatment-type column headers: the non-blank cells of row 4 from column B
# rightwards.
Treatment = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
# Display the selection for a visual check.
Treatment

{<C4 'Drugs Only'>, <G4 'Total'>, <I4 'Drugs Only'>, <K4 'Under 18s'>, <E4 'Under 18s'>, <F4 '18 and over'>, <J4 'Drugs & Alcohol'>, <D4 'Drugs & Alcohol'>, <L4 '18 and over'>, <H4 'Alcohol Only'>, <M4 'Total'>, <B4 'Alcohol Only'>}

In [8]:
# Sex banner headers: the non-blank cells of row 3 from column B rightwards.
sex = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
# Display the selection for a visual check.
sex

{<H3 'Female  '>, <B3 'Male'>}

In [9]:
# Dimensions attached to every observation cell:
#   - 'Treatment Type' comes from the row-4 header in the observation's column,
#   - 'Service Type' comes from the column-A label in the observation's row,
#   - 'Sex' comes from the closest row-3 banner header to the left,
#   - 'Measure Type' and 'Unit' are constants applied to every observation.
# NOTE(review): the observation selection also contains the percentage rows
# (e.g. 'Prison (%)'), which therefore receive Measure Type 'Count' and Unit
# 'People' as well - confirm whether those rows should be excluded or labelled
# as percentages.
Dimensions = [
    HDim(Treatment, 'Treatment Type', DIRECTLY, ABOVE),
    HDim(Service, 'Service Type', DIRECTLY, LEFT),
    HDim(sex, 'Sex', CLOSEST, LEFT),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
]

In [10]:
# Bind the observation cells to their dimensions so they can be exported as a
# flat table.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional visual check of the cell-to-dimension mapping (left disabled):
# savepreviewhtml(c1)

In [11]:
# Flatten the conversion segment into a DataFrame: one row per observation,
# with an 'OBS' value column plus one column per dimension.
new_table = c1.topandas()

# Header cells in the sheet can carry stray padding (the Sex banner reads
# 'Female  '), so trim the looked-up labels before they reach the output CSV.
for label_column in ['Treatment Type', 'Service Type', 'Sex']:
    new_table[label_column] = new_table[label_column].str.strip()

new_table




Unnamed: 0,OBS,Treatment Type,Service Type,Sex,Measure Type,Unit
0,1567.0,Alcohol Only,Total,Male,Count,People
1,1496.0,Drugs Only,Total,Male,Count,People
2,1032.0,Drugs & Alcohol,Total,Male,Count,People
3,528.0,Under 18s,Total,Male,Count,People
4,3567.0,18 and over,Total,Male,Count,People
5,4095.0,Total,Total,Male,Count,People
6,1010.0,Alcohol Only,Total,Female,Count,People
7,540.0,Drugs Only,Total,Female,Count,People
8,324.0,Drugs & Alcohol,Total,Female,Count,People
9,185.0,Under 18s,Total,Female,Count,People


In [12]:
# Rename databaker's 'OBS' column to 'Value'; all other columns keep their names.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Check the column dtypes after the rename.
new_table.dtypes

Value             float64
Treatment Type     object
Service Type       object
Sex                object
Measure Type       object
Unit               object
dtype: object

In [14]:
# Peek at the last five rows of the table.
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Service Type,Sex,Measure Type,Unit
77,0.0,Alcohol Only,Prison (%),Female,Count,People
78,0.0,Drugs Only,Prison (%),Female,Count,People
79,3.4,Drugs & Alcohol,Prison (%),Female,Count,People
80,0.6,Under 18s,Prison (%),Female,Count,People
81,0.6,Total,Prison (%),Female,Count,People


In [15]:
# Non-null count per column before the zero-value rows are filtered out.
new_table.count()

Value             82
Treatment Type    82
Service Type      82
Sex               82
Measure Type      82
Unit              82
dtype: int64

In [16]:
# Keep only the rows whose Value is not zero.
# NOTE(review): this also discards genuine zero observations from the sheet -
# confirm that dropping them is intended.
has_nonzero_value = new_table['Value'].ne(0)
new_table = new_table.loc[has_nonzero_value]

In [17]:
# Non-null count per column after the zero-value rows have been removed.
new_table.count()

Value             74
Treatment Type    74
Service Type      74
Sex               74
Measure Type      74
Unit              74
dtype: int64

In [18]:
# Put the columns into the order used for the output file.
new_table = new_table.loc[
    :,
    ['Service Type', 'Sex', 'Treatment Type', 'Measure Type', 'Value', 'Unit'],
]

In [19]:
# Peek at the first five rows in their final column order.
new_table.head(5)

Unnamed: 0,Service Type,Sex,Treatment Type,Measure Type,Value,Unit
0,Total,Male,Alcohol Only,Count,1567.0,People
1,Total,Male,Drugs Only,Count,1496.0,People
2,Total,Male,Drugs & Alcohol,Count,1032.0,People
3,Total,Male,Under 18s,Count,528.0,People
4,Total,Male,18 and over,Count,3567.0,People


In [20]:
# Create the output folder (and any missing parents) if it does not exist yet.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

# Write the tidy table as CSV, leaving out the DataFrame index.
new_table.to_csv(
    destinationFolder / 'tab2.1.csv',
    index=False,
)