Census of Drug and Alcohol Treatment Services in Northern Ireland: Breakdown by Service Type

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

# HTTP session wrapped by CacheControl, with a file cache under '.cache' and
# the LastModified heuristic, so re-running the notebook can reuse a cached copy.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

# Local folder that holds the downloaded source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
response = session.get(inputURL)
# Fail loudly on an HTTP error (404, 500, ...) rather than saving an error
# page to disk under an .xlsx name and failing later when it is parsed.
response.raise_for_status()
with open(inputFile, 'wb') as f:
    f.write(response.content)

https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx

In [3]:
# Load only the 'Table 2' worksheet; loadxlstabs returns a list, so take its first entry.
tab = loadxlstabs(
    inputFile,
    sheetids='Table 2',
)[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 2']


In [4]:
# Observations: every non-blank cell in the block starting at B15 (down and
# right), minus everything from row 22 downwards (B22 expanded down and right).
observations = (
    tab.excel_ref('B15').expand(DOWN).expand(RIGHT).is_not_blank()
    - tab.excel_ref('B22').expand(DOWN).expand(RIGHT)
)


In [5]:
# Display the selected observation cells to check the selection by eye.
observations

{<E18 '-'>, <J16 1719.0>, <K17 724.0>, <D15 294.0>, <G16 '*'>, <G18 '*'>, <H21 '*'>, <H17 407.0>, <K16 1312.0>, <E21 '-'>, <F17 769.0>, <K19 64.4>, <I15 5256.0>, <H16 '*'>, <I17 1606.0>, <J21 0.2>, <I21 '*'>, <H18 '*'>, <J19 66.7>, <H19 '*'>, <G17 430.0>, <L16 569.0>, <F18 '*'>, <C15 324.0>, <J20 33.1>, <D20 68.7>, <C20 90.7>, <L17 609.0>, <L19 42.0>, <K20 35.6>, <B19 '*'>, <G19 '*'>, <M15 5969.0>, <D17 202.0>, <M21 3.1>, <J15 2577.0>, <M16 3600.0>, <B15 95.0>, <B16 '*'>, <K15 2036.0>, <E15 713.0>, <M19 60.3>, <L20 44.9>, <C19 '*'>, <M18 183.0>, <K21 0.0>, <I20 30.6>, <L21 13.1>, <C16 '*'>, <B21 '-'>, <D19 '*'>, <L18 178.0>, <M17 2186.0>, <F20 31.0>, <J18 5.0>, <C17 294.0>, <B18 '-'>, <E17 580.0>, <I16 '*'>, <L15 1356.0>, <E16 '*'>, <B20 88.4>, <D16 '*'>, <J17 853.0>, <G15 1712.0>, <E19 '*'>, <F15 2482.0>, <B17 84.0>, <I19 '*'>, <I18 '*'>, <K18 0.0>, <H15 1062.0>, <E20 81.3>, <F21 '*'>, <M20 36.6>, <F16 '*'>, <F19 '*'>, <G20 25.1>, <H20 38.3>, <G21 '*'>}

In [6]:
# Row headers: the non-blank cells in column A from A15 downwards.
Service = (
    tab.excel_ref('A15')
    .expand(DOWN)
    .is_not_blank()
)
Service

{<A20 'Non-statutory (%)'>, <A21 'Prison (%)'>, <A18 'Prison'>, <A17 'Non-statutory'>, <A16 'Statutory'>, <A15 'Total'>, <A19 'Statutory (%)'>}

In [7]:
# Column headers: the non-blank cells in row 14 from B14 rightwards.
Treatment = (
    tab.excel_ref('B14')
    .expand(RIGHT)
    .is_not_blank()
)
Treatment

{<J14 'Alcohol Only'>, <I14 'Total'>, <L14 'Drugs & Alcohol'>, <B14 'Alcohol Only'>, <E14 'Total'>, <C14 'Drugs Only'>, <H14 'Drugs & Alcohol'>, <K14 'Drugs Only'>, <G14 'Drugs Only'>, <F14 'Alcohol Only'>, <D14 'Drugs & Alcohol'>}

In [8]:
# Grouping headers: the non-blank cells in row 13 from B13 rightwards.
age = (
    tab.excel_ref('B13')
    .expand(RIGHT)
    .is_not_blank()
)
age

{<F13 '18 and over'>, <M13 'Overall Total'>, <B13 'Under 18 '>, <J13 'Treatment Type'>}

In [9]:
# Dimensions attached to each observation cell: two header lookups taken
# directly above / directly left, the closest row-13 header to the left for
# Age, and two constants.
# NOTE(review): 'Measure Type' and 'Unit' are constants, so the "(%)" service
# rows are labelled Count / People as well — confirm this is intended.
Dimensions = [
    HDim(Treatment, 'Treatment Type', DIRECTLY, ABOVE),
    HDim(Service, 'Service Type', DIRECTLY, LEFT),
    HDim(age, 'Age', CLOSEST, LEFT),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
]

In [10]:
# Bind the observation cells to the dimensions declared in `Dimensions`.
# NOTE(review): processTIMEUNIT=True is passed although `Dimensions` declares
# no time dimension — confirm whether the flag is needed here.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Presumably renders an HTML preview of the segment when uncommented; disabled here.
# savepreviewhtml(c1)

In [11]:
# Convert the conversion segment to a pandas DataFrame and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Treatment Type,Service Type,Age,Measure Type,Unit
0,95,,Alcohol Only,Total,Under 18,Count,People
1,324,,Drugs Only,Total,Under 18,Count,People
2,294,,Drugs & Alcohol,Total,Under 18,Count,People
3,713,,Total,Total,Under 18,Count,People
4,2482,,Alcohol Only,Total,18 and over,Count,People
5,1712,,Drugs Only,Total,18 and over,Count,People
6,1062,,Drugs & Alcohol,Total,18 and over,Count,People
7,5256,,Total,Total,18 and over,Count,People
8,2577,,Alcohol Only,Total,Treatment Type,Count,People
9,2036,,Drugs Only,Total,Treatment Type,Count,People


In [12]:
# Rename the 'OBS' column to 'Value'; all other column names are unchanged.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Inspect the column dtypes.
new_table.dtypes

Value             object
DATAMARKER        object
Treatment Type    object
Service Type      object
Age               object
Measure Type      object
Unit              object
dtype: object

In [14]:
# Show the last five rows.
new_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Treatment Type,Service Type,Age,Measure Type,Unit
75,,*,Total,Prison (%),18 and over,Count,People
76,0.2,,Alcohol Only,Prison (%),Treatment Type,Count,People
77,0.0,,Drugs Only,Prison (%),Treatment Type,Count,People
78,13.1,,Drugs & Alcohol,Prison (%),Treatment Type,Count,People
79,3.1,,,Prison (%),Overall Total,Count,People


In [15]:
# Non-null count per column, before the Value filters below are applied.
new_table.count()

Value             80
DATAMARKER        28
Treatment Type    73
Service Type      80
Age               80
Measure Type      80
Unit              80
dtype: int64

In [16]:
# Keep only the rows whose Value is not 0.
# NOTE(review): this also removes real zero observations (for example the
# 0.0 cells at K18 and K21 in the observation preview) — confirm that dropping
# them is intended.
new_table = new_table.loc[new_table['Value'] != 0]

In [17]:
# Keep only the rows whose Value is not the empty string (the rows that carry
# a DATAMARKER such as '*' have an empty Value).
new_table = new_table.loc[new_table['Value'] != '']

In [18]:
# Non-null count per column, after the Value filters above.
new_table.count()

Value             50
DATAMARKER         0
Treatment Type    43
Service Type      50
Age               50
Measure Type      50
Unit              50
dtype: int64

In [19]:
def user_perc(x):
    """Map the label 'Treatment Type' to 'All years'; pass anything else through.

    Args:
        x: Any value; it is compared via ``str(x)``.

    Returns:
        'All years' when ``str(x)`` equals 'Treatment Type' exactly,
        otherwise ``x`` itself, unchanged.
    """
    return 'All years' if str(x) == 'Treatment Type' else x
    
# Relabel the Age column with user_perc. Series.map works on the single
# column (no row-wise DataFrame.apply needed, and it behaves on an empty
# frame). assign() returns a new frame instead of writing into the frame
# produced by the boolean filters above, which avoids SettingWithCopyWarning.
new_table = new_table.assign(Age=new_table['Age'].map(user_perc))

In [20]:
# Keep only the output columns, in their final order (DATAMARKER is dropped).
new_table = new_table.loc[:, [
    'Service Type',
    'Age',
    'Treatment Type',
    'Measure Type',
    'Value',
    'Unit',
]]

In [21]:
# Show the first five rows of the tidied table.
new_table.head(5)

Unnamed: 0,Service Type,Age,Treatment Type,Measure Type,Value,Unit
0,Total,Under 18,Alcohol Only,Count,95,People
1,Total,Under 18,Drugs Only,Count,324,People
2,Total,Under 18,Drugs & Alcohol,Count,294,People
3,Total,Under 18,Total,Count,713,People
4,Total,18 and over,Alcohol Only,Count,2482,People


In [22]:
# Create the output folder if it is missing, then write the tidied table
# there as CSV without the index column.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

new_table.to_csv(destinationFolder / 'tab2.2.csv', index=False)