Census of Drug and Alcohol Treatment Services in Northern Ireland:Table 3 – Breakdown by Residential Status

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table 3')[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 3']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('B12').expand(DOWN).expand(RIGHT)  


In [5]:
observations

{<E5 528.0>, <H9 5.6>, <F9 9.4>, <D5 1032.0>, <I7 523.0>, <D11 4.3>, <G5 4095.0>, <I8 12.0>, <M5 1874.0>, <M10 92.2>, <H8 44.0>, <K9 4.4>, <E6 0.0>, <M6 83.0>, <J9 6.5>, <B7 1388.0>, <I9 0.9>, <F10 87.2>, <C9 1.5>, <J11 2.2>, <C6 22.0>, <C11 2.2>, <J5 324.0>, <H11 4.4>, <D8 44.0>, <K7 181.0>, <B6 108.0>, <E10 95.5>, <E9 0.0>, <M11 3.4>, <J6 21.0>, <M9 4.4>, <B5 1567.0>, <C7 1441.0>, <B10 88.6>, <H10 90.0>, <C8 33.0>, <B8 71.0>, <F7 3109.0>, <E11 4.5>, <D6 204.0>, <I5 540.0>, <L5 1689.0>, <J7 296.0>, <B11 4.5>, <G8 148.0>, <K8 63.0>, <L10 91.6>, <H7 909.0>, <K11 3.4>, <F5 3567.0>, <G9 8.2>, <D7 784.0>, <G11 3.6>, <C10 96.3>, <D10 76.0>, <J10 91.4>, <G7 3613.0>, <E7 504.0>, <F6 334.0>, <K10 97.8>, <H6 57.0>, <I6 5.0>, <J8 7.0>, <C5 1496.0>, <F11 3.5>, <M8 63.0>, <D9 19.8>, <H5 1010.0>, <E8 24.0>, <I10 96.9>, <G10 88.2>, <M7 1728.0>, <L7 1547.0>, <K6 83.0>, <G6 334.0>, <F8 124.0>, <K5 185.0>, <B9 6.9>, <I11 2.2>}

In [6]:
Service = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Service

{<A19 'Residential (%)'>, <A8 'Mixed'>, <A13 'Residential Status'>, <A15 'Total'>, <A21 'Mixed (%)'>, <A11 'Mixed (%)'>, <A20 'Non-residential (%)'>, <A16 'Residential'>, <A6 'Residential'>, <A9 'Residential (%)'>, <A7 'Non-residential '>, <A5 'Total'>, <A18 'Mixed'>, <A17 'Non-residential '>, <A10 'Non-residential (%)'>}

In [7]:
Treatment = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Treatment

{<J4 'Drugs & Alcohol'>, <K4 'Under 18s'>, <E4 'Under 18s'>, <H4 'Alcohol Only'>, <M4 'Total'>, <C4 'Drugs Only'>, <F4 '18 and over'>, <I4 'Drugs Only'>, <D4 'Drugs & Alcohol'>, <L4 '18 and over'>, <G4 'Total'>, <B4 'Alcohol Only'>}

In [8]:
sex = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
sex

{<H3 'Female  '>, <B3 'Male'>}

In [9]:
Dimensions = [
            HDim(Treatment,'Treatment Type',DIRECTLY,ABOVE),
            HDim(Service,'Residential status',DIRECTLY,LEFT),
            HDim(sex,'Sex',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Residential status,Sex,Measure Type,Unit
0,1567.0,Alcohol Only,Total,Male,Count,People
1,1496.0,Drugs Only,Total,Male,Count,People
2,1032.0,Drugs & Alcohol,Total,Male,Count,People
3,528.0,Under 18s,Total,Male,Count,People
4,3567.0,18 and over,Total,Male,Count,People
5,4095.0,Total,Total,Male,Count,People
6,1010.0,Alcohol Only,Total,Female,Count,People
7,540.0,Drugs Only,Total,Female,Count,People
8,324.0,Drugs & Alcohol,Total,Female,Count,People
9,185.0,Under 18s,Total,Female,Count,People


In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.dtypes

Value                 float64
Treatment Type         object
Residential status     object
Sex                    object
Measure Type           object
Unit                   object
dtype: object

In [14]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Residential status,Sex,Measure Type,Unit
75,4.4,Alcohol Only,Mixed (%),Female,Count,People
76,2.2,Drugs Only,Mixed (%),Female,Count,People
77,2.2,Drugs & Alcohol,Mixed (%),Female,Count,People
78,3.4,Under 18s,Mixed (%),Female,Count,People
79,3.4,Total,Mixed (%),Female,Count,People


In [15]:
new_table.count()

Value                 80
Treatment Type        80
Residential status    80
Sex                   80
Measure Type          80
Unit                  80
dtype: int64

In [16]:
new_table = new_table[new_table['Value'] !=  0 ]

In [17]:
new_table.count()

Value                 78
Treatment Type        78
Residential status    78
Sex                   78
Measure Type          78
Unit                  78
dtype: int64

In [18]:
new_table = new_table[['Residential status','Sex','Treatment Type','Measure Type','Value','Unit']]

In [19]:
new_table.head(5)

Unnamed: 0,Residential status,Sex,Treatment Type,Measure Type,Value,Unit
0,Total,Male,Alcohol Only,Count,1567.0,People
1,Total,Male,Drugs Only,Count,1496.0,People
2,Total,Male,Drugs & Alcohol,Count,1032.0,People
3,Total,Male,Under 18s,Count,528.0,People
4,Total,Male,18 and over,Count,3567.0,People


In [20]:
destinationFolder = Path('out')
destinationFolder.mkdir(exist_ok=True, parents=True)

new_table.to_csv(destinationFolder / ('tab3.1.csv'), index = False)