Census of Drug and Alcohol Treatment Services in Northern Ireland:Breakdown by Age and Gender

In [1]:
from databaker.framework import *
import pandas as pd 

In [2]:
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified
from pathlib import Path

session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
response = session.get(inputURL)
with open(inputFile, 'wb') as f:
  f.write(response.content)

https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx

In [3]:
tab = loadxlstabs(inputFile, sheetids='Table 1')[0]

Loading in\data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 1']


In [4]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [5]:
observations

{<B9 67.4>, <I8 67.9>, <G11 34.2>, <G5 1712.0>, <K10 34.1>, <L7 324.0>, <J6 1567.0>, <B5 95.0>, <I10 100.0>, <B8 32.6>, <F11 43.1>, <H9 23.7>, <D7 72.0>, <L10 22.7>, <I11 100.0>, <M6 4095.0>, <I9 32.1>, <D11 42.0>, <M9 31.4>, <H10 20.2>, <M7 1874.0>, <C7 49.0>, <M8 68.6>, <F5 2482.0>, <F7 946.0>, <F10 47.2>, <C12 26.5>, <C10 45.4>, <E6 528.0>, <H5 1062.0>, <M11 100.0>, <J9 39.2>, <G8 71.3>, <H11 22.7>, <E11 100.0>, <L5 1356.0>, <J12 53.9>, <K8 73.5>, <L8 76.1>, <H8 76.3>, <B11 5.9>, <F6 1536.0>, <L12 17.3>, <F12 56.0>, <E7 185.0>, <D6 222.0>, <G6 1221.0>, <I6 3567.0>, <H12 14.9>, <E5 713.0>, <D9 24.5>, <L6 1032.0>, <D5 294.0>, <K11 36.5>, <K7 540.0>, <D10 41.2>, <C11 52.1>, <K5 2036.0>, <I5 5256.0>, <E9 25.9>, <L9 23.9>, <C8 84.9>, <I7 1689.0>, <G7 491.0>, <C5 324.0>, <J7 1010.0>, <B6 31.0>, <I12 100.0>, <G9 28.7>, <L11 25.2>, <H7 252.0>, <F9 38.1>, <D8 75.5>, <C9 15.1>, <G12 29.1>, <D12 38.9>, <K9 26.5>, <K6 1496.0>, <E8 74.1>, <G10 32.6>, <M12 100.0>, <J10 43.2>, <M10 100.0>, <J5 257

In [6]:
age = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
age

{<F3 '18 and over'>, <B3 'Under 18 '>, <J3 'Treatment Type'>, <M3 'Overall Total'>}

In [7]:
Treatment = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Treatment

{<J4 'Alcohol Only'>, <F4 'Alcohol Only'>, <D4 'Drugs & Alcohol'>, <G4 'Drugs Only'>, <K4 'Drugs Only'>, <E4 'Total'>, <H4 'Drugs & Alcohol'>, <L4 'Drugs & Alcohol'>, <C4 'Drugs Only'>, <B4 'Alcohol Only'>, <I4 'Total'>}

In [8]:
sex = tab.excel_ref('A5').expand(DOWN) - tab.excel_ref('A13').expand(DOWN)  
sex

{<A10 '% of Total'>, <A11 '% of all Males '>, <A9 'Female (%)'>, <A5 'Total'>, <A6 'Male'>, <A12 '% of all Females'>, <A8 'Male (%)'>, <A7 'Female'>}

In [9]:
Dimensions = [
            HDim(Treatment,'Treatment Type',DIRECTLY,ABOVE),
            HDim(sex,'Sex',DIRECTLY,LEFT),
            HDim(age,'Age',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People'),
            HDimConst('Period','2006-2016'),
            HDimConst('Category','All')
            ]

In [10]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [11]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Sex,Age,Measure Type,Unit,Period,Category
0,95.0,Alcohol Only,Total,Under 18,Count,People,2006-2016,All
1,324.0,Drugs Only,Total,Under 18,Count,People,2006-2016,All
2,294.0,Drugs & Alcohol,Total,Under 18,Count,People,2006-2016,All
3,713.0,Total,Total,Under 18,Count,People,2006-2016,All
4,2482.0,Alcohol Only,Total,18 and over,Count,People,2006-2016,All
5,1712.0,Drugs Only,Total,18 and over,Count,People,2006-2016,All
6,1062.0,Drugs & Alcohol,Total,18 and over,Count,People,2006-2016,All
7,5256.0,Total,Total,18 and over,Count,People,2006-2016,All
8,2577.0,Alcohol Only,Total,Treatment Type,Count,People,2006-2016,All
9,2036.0,Drugs Only,Total,Treatment Type,Count,People,2006-2016,All


In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.dtypes

Value             float64
Treatment Type     object
Sex                object
Age                object
Measure Type       object
Unit               object
Period             object
Category           object
dtype: object

In [14]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Sex,Age,Measure Type,Unit,Period,Category
91,100.0,Total,% of all Females,18 and over,Count,People,2006-2016,All
92,53.9,Alcohol Only,% of all Females,Treatment Type,Count,People,2006-2016,All
93,28.8,Drugs Only,% of all Females,Treatment Type,Count,People,2006-2016,All
94,17.3,Drugs & Alcohol,% of all Females,Treatment Type,Count,People,2006-2016,All
95,100.0,,% of all Females,Overall Total,Count,People,2006-2016,All


In [15]:
def user_perc(x):
    
    if str(x) == 'Treatment Type':
        return 'All years'
    else:
        return x
    
new_table['Age'] = new_table.apply(lambda row: user_perc(row['Age']), axis = 1)

In [16]:
def user_perc(x):
    
    if str(x) == 'Total':
        return 'Persons'
    else:
        return x
    
new_table['Sex'] = new_table.apply(lambda row: user_perc(row['Sex']), axis = 1)


In [17]:
new_table['Treatment Type'].fillna('All', inplace = True)

In [18]:
new_table = new_table[['Period','Category','Age','Sex','Treatment Type','Measure Type','Value','Unit']]

In [19]:
new_table.head(5)

Unnamed: 0,Period,Category,Age,Sex,Treatment Type,Measure Type,Value,Unit
0,2006-2016,All,Under 18,Persons,Alcohol Only,Count,95.0,People
1,2006-2016,All,Under 18,Persons,Drugs Only,Count,324.0,People
2,2006-2016,All,Under 18,Persons,Drugs & Alcohol,Count,294.0,People
3,2006-2016,All,Under 18,Persons,Total,Count,713.0,People
4,2006-2016,All,18 and over,Persons,Alcohol Only,Count,2482.0,People


In [20]:
# destinationFolder = Path('out')
# destinationFolder.mkdir(exist_ok=True, parents=True)

# new_table.to_csv(destinationFolder / ('tab1.csv'), index = False)