Census of Drug and Alcohol Treatment Services in Northern Ireland: Breakdown by Service Type

In [159]:
from gssutils import *
import numpy
if is_interactive():
    # Download the source workbook and load 'Table 2' into `tab`.
    # NOTE(review): is_interactive() comes from gssutils; presumably it is true only
    # when this notebook is run on its own, in which case `tab` must be supplied by
    # the caller otherwise — confirm.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped with a file-backed HTTP cache stored in '.cache'.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Local folder that receives the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
    inputFile = sourceFolder / 'data-census-drug-alcohol-treatment-services.xlsx'
    response = session.get(inputURL)
    # Stop on a 4xx/5xx reply instead of saving an error page under the .xlsx name,
    # which would only surface later as a confusing spreadsheet-parsing failure.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 2')[0]

Loading in/data-census-drug-alcohol-treatment-services.xlsx which has size 46265 bytes
Table names: ['Table 2']


In [160]:
# Observation cells: every non-blank cell from B16 downwards and rightwards,
# minus everything from row 22 downwards (B22 and all cells below/right of it).
cells_from_b16 = tab.excel_ref('B16').expand(DOWN).expand(RIGHT).is_not_blank()
cells_from_b22 = tab.excel_ref('B22').expand(DOWN).expand(RIGHT)
observations = cells_from_b16 - cells_from_b22


In [161]:
# Show the selected observation cells; the bag is displayed as an unordered set.
observations

{<G21 '*'>, <K17 724.0>, <L18 178.0>, <K16 1312.0>, <F20 31.0>, <B19 '*'>, <I21 '*'>, <H20 38.3>, <J20 33.1>, <I17 1606.0>, <K20 35.6>, <F21 '*'>, <B21 '-'>, <H18 '*'>, <C20 90.7>, <H17 407.0>, <C17 294.0>, <M16 3600.0>, <E18 '-'>, <K19 64.4>, <M19 60.3>, <I16 '*'>, <C16 '*'>, <C19 '*'>, <B18 '-'>, <F17 769.0>, <D20 68.7>, <G19 '*'>, <I19 '*'>, <M21 3.1>, <I18 '*'>, <J19 66.7>, <J21 0.2>, <H16 '*'>, <G17 430.0>, <K21 0.0>, <G16 '*'>, <E21 '-'>, <F18 '*'>, <E19 '*'>, <B20 88.4>, <F16 '*'>, <G20 25.1>, <E16 '*'>, <L20 44.9>, <M17 2186.0>, <L17 609.0>, <L19 42.0>, <J16 1719.0>, <E17 580.0>, <D16 '*'>, <H19 '*'>, <D17 202.0>, <J17 853.0>, <G18 '*'>, <L16 569.0>, <H21 '*'>, <I20 30.6>, <E20 81.3>, <J18 5.0>, <B17 84.0>, <M20 36.6>, <L21 13.1>, <K18 0.0>, <B16 '*'>, <D19 '*'>, <F19 '*'>, <M18 183.0>}

In [162]:
# Service-type row labels: the non-blank cells in column A from A15 downwards.
Service = (
    tab.excel_ref('A15')
    .expand(DOWN)
    .is_not_blank()
)
Service

{<A20 'Non-statutory (%)'>, <A15 'Total'>, <A16 'Statutory'>, <A18 'Prison'>, <A19 'Statutory (%)'>, <A17 'Non-statutory'>, <A21 'Prison (%)'>}

In [163]:
# Treatment-type column headers: row 14 from column B rightwards.
# Blank cells are kept on purpose (no is_not_blank()); the empty header that results
# is relabelled 'Total' in the final cell of this notebook.
Treatment = (
    tab.excel_ref('B14')
    .expand(RIGHT)
)
Treatment

{<H14 'Drugs & Alcohol'>, <C14 'Drugs Only'>, <I14 'Total'>, <J14 'Alcohol Only'>, <L14 'Drugs & Alcohol'>, <B14 'Alcohol Only'>, <D14 'Drugs & Alcohol'>, <G14 'Drugs Only'>, <M14 ''>, <F14 'Alcohol Only'>, <K14 'Drugs Only'>, <E14 'Total'>}

In [164]:
# Age-band headers: the non-blank cells of row 13 from column B rightwards.
# The same row also holds the group headings 'Treatment Type' and 'Overall Total',
# so those strings end up as Age values too.
age = (
    tab.excel_ref('B13')
    .expand(RIGHT)
    .is_not_blank()
)
age

{<B13 'Under 18 '>, <F13 '18 and over'>, <J13 'Treatment Type'>, <M13 'Overall Total'>}

In [165]:
# Dimension definitions attached to every observation cell.
# Header-driven dimensions come first, followed by constants that apply to the whole table.
# NOTE(review): rows whose Service Type ends in '(%)' also receive Measure Type 'Count'
# and Unit 'People' from the constants below — confirm this is intended.
Dimensions = [
    HDim(Treatment, 'Treatment Type', CLOSEST, LEFT),
    HDim(Service, 'Service Type', DIRECTLY, LEFT),
    HDim(age, 'Age', CLOSEST, LEFT),
    HDimConst('Measure Type', 'Count'),
    HDimConst('Unit', 'People'),
    HDimConst('Period', '1 March 2017'),
    HDimConst('Sex', 'Persons'),
]

In [166]:
# Combine the observation cells with their dimensions into a conversion segment.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# savepreviewhtml(c1)

In [167]:
# Convert the conversion segment into a pandas DataFrame (one row per observation cell).
new_table = c1.topandas()
# Disabled relabelling. Later cells handle the 'Treatment Type' age label and the blank
# Treatment Type instead; note that 'Overall Total' is not relabelled there.
#new_table.loc[new_table['Age'] == 'Treatment Type', 'Age'] = 'All Ages'
#new_table.loc[new_table['Age'] == 'Overall Total', 'Age'] = 'All Ages'
#new_table.loc[new_table['Treatment Type'] == '', 'Treatment Type'] = 'Total'
new_table




Unnamed: 0,OBS,DATAMARKER,Treatment Type,Service Type,Age,Measure Type,Unit,Period,Sex
0,,*,Alcohol Only,Statutory,Under 18,Count,People,1 March 2017,Persons
1,,*,Drugs Only,Statutory,Under 18,Count,People,1 March 2017,Persons
2,,*,Drugs & Alcohol,Statutory,Under 18,Count,People,1 March 2017,Persons
3,,*,Total,Statutory,Under 18,Count,People,1 March 2017,Persons
4,,*,Alcohol Only,Statutory,18 and over,Count,People,1 March 2017,Persons
5,,*,Drugs Only,Statutory,18 and over,Count,People,1 March 2017,Persons
6,,*,Drugs & Alcohol,Statutory,18 and over,Count,People,1 March 2017,Persons
7,,*,Total,Statutory,18 and over,Count,People,1 March 2017,Persons
8,1719,,Alcohol Only,Statutory,Treatment Type,Count,People,1 March 2017,Persons
9,1312,,Drugs Only,Statutory,Treatment Type,Count,People,1 March 2017,Persons


In [168]:
# Rename the 'OBS' column to 'Value'; every other column label is left as it is.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [169]:
# Inspect column dtypes: every column, including Value, is still 'object' at this point.
new_table.dtypes

Value             object
DATAMARKER        object
Treatment Type    object
Service Type      object
Age               object
Measure Type      object
Unit              object
Period            object
Sex               object
dtype: object

In [170]:
# Look at the last rows: the '(%)' service rows and the blank Treatment Type of the overall-total column.
new_table.tail(5)

Unnamed: 0,Value,DATAMARKER,Treatment Type,Service Type,Age,Measure Type,Unit,Period,Sex
63,,*,Total,Prison (%),18 and over,Count,People,1 March 2017,Persons
64,0.2,,Alcohol Only,Prison (%),Treatment Type,Count,People,1 March 2017,Persons
65,0.0,,Drugs Only,Prison (%),Treatment Type,Count,People,1 March 2017,Persons
66,13.1,,Drugs & Alcohol,Prison (%),Treatment Type,Count,People,1 March 2017,Persons
67,3.1,,,Prison (%),Overall Total,Count,People,1 March 2017,Persons


In [171]:
# Non-null count per column before the zero/blank Value rows are removed in the next cells.
new_table.count()

Value             68
DATAMARKER        28
Treatment Type    68
Service Type      68
Age               68
Measure Type      68
Unit              68
Period            68
Sex               68
dtype: int64

In [172]:
# Discard rows whose Value equals 0.
# NOTE(review): this also removes genuine zero observations (e.g. cell K18 is 0.0) — confirm intended.
zero_valued = new_table['Value'] == 0
new_table = new_table[~zero_valued]

In [173]:
# Drop rows whose Value is the empty string; these are the cells that carry only a
# data marker ('*' or '-') instead of a number.
# .copy() makes the result an independent frame, so the column assignments in the
# following cells do not act on a filtered slice (SettingWithCopyWarning).
new_table = new_table[new_table['Value'] != ''].copy()

In [174]:
# Non-null count per column after filtering; DATAMARKER drops to 0 because the marked rows had blank Values.
new_table.count()

Value             38
DATAMARKER         0
Treatment Type    38
Service Type      38
Age               38
Measure Type      38
Unit              38
Period            38
Sex               38
dtype: int64

In [175]:
def user_perc(x):
    """Translate the placeholder age label 'Treatment Type' into 'All years'.

    The check is made on ``str(x)``, so non-string inputs are accepted.
    Any value other than 'Treatment Type' is returned unchanged.
    """
    return 'All years' if str(x) == 'Treatment Type' else x
    
# Relabel the Age column value by value with user_perc.
# Series.map replaces the row-wise DataFrame.apply(axis=1): it avoids building a Series
# for every row and still returns a Series when the frame is empty.
new_table['Age'] = new_table['Age'].map(user_perc)

In [176]:
# Replace missing (NaN) Treatment Type labels with 'all'. The result is assigned back
# to the column: calling fillna(inplace=True) on a column selection is chained
# assignment, which warns in pandas 2.x and has no effect under Copy-on-Write.
new_table['Treatment Type'] = new_table['Treatment Type'].fillna('all')
# new_table['Service Type'] = 'All'
# Dimensions not broken down in this table get the constant value 'all'.
new_table['Residential Status'] = 'all'
new_table['Health and Social Care Trust'] = 'all'

In [177]:
# Keep only the output columns, in their final order (DATAMARKER is not carried over).
output_columns = [
    'Period',
    'Sex',
    'Age',
    'Service Type',
    'Residential Status',
    'Treatment Type',
    'Health and Social Care Trust',
    'Measure Type',
    'Unit',
    'Value',
]
new_table = new_table[output_columns]

In [178]:
# Preview the reshaped table; the row with an empty Treatment Type is relabelled in the next cell.
new_table.head(5)

Unnamed: 0,Period,Sex,Age,Service Type,Residential Status,Treatment Type,Health and Social Care Trust,Measure Type,Unit,Value
8,1 March 2017,Persons,All years,Statutory,all,Alcohol Only,all,Count,People,1719
9,1 March 2017,Persons,All years,Statutory,all,Drugs Only,all,Count,People,1312
10,1 March 2017,Persons,All years,Statutory,all,Drugs & Alcohol,all,Count,People,569
11,1 March 2017,Persons,Overall Total,Statutory,all,,all,Count,People,3600
12,1 March 2017,Persons,Under 18,Non-statutory,all,Alcohol Only,all,Count,People,84


In [182]:
# An empty Treatment Type (taken from the blank header cell M14) is labelled 'Total';
# every other label is kept as it is.
treatment_labels = new_table['Treatment Type']
is_blank_label = treatment_labels == ''
new_table['Treatment Type'] = numpy.where(is_blank_label, 'Total', treatment_labels)
#new_table.to_csv('testCompare.csv', index = False)