Census of Drug and Alcohol Treatment Type Services in Northern Ireland:Table 5 Comparison table

In [2]:
from gssutils import *
import numpy
if is_interactive():
    import requests
    
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.health-ni.gov.uk/sites/default/files/publications/dhssps/data-census-drug-alcohol-treatment-services.xlsx'
    inputFile = sourceFolder / 'data-census-drug-alcohol-Treatment Type-services.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)
    tab = loadxlstabs(inputFile, sheetids='Table 5')[0]

Loading in/data-census-drug-alcohol-Treatment Type-services.xlsx which has size 46265 bytes
Table names: ['Table 5']


In [1340]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank() - tab.excel_ref('J8').expand(DOWN).expand(RIGHT)  


In [1341]:
observations

{<I9 37.1>, <F23 5775.0>, <D20 205.0>, <G12 93.3>, <F27 675.0>, <F29 1015.0>, <F12 5518.0>, <C31 2.5>, <G30 3.1>, <G22 2.4>, <D16 1224.0>, <F8 4066.0>, <B20 257.0>, <I27 18.5>, <H9 3176.0>, <B19 3585.0>, <H14 3891.0>, <G20 3.1>, <J6 5969.0>, <I16 23.9>, <I8 62.9>, <B30 257.0>, <F18 1905.0>, <B18 1741.0>, <B12 4736.0>, <G29 17.2>, <H30 313.0>, <E12 89.0>, <E22 2.3>, <C23 97.9>, <H31 129.0>, <B23 5467.0>, <H20 313.0>, <I20 3.7>, <G6 100.0>, <E26 23.1>, <E6 100.0>, <D18 1606.0>, <H16 2045.0>, <G28 21.0>, <G18 32.2>, <I29 15.0>, <I30 3.7>, <E11 11.0>, <B15 1118.0>, <B22 116.0>, <G8 68.7>, <D30 205.0>, <C12 84.8>, <E31 3.1>, <H18 3646.0>, <D25 1272.0>, <B11 847.0>, <D8 4244.0>, <F16 1291.0>, <C22 2.1>, <E9 27.4>, <C9 34.0>, <F15 1514.0>, <G11 6.7>, <E18 27.5>, <D15 1294.0>, <C18 31.2>, <B25 5185.0>, <G15 25.6>, <E14 56.9>, <I18 42.6>, <E20 3.5>, <F14 3111.0>, <G23 97.6>, <G16 21.8>, <E28 13.5>, <D6 5846.0>, <D9 1602.0>, <G25 26.0>, <H8 5377.0>, <F9 1850.0>, <C30 4.6>, <C14 62.3>, <D11 644.0

In [1342]:
Service = tab.excel_ref('A').expand(DOWN).by_index([6,7,10,13,17,21,24])
Service

{<A6 'Total'>, <A10 'Age'>, <A13 'Treatment type'>, <A7 'Gender'>, <A24 'Trust'>, <A17 'Service type'>, <A21 'Residential Status'>}

In [1343]:
TreatmentType = tab.excel_ref('A').expand(DOWN).is_not_blank() - tab.excel_ref('A').expand(DOWN).by_index([6,7,10,13,17,21,24])
TreatmentType

{<A8 'Male'>, <A11 'Under 18 '>, <A18 'Statutory'>, <A31 'Emergency admissions (HIS)'>, <A25 'Belfast'>, <A19 'Non-statutory'>, <A30 'Prison'>, <A20 'Prison'>, <A23 'Non-residential / Mixed'>, <A15 'Drugs Only'>, <A16 'Drugs & Alcohol'>, <A22 'Residential'>, <A27 'South Eastern'>, <A28 'Southern'>, <A1 'Table 5 – Comparison Table'>, <A26 'Northern'>, <A12 '18 and over'>, <A9 'Female  '>, <A14 'Alcohol Only'>, <A29 'Western'>}

In [1344]:
month = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
month

{<F3 '1st March '>, <H3 '1st September'>, <D3 '1st March '>, <J3 '1st March '>, <B3 '1st March '>}

In [1345]:
year = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
year

{<F4 2012.0>, <H4 2014.0>, <B4 2007.0>, <J4 2017.0>, <D4 2010.0>}

In [1346]:
mt = tab.excel_ref('B5').expand(RIGHT).is_not_blank()
mt

{<G5 '%'>, <B5 'Headcount'>, <I5 '%'>, <J5 'Headcount'>, <K5 '%'>, <D5 'Headcount'>, <F5 'Headcount'>, <C5 '%'>, <E5 '%'>, <H5 'Headcount'>}

In [1347]:
Dimensions = [
            HDim(TreatmentType,'Treatment Type',DIRECTLY,LEFT),
            HDim(Service,'Category',CLOSEST,ABOVE),
            HDim(month,'month',CLOSEST,LEFT),
            HDim(year,'Year',CLOSEST,LEFT),
            HDim(mt,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People'),
            HDimConst('Age','All'),
            HDimConst('Sex','Persons')
            ]

In [1348]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# savepreviewhtml(c1)

In [1349]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Treatment Type,Category,month,Year,Measure Type,Unit,Age,Sex
0,5583.0,,Total,1st March,2007.0,Headcount,People,All,Persons
1,100.0,,Total,1st March,2007.0,%,People,All,Persons
2,5846.0,,Total,1st March,2010.0,Headcount,People,All,Persons
3,100.0,,Total,1st March,2010.0,%,People,All,Persons
4,5916.0,,Total,1st March,2012.0,Headcount,People,All,Persons
5,100.0,,Total,1st March,2012.0,%,People,All,Persons
6,8553.0,,Total,1st September,2014.0,Headcount,People,All,Persons
7,100.0,,Total,1st September,2014.0,%,People,All,Persons
8,5969.0,,Total,1st March,2017.0,Headcount,People,All,Persons
9,100.0,,Total,1st March,2017.0,%,People,All,Persons


In [1350]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [1351]:
new_table.dtypes

Value             float64
Treatment Type     object
Category           object
month              object
Year               object
Measure Type       object
Unit               object
Age                object
Sex                object
dtype: object

In [1352]:
new_table.dtypes

Value             float64
Treatment Type     object
Category           object
month              object
Year               object
Measure Type       object
Unit               object
Age                object
Sex                object
dtype: object

In [1353]:
new_table['Year'] = pd.to_numeric(new_table['Year'], errors='coerce').fillna(0)

In [1354]:
new_table['Year'] = new_table['Year'].astype(int)

In [1355]:
new_table['Year'] = new_table['Year'].astype(str)

In [1356]:
new_table['Period'] = new_table['month'] + new_table['Year']

In [1357]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        '%' : 'Percentage of Headcount'
        }.get(x, x))


In [1358]:
new_table.tail(5)

Unnamed: 0,Value,Treatment Type,Category,month,Year,Measure Type,Unit,Age,Sex,Period
149,3.1,Emergency admissions (HIS),Trust,1st March,2010,Percentage of Headcount,People,All,Persons,1st March 2010
150,172.0,Emergency admissions (HIS),Trust,1st March,2012,Headcount,People,All,Persons,1st March 2012
151,2.9,Emergency admissions (HIS),Trust,1st March,2012,Percentage of Headcount,People,All,Persons,1st March 2012
152,129.0,Emergency admissions (HIS),Trust,1st September,2014,Headcount,People,All,Persons,1st September2014
153,1.5,Emergency admissions (HIS),Trust,1st September,2014,Percentage of Headcount,People,All,Persons,1st September2014


In [1359]:
new_table.count()

Value             154
Treatment Type    144
Category          154
month             154
Year              154
Measure Type      154
Unit              154
Age               154
Sex               154
Period            154
dtype: int64

In [1360]:
new_table = new_table[new_table['Value'] !=  0 ]

In [1361]:
new_table.count()

Value             154
Treatment Type    144
Category          154
month             154
Year              154
Measure Type      154
Unit              154
Age               154
Sex               154
Period            154
dtype: int64

In [1362]:
new_table['Treatment Type'].unique()
new_table['Treatment Type'] = new_table['Treatment Type'].str.strip() #Get rid of extra spce at end of Under 18

In [1363]:
new_table['Treatment Type'].fillna('All', inplace = True)

In [1364]:
new_table

Unnamed: 0,Value,Treatment Type,Category,month,Year,Measure Type,Unit,Age,Sex,Period
0,5583.0,All,Total,1st March,2007,Headcount,People,All,Persons,1st March 2007
1,100.0,All,Total,1st March,2007,Percentage of Headcount,People,All,Persons,1st March 2007
2,5846.0,All,Total,1st March,2010,Headcount,People,All,Persons,1st March 2010
3,100.0,All,Total,1st March,2010,Percentage of Headcount,People,All,Persons,1st March 2010
4,5916.0,All,Total,1st March,2012,Headcount,People,All,Persons,1st March 2012
5,100.0,All,Total,1st March,2012,Percentage of Headcount,People,All,Persons,1st March 2012
6,8553.0,All,Total,1st September,2014,Headcount,People,All,Persons,1st September2014
7,100.0,All,Total,1st September,2014,Percentage of Headcount,People,All,Persons,1st September2014
8,5969.0,All,Total,1st March,2017,Headcount,People,All,Persons,1st March 2017
9,100.0,All,Total,1st March,2017,Percentage of Headcount,People,All,Persons,1st March 2017


In [1365]:
new_table['Service Type'] = 'All'
new_table['Residential Status'] = 'All'
new_table['Health and Social Care Trust']  = 'All'

# Get everything in the right columns
new_table['Age'] = numpy.where(new_table['Category'] == 'Age', new_table['Treatment Type'], 'All')
new_table['Sex'] = numpy.where(new_table['Category'] == 'Gender', new_table['Treatment Type'], 'All')
new_table['Service type'] = numpy.where(new_table['Category'] == 'Service type', new_table['Treatment Type'], 'All')
new_table['Residential Status'] = numpy.where(new_table['Category'] == 'Residential Status', new_table['Treatment Type'], 'All')
new_table['Trust'] = numpy.where(new_table['Category'] == 'Trust', new_table['Treatment Type'], 'All')

new_table['Treatment Type'] = numpy.where(new_table['Category'] == 'Treatment type', new_table['Treatment Type'], 'All')

# Merged cell over 5 rows for 2007, equals total for a few trusts rather than just one. Need to change Trust
new_table['Trust'] = numpy.where((new_table['Category'] == 'Trust') & 
                (new_table['Period'] == '1st March 2007') &
                (new_table['Trust'] == 'Belfast'), 
                'Belfast + Northern + South Eastern + Southern + Western', new_table['Trust'])

#new_table.to_csv('testCompare.csv', index = False)
new_table

Unnamed: 0,Value,Treatment Type,Category,month,Year,Measure Type,Unit,Age,Sex,Period,Service Type,Residential Status,Health and Social Care Trust,Service type,Trust
0,5583.0,All,Total,1st March,2007,Headcount,People,All,All,1st March 2007,All,All,All,All,All
1,100.0,All,Total,1st March,2007,Percentage of Headcount,People,All,All,1st March 2007,All,All,All,All,All
2,5846.0,All,Total,1st March,2010,Headcount,People,All,All,1st March 2010,All,All,All,All,All
3,100.0,All,Total,1st March,2010,Percentage of Headcount,People,All,All,1st March 2010,All,All,All,All,All
4,5916.0,All,Total,1st March,2012,Headcount,People,All,All,1st March 2012,All,All,All,All,All
5,100.0,All,Total,1st March,2012,Percentage of Headcount,People,All,All,1st March 2012,All,All,All,All,All
6,8553.0,All,Total,1st September,2014,Headcount,People,All,All,1st September2014,All,All,All,All,All
7,100.0,All,Total,1st September,2014,Percentage of Headcount,People,All,All,1st September2014,All,All,All,All,All
8,5969.0,All,Total,1st March,2017,Headcount,People,All,All,1st March 2017,All,All,All,All,All
9,100.0,All,Total,1st March,2017,Percentage of Headcount,People,All,All,1st March 2017,All,All,All,All,All


In [1366]:



new_table = new_table[['Period', 'Sex', 'Age', 'Service Type', 'Residential Status', 'Treatment Type', 'Health and Social Care Trust', 'Measure Type', 'Unit', 'Value']]