Table 8.1.1: Treatment contact status at 31st March 2018 by main substance groups for clients commencing treatment since 2005-06

In [1]:
from gssutils import *

# Stand-alone bootstrap: when the notebook is run interactively, download the
# source workbook through an on-disk HTTP cache and save it under ./in.
# NOTE(review): `inputFile` is only assigned inside this branch, so a
# non-interactive run presumably expects the caller to define it - confirm.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # Requests session wrapped with a file-backed cache stored in ./.cache.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Folder that holds the downloaded source file.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page as the workbook.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the first tab returned for sheet 'Table 8.1.1'.
tab = loadxlstabs(inputFile, sheetids='Table 8.1.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 8.1.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell reached by expanding down and then
# right from B6.
observations = (
    tab.excel_ref('B6')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<P16 11677.0>, <O6 3316.0>, <D19 11755.0>, <E19 14071.0>, <E27 32948.0>, <L26 4274.0>, <G14 8011.0>, <Q11 0.03843066776011586>, <O22 4722.0>, <P8 77000.0>, <M12 3370.0>, <H29 65532.0>, <D16 529.0>, <D8 8555.0>, <O7 1279.0>, <J11 73.0>, <F29 82441.0>, <E17 6149.0>, <H16 661.0>, <L29 53119.0>, <I7 4339.0>, <H17 4904.0>, <M11 204.0>, <F21 896.0>, <N11 402.0>, <O19 8311.0>, <M13 5620.0>, <H28 34913.0>, <F24 34927.0>, <I18 6676.0>, <E12 4508.0>, <J9 9070.0>, <B14 4047.0>, <I24 29081.0>, <Q18 0.517062286000077>, <B28 25764.0>, <G18 7755.0>, <F26 8273.0>, <C23 7241.0>, <D13 3502.0>, <Q12 0.4066409589782066>, <D6 10463.0>, <B26 34483.0>, <F17 6827.0>, <O8 680.0>, <P26 140070.0>, <H21 819.0>, <I28 32970.0>, <C24 16274.0>, <D9 33577.0>, <N9 6355.0>, <E22 10833.0>, <B13 1588.0>, <G22 14205.0>, <M24 25572.0>, <K24 29481.0>, <O26 18314.0>, <G23 18119.0>, <O24 21579.0>, <Q9 1.0>, <K26 4525.0>, <C21 327.0>, <H9 13620.0>, <J18 6156.0>, <F22 15637.0>, <L11 129.0>, <D7 14559.0>, <C8 11868.0>, <F7 9808.

In [4]:
# Client-group headings in column A, picked out by position (in this sheet
# they resolve to A5, A10, A15, A20 and A25).
client_heading_positions = [5, 10, 15, 20, 25]
clients = tab.excel_ref('A').expand(DOWN).by_index(client_heading_positions)
clients

{<A15 'Non-opiate and alcohol clients'>, <A5 'Opiate clients'>, <A25 'Total clients'>, <A10 'Non-opiate only clients'>, <A20 'Alcohol only clients'>}

In [5]:
# Treatment-status labels: the non-blank cells of column A from A6 downwards,
# with the cells already held in `clients` removed.
treatmentstatus = (
    tab.excel_ref('A6').expand(DOWN).is_not_blank()
    - clients
)
treatmentstatus

{<A28 'Subtotal treatment complete'>, <A13 'Subtotal treatment complete'>, <A8 'Subtotal treatment complete'>, <A16 'Retained at 31st March 2017'>, <A27 'Subtotal exited (treatment incomplete)'>, <A17 'Subtotal exited (treatment incomplete)'>, <A9 'Total clients in treatment since 1st April 2005'>, <A26 'Retained at 31st March 2017'>, <A14 'Total clients in treatment since 1st April 2005'>, <A22 'Subtotal exited (treatment incomplete)'>, <A21 'Retained at 31st March 2017'>, <A11 'Retained at 31st March 2017'>, <A7 'Subtotal exited (treatment incomplete)'>, <A12 'Subtotal exited (treatment incomplete)'>, <A19 'Total clients in treatment since 1st April 2005'>, <A18 'Subtotal treatment complete'>, <A24 'Total clients in treatment since 1st April 2005'>, <A6 'Retained at 31st March 2017'>, <A23 'Subtotal treatment complete'>, <A29 'Total clients in treatment since 1st April 2005'>}

In [6]:
# Column headings on row 4, from B4 rightwards. Besides the year labels this
# also picks up the 'Total' and '%' columns.
period = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
period

{<F4 '2008-09'>, <E4 '2007-08'>, <I4 '2011-12'>, <G4 '2009-10'>, <C4 '2005-06'>, <M4 '2015-16'>, <P4 'Total'>, <B4 'Prior to 2005-06'>, <Q4 '%'>, <L4 '2014-15'>, <H4 '2010-11'>, <K4 '2013-14'>, <D4 '2006-07'>, <O4 '2017-18'>, <J4 '2012-13'>, <N4 '2016-17'>}

In [7]:
# Dimension definitions attached to every observation cell. The list order is
# kept as-is because it determines the column order of the resulting table.
# NOTE(review): 'Measure Type' and 'Unit' are constants here, yet the '%'
# column is among the observations; Measure Type is adjusted in a later cell
# but Unit is not - confirm that is intended.
Dimensions = [
    HDim(clients, 'Clients in treatment', CLOSEST, ABOVE),
    HDimConst('Measure Type', 'Count'),
    HDim(treatmentstatus, 'Treatment Status', DIRECTLY, LEFT),
    HDim(period, 'Period', DIRECTLY, ABOVE),
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with their dimensions into one segment.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Convert the segment to a pandas DataFrame (observations arrive in 'OBS').
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,33985.000000,Opiate clients,Count,Retained at 31st March 2017,Prior to 2005-06,People
1,17654.000000,Opiate clients,Count,Retained at 31st March 2017,2005-06,People
2,10463.000000,Opiate clients,Count,Retained at 31st March 2017,2006-07,People
3,7824.000000,Opiate clients,Count,Retained at 31st March 2017,2007-08,People
4,6497.000000,Opiate clients,Count,Retained at 31st March 2017,2008-09,People
5,4875.000000,Opiate clients,Count,Retained at 31st March 2017,2009-10,People
6,3660.000000,Opiate clients,Count,Retained at 31st March 2017,2010-11,People
7,2862.000000,Opiate clients,Count,Retained at 31st March 2017,2011-12,People
8,2657.000000,Opiate clients,Count,Retained at 31st March 2017,2012-13,People
9,2627.000000,Opiate clients,Count,Retained at 31st March 2017,2013-14,People


In [10]:
# Drop rows whose observation is exactly 0.
# An explicit copy is taken so that the later column assignments on
# `new_table` act on an independent frame rather than on a filtered selection
# of the original (this avoids pandas' SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Rename the observation column from 'OBS' to 'Value'; other columns are untouched.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [12]:
# Preview the first rows to check the 'Value' column name.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,33985.0,Opiate clients,Count,Retained at 31st March 2017,Prior to 2005-06,People
1,17654.0,Opiate clients,Count,Retained at 31st March 2017,2005-06,People
2,10463.0,Opiate clients,Count,Retained at 31st March 2017,2006-07,People
3,7824.0,Opiate clients,Count,Retained at 31st March 2017,2007-08,People
4,6497.0,Opiate clients,Count,Retained at 31st March 2017,2008-09,People


In [13]:
# List the distinct Measure Type values present at this point.
new_table['Measure Type'].unique()

array(['Count'], dtype=object)

In [14]:
# Inspect the last rows, which include the 'Total' and '%' period columns.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Measure Type,Treatment Status,Period,Unit
315,51293.0,Total clients,Count,Total clients in treatment since 1st April 2005,2015-16,People
316,46614.0,Total clients,Count,Total clients in treatment since 1st April 2005,2016-17,People
317,43720.0,Total clients,Count,Total clients in treatment since 1st April 2005,2017-18,People
318,919823.0,Total clients,Count,Total clients in treatment since 1st April 2005,Total,People
319,1.0,Total clients,Count,Total clients in treatment since 1st April 2005,%,People


In [15]:
# Check the dtype of each column before casting 'Value'.
new_table.dtypes

Value                   float64
Clients in treatment     object
Measure Type             object
Treatment Status         object
Period                   object
Unit                     object
dtype: object

In [16]:
# Cast 'Value' from float64 to its string representation.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# Preview the first three rows.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,33985.0,Opiate clients,Count,Retained at 31st March 2017,Prior to 2005-06,People
1,17654.0,Opiate clients,Count,Retained at 31st March 2017,2005-06,People
2,10463.0,Opiate clients,Count,Retained at 31st March 2017,2006-07,People


In [18]:
def user_perc(x, y):
    """Choose the measure type for a row.

    Args:
        x: The row's period label; compared after conversion with ``str``.
        y: The row's current measure type.

    Returns:
        ``'Percentage'`` when ``str(x)`` equals ``'%'``, otherwise ``y`` unchanged.
    """
    return 'Percentage' if str(x) == '%' else y
    
# Recompute Measure Type row by row: rows taken from the '%' column become
# 'Percentage', all other rows keep their existing value.
new_table['Measure Type'] = [
    user_perc(period_label, measure_type)
    for period_label, measure_type in zip(new_table['Period'], new_table['Measure Type'])
]


In [19]:
# The 'Total' and '%' columns both cover every year, so relabel them as
# 'All years'; any other period label passes through unchanged.
period_relabels = {
    'Total': 'All years',
    '%': 'All years',
}
new_table['Period'] = new_table['Period'].map(
    lambda label: period_relabels.get(label, label)
)

In [20]:
# Check the relabelled Period and Measure Type values on the last rows.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Measure Type,Treatment Status,Period,Unit
315,51293.0,Total clients,Count,Total clients in treatment since 1st April 2005,2015-16,People
316,46614.0,Total clients,Count,Total clients in treatment since 1st April 2005,2016-17,People
317,43720.0,Total clients,Count,Total clients in treatment since 1st April 2005,2017-18,People
318,919823.0,Total clients,Count,Total clients in treatment since 1st April 2005,All years,People
319,1.0,Total clients,Percentage,Total clients in treatment since 1st April 2005,All years,People


In [21]:
# Keep only the output columns, in their final order.
output_columns = [
    'Period',
    'Treatment Status',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [22]:
# When run interactively, write the tidied table to out/table8.1.1.csv.
# `Path` is the name imported by the interactive branch of the first cell.
if is_interactive():
    # Renamed from the garbled 'SubstancetinationFolder' (apparently a
    # mis-applied find/replace); the name is only used inside this block.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table8.1.1.csv', index=False)