Appendix B: Thirteen-year treatment population first presentation and treatment contact status at 31 March 2018

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Appendix B')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Appendix B']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations = observations - tab.excel_ref('B').expand(DOWN).expand(RIGHT).by_index([12,20,28,36])
observations

{<K23 2903.0>, <E8 32149.0>, <O30 3220.0>, <C30 543.0>, <C39 8706.0>, <J41 28643.0>, <O33 8311.0>, <K9 58212.0>, <P6 140070.0>, <P21 227.0>, <H25 7688.0>, <D41 17211.0>, <D22 44.0>, <H31 4904.0>, <C23 5428.0>, <D12 '2006-07'>, <G16 5269.0>, <H15 5671.0>, <G17 17841.0>, <Q22 0.03843066776011586>, <M28 '2015-16'>, <I20 '2011-12'>, <H37 241.0>, <D15 14559.0>, <P8 415722.0>, <E38 586.0>, <E40 12955.0>, <L33 9772.0>, <K16 2493.0>, <H38 819.0>, <M8 28106.0>, <H7 25422.0>, <F24 4610.0>, <D29 104.0>, <E29 133.0>, <B9 96888.0>, <H36 '2010-11'>, <O28 '2017-18'>, <N21 16.0>, <M16 1893.0>, <B14 33985.0>, <M5 765.0>, <B31 2522.0>, <E36 '2007-08'>, <L32 5399.0>, <Q12 '%'>, <G33 14361.0>, <P32 80640.0>, <P33 155958.0>, <H8 34913.0>, <N31 3502.0>, <K36 '2013-14'>, <I28 '2011-12'>, <L13 394.0>, <O9 43720.0>, <H32 7075.0>, <C20 '2005-06'>, <E6 9149.0>, <Q23 0.4066409589782066>, <J8 32056.0>, <I15 4339.0>, <J25 8513.0>, <D25 8330.0>, <N7 16250.0>, <H17 13620.0>, <O20 '2017-18'>, <J28 '2012-13'>, <E25 906

In [4]:
clients = tab.excel_ref('A').expand(DOWN).by_index([3,11,19,27,35])
clients

{<A3 'All substance groups'>, <A35 'Alcohol only clients'>, <A27 'Non-opiate and alcohol clients'>, <A11 'Opiate clients'>, <A19 'Non-opiate only clients'>}

In [5]:
treatmentstatus = tab.excel_ref('A5').expand(DOWN).is_not_blank() - clients
treatmentstatus

{<A24 'Subtotal treatment complete'>, <A38 'Retained at 31st March 2018'>, <A5 'Three journeys since first presentation'>, <A28 'Category'>, <A14 'Retained at 31st March 2018'>, <A17 'Total clients in treatment since 1st April 2005'>, <A22 'Retained at 31st March 2018'>, <A41 'Total clients in treatment since 1st April 2005'>, <A31 'Subtotal exited (treatment incomplete)'>, <A7 'Subtotal exited (treatment incomplete)'>, <A37 'Three journeys since first presentation'>, <A32 'Subtotal treatment complete'>, <A29 'Three journeys since first presentation'>, <A36 'Category'>, <A9 'Total clients in treatment since 1st April 2005'>, <A12 'Category'>, <A30 'Retained at 31st March 2018'>, <A33 'Total clients in treatment since 1st April 2005'>, <A39 'Subtotal exited (treatment incomplete)'>, <A15 'Subtotal exited (treatment incomplete)'>, <A8 'Subtotal treatment complete'>, <A21 'Three journeys since first presentation'>, <A13 'Three journeys since first presentation'>, <A20 'Category'>, <A6 'Re

In [6]:
period = tab.excel_ref('B4').expand(RIGHT).is_not_blank() 
period

{<G4 '2009-10'>, <C4 '2005-06'>, <J4 '2012-13'>, <F4 '2008-09'>, <K4 '2013-14'>, <B4 'Prior to 2005-06'>, <O4 '2017-18'>, <H4 '2010-11'>, <I4 '2011-12'>, <L4 '2014-15'>, <P4 'Total'>, <N4 '2016-17'>, <M4 '2015-16'>, <Q4 '%'>, <D4 '2006-07'>, <E4 '2007-08'>}

In [7]:
Dimensions = [
            HDim(clients,'Clients in treatment',CLOSEST,ABOVE),
            HDimConst('Measure Type','Count'),
            HDim(treatmentstatus, 'Treatment Status',DIRECTLY,LEFT),
            HDim(period, 'Period',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,5343,,All substance groups,Count,Three journeys since first presentation,Prior to 2005-06,People
1,2896,,All substance groups,Count,Three journeys since first presentation,2005-06,People
2,1986,,All substance groups,Count,Three journeys since first presentation,2006-07,People
3,1649,,All substance groups,Count,Three journeys since first presentation,2007-08,People
4,1661,,All substance groups,Count,Three journeys since first presentation,2008-09,People
5,1432,,All substance groups,Count,Three journeys since first presentation,2009-10,People
6,1217,,All substance groups,Count,Three journeys since first presentation,2010-11,People
7,1053,,All substance groups,Count,Three journeys since first presentation,2011-12,People
8,1065,,All substance groups,Count,Three journeys since first presentation,2012-13,People
9,1100,,All substance groups,Count,Three journeys since first presentation,2013-14,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,5343,,All substance groups,Count,Three journeys since first presentation,Prior to 2005-06,People
1,2896,,All substance groups,Count,Three journeys since first presentation,2005-06,People
2,1986,,All substance groups,Count,Three journeys since first presentation,2006-07,People
3,1649,,All substance groups,Count,Three journeys since first presentation,2007-08,People
4,1661,,All substance groups,Count,Three journeys since first presentation,2008-09,People


In [13]:
new_table['Measure Type'].unique()

array(['Count'], dtype=object)

In [14]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Measure Type,Treatment Status,Period,Unit
463,25572,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2015-16,People
464,22757,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2016-17,People
465,21579,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2017-18,People
466,349193,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,Total,People
467,1,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,%,People


In [15]:
new_table.dtypes

Value                   object
DATAMARKER              object
Clients in treatment    object
Measure Type            object
Treatment Status        object
Period                  object
Unit                    object
dtype: object

In [16]:
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
new_table.head(3)

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Measure Type,Treatment Status,Period,Unit
0,5343.0,,All substance groups,Count,Three journeys since first presentation,Prior to 2005-06,People
1,2896.0,,All substance groups,Count,Three journeys since first presentation,2005-06,People
2,1986.0,,All substance groups,Count,Three journeys since first presentation,2006-07,People


In [18]:
def user_perc(x,y):
    
    if str(x) == '%':
        return 'Percentage'
    else:
        return y
    
new_table['Measure Type'] = new_table.apply(lambda row: user_perc(row['Period'],row['Measure Type']), axis = 1)


In [19]:
new_table['Period'] = new_table['Period'].map(
    lambda x: {
        'Total' : 'All years',
        '%'     : 'All years'
        }.get(x, x))

In [20]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Measure Type,Treatment Status,Period,Unit
463,25572.0,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2015-16,People
464,22757.0,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2016-17,People
465,21579.0,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,2017-18,People
466,349193.0,,Alcohol only clients,Count,Total clients in treatment since 1st April 2005,All years,People
467,1.0,,Alcohol only clients,Percentage,Total clients in treatment since 1st April 2005,All years,People


In [21]:
new_table = new_table[['Period','Treatment Status','Clients in treatment','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('Appendix B.csv'), index = False)