Table 7.5.1: Trends in waiting times of three weeks and under for first intervention

In [1]:
from gssutils import *

# Download the source workbook when the notebook is run on its own.
# NOTE(review): when is_interactive() is false nothing here runs, so
# inputFile must already be defined by whatever executes this notebook —
# presumably a driver notebook; verify.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session backed by an on-disk cache in '.cache' so repeated runs
    # can reuse a previously fetched copy of the workbook.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of saving an error page as .xlsx.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the first tab returned for sheet 'Table 7.5.1'.
tab = loadxlstabs(inputFile, sheetids='Table 7.5.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 7.5.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observations: every non-blank cell in the block that extends down and
# right from B5 (the first data cell under the two header rows).
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<E17 0.983943370165746>, <J8 128927.0>, <K9 0.878110118251378>, <F5 3300.0>, <H14 60593.0>, <F11 17545.0>, <B16 63548.0>, <B7 55438.0>, <G7 0.859297565179918>, <H15 57886.0>, <D14 17599.0>, <J12 144473.0>, <E14 0.978211327886165>, <G10 0.900255328596803>, <C8 0.930032879871597>, <G9 0.881269134428055>, <K15 0.971314976065088>, <B5 25058.0>, <F16 19399.0>, <J10 131973.0>, <J17 149174.0>, <J13 164038.0>, <C16 0.988320191604846>, <D5 5309.0>, <F14 18648.0>, <B10 53848.0>, <E6 0.883174510802357>, <B8 59683.0>, <C17 0.987643752422794>, <H7 25076.0>, <I7 0.771237005597589>, <J11 133341.0>, <H6 14761.0>, <E10 0.959568733153639>, <E9 0.946819210460146>, <B15 62784.0>, <B12 54812.0>, <B14 64152.0>, <C5 0.868892818752384>, <D7 14788.0>, <D17 17097.0>, <G17 0.981071428571429>, <K6 0.842393468737555>, <C14 0.979958450446047>, <F8 15828.0>, <K5 0.840749559995859>, <G6 0.830919363122753>, <H16 53745.0>, <G11 0.921190801218104>, <B9 57911.0>, <I11 0.849943600867679>, <D15 18328.0>, <H17 51701.0>, <I

In [4]:
# Client-group headers: non-blank cells in row 3 from column B rightwards.
clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
clients

{<J3 'Total'>, <D3 'Non-opiate only'>, <F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>, <B3 'Opiate'>}

In [5]:
# Measure markers ('n' or '%'): non-blank cells in row 4 from column B
# rightwards, one per data column.
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<H4 'n'>, <I4 '%'>, <E4 '%'>, <B4 'n'>, <K4 '%'>, <G4 '%'>, <D4 'n'>, <J4 'n'>, <C4 '%'>, <F4 'n'>}

In [6]:
# Reporting periods: non-blank cells in column A from row 5 down, minus
# anything from A228 downwards.
# NOTE(review): the periods selected here end at A17, so the A228 cut-off
# looks like a leftover from another table — confirm before removing it.
period = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A228').expand(DOWN)
period

{<A12 '2012-13'>, <A7 '2007-08'>, <A6 '2006-07'>, <A15 '2015-16'>, <A13 '2013-14'>, <A17 '2017-18'>, <A5 '2005-06'>, <A10 '2010-11'>, <A11 '2011-12'>, <A9 '2009-10'>, <A14 '2014-15'>, <A8 '2008-09'>, <A16 '2016-17'>}

In [7]:
# Dimensions attached to each observation. The list order matches the
# column order of the DataFrame produced further down.
Dimensions = [
            # Client group from row 3: each group label sits over an 'n'/'%'
            # pair, so the nearest label to the left is used.
            HDim(clients,'Clients in treatment',CLOSEST,LEFT),
            # Constant label describing what this table measures.
            HDimConst('Category','Waiting time of three weeks or under for first intervention'),
            # 'n' or '%' marker from row 4, directly above the observation.
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            # Reporting period from column A, on the same row as the observation.
            HDim(period, 'Period',DIRECTLY,LEFT),
            # Constant unit applied to every row, including the '%' rows.
            HDimConst('Unit','People')            
            ]

In [8]:
# Combine the observation cells with their dimensions into one segment.
# NOTE(review): the effect of processTIMEUNIT=True is not visible from this
# notebook (the time dimension here is named 'Period') — confirm it is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the segment into a pandas DataFrame; the observation values
# arrive in the 'OBS' column alongside one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Category,Measure Type,Period,Unit
0,25058.000000,Opiate,Waiting time of three weeks or under for first...,n,2005-06,People
1,0.868893,Opiate,Waiting time of three weeks or under for first...,%,2005-06,People
2,5309.000000,Non-opiate only,Waiting time of three weeks or under for first...,n,2005-06,People
3,0.879410,Non-opiate only,Waiting time of three weeks or under for first...,%,2005-06,People
4,3300.000000,Non-opiate and Alcohol,Waiting time of three weeks or under for first...,n,2005-06,People
5,0.836926,Non-opiate and Alcohol,Waiting time of three weeks or under for first...,%,2005-06,People
6,6937.000000,Alcohol only,Waiting time of three weeks or under for first...,n,2005-06,People
7,0.732060,Alcohol only,Waiting time of three weeks or under for first...,%,2005-06,People
8,40604.000000,Total,Waiting time of three weeks or under for first...,n,2005-06,People
9,0.840750,Total,Waiting time of three weeks or under for first...,%,2005-06,People


In [10]:
# Drop observations whose value is exactly 0. The explicit .copy() makes
# new_table an independent frame, so the column assignments in later cells
# do not write into a slice of the unfiltered frame (SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Rename the 'OBS' column to 'Value'; every other column name is kept as is.
new_table.columns = list(map(lambda name: 'Value' if name == 'OBS' else name,
                             new_table.columns))

In [12]:
# Check the rename: the first column should now be 'Value'.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Category,Measure Type,Period,Unit
0,25058.0,Opiate,Waiting time of three weeks or under for first...,n,2005-06,People
1,0.868893,Opiate,Waiting time of three weeks or under for first...,%,2005-06,People
2,5309.0,Non-opiate only,Waiting time of three weeks or under for first...,n,2005-06,People
3,0.87941,Non-opiate only,Waiting time of three weeks or under for first...,%,2005-06,People
4,3300.0,Non-opiate and Alcohol,Waiting time of three weeks or under for first...,n,2005-06,People


In [13]:
# List the distinct measure markers before they are relabelled.
new_table['Measure Type'].unique()

array(['n', '%'], dtype=object)

In [14]:
# Translate the spreadsheet's measure markers into readable labels.
# A missing marker (None) is treated as a count; any value not listed in
# the table is passed through unchanged.
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker, labels={'n': 'Count', '%': 'Percentage', None: 'Count'}:
        labels.get(marker, marker))

In [15]:
# Inspect the last rows to confirm the measure labels were applied.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Category,Measure Type,Period,Unit
125,0.981071,Non-opiate and Alcohol,Waiting time of three weeks or under for first...,Percentage,2017-18,People
126,51701.0,Alcohol only,Waiting time of three weeks or under for first...,Count,2017-18,People
127,0.978204,Alcohol only,Waiting time of three weeks or under for first...,Percentage,2017-18,People
128,149174.0,Total,Waiting time of three weeks or under for first...,Count,2017-18,People
129,0.983083,Total,Waiting time of three weeks or under for first...,Percentage,2017-18,People


In [16]:
# Column dtypes before the string conversion; 'Value' is float64 here.
new_table.dtypes

Value                   float64
Clients in treatment     object
Category                 object
Measure Type             object
Period                   object
Unit                     object
dtype: object

In [17]:
# Store the observation values as text rather than float64.
# NOTE(review): counts keep their float formatting after the cast
# (e.g. '25058.0') — confirm that is the intended output format.
new_table['Value'] = new_table['Value'].astype(str)

In [18]:
# Preview the first three rows after the string conversion.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Category,Measure Type,Period,Unit
0,25058.0,Opiate,Waiting time of three weeks or under for first...,Count,2005-06,People
1,0.868892818752384,Opiate,Waiting time of three weeks or under for first...,Percentage,2005-06,People
2,5309.0,Non-opiate only,Waiting time of three weeks or under for first...,Count,2005-06,People


In [19]:
# Relabel the 'Total' client group as 'All clients'; every other group
# name is left untouched.
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda group, renames={'Total': 'All clients'}: renames.get(group, group))

In [20]:
# Confirm that 'Total' now reads 'All clients' in the last rows.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Category,Measure Type,Period,Unit
125,0.981071428571429,Non-opiate and Alcohol,Waiting time of three weeks or under for first...,Percentage,2017-18,People
126,51701.0,Alcohol only,Waiting time of three weeks or under for first...,Count,2017-18,People
127,0.978203697046525,Alcohol only,Waiting time of three weeks or under for first...,Percentage,2017-18,People
128,149174.0,All clients,Waiting time of three weeks or under for first...,Count,2017-18,People
129,0.983083016455671,All clients,Waiting time of three weeks or under for first...,Percentage,2017-18,People


In [21]:
# Put the columns into their output order: period and category first,
# then the client group, the measure, and finally the value and its unit.
new_table = new_table.loc[:, [
    'Period', 'Category', 'Clients in treatment',
    'Measure Type', 'Value', 'Unit',
]]

In [22]:
# Write the tidied table to out/table7.5.1.csv when the notebook is run on
# its own. Path is imported in the first cell under the same
# is_interactive() guard, so it is available whenever this branch runs.
if is_interactive():
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    # index=False keeps the DataFrame's row index out of the CSV.
    new_table.to_csv(destinationFolder / 'table7.5.1.csv', index=False)