Table 3.1.1: Waiting times, first and subsequent interventions 2016-17

In [1]:
from gssutils import *

if is_interactive():
    # Only fetch the workbook when the notebook is run interactively.
    # NOTE(review): inputFile is defined only on this branch, yet the next cell
    # uses it unconditionally - presumably a driver supplies it otherwise; confirm.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session whose responses are cached on disk under .cache/.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = ('https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/664944/'
                'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls')
    inputFile = sourceFolder / 'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls'
    response = session.get(inputURL)
    # Fail loudly on a 4xx/5xx reply so an error page is never written to disk
    # as if it were the spreadsheet.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the '3.1.1 Waiting Times' sheet and keep the first (sole) entry
# of the result.
tab = loadxlstabs(
    inputFile,
    sheetids='3.1.1 Waiting Times',
)[0]

Loading in\Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls which has size 281600 bytes
Table names: ['3.1.1 Waiting Times']


In [3]:
# Observations: every non-blank cell in the rectangle that starts at B5 and
# extends down and to the right.
observations = (
    tab.excel_ref('B5')
       .expand(DOWN)
       .expand(RIGHT)
       .is_not_blank()
)
observations

{<D6 25.0>, <G7 1.0>, <D5 395.0>, <D7 420.0>, <B7 17721.0>, <B6 671.0>, <B5 17050.0>, <F5 17445.0>, <G5 1.0>, <C6 0.96>, <E7 0.02>, <G6 1.0>, <C5 0.98>, <F7 18141.0>, <F6 696.0>, <E5 0.02>, <E6 0.04>, <C7 0.98>}

In [4]:
# Row labels: the non-blank cells of column A from row 5 downwards.
intervention = (
    tab.excel_ref('A5')
       .expand(DOWN)
       .is_not_blank()
)
intervention

{<A5 'First Intervention'>, <A6 'Subsequent Intervention'>, <A7 'Total Interventions'>}

In [5]:
# Per-column measure markers ('n' or '%'): non-blank cells of row 4 from
# column B rightwards.
measuretype = (
    tab.excel_ref('B4')
       .expand(RIGHT)
       .is_not_blank()
)
measuretype

{<C4 '%'>, <E4 '%'>, <B4 'n'>, <G4 '%'>, <F4 'n'>, <D4 'n'>}

In [6]:
# Waiting-time band headers: non-blank cells of row 3 from column B
# rightwards (only B3, D3 and F3 are populated).
wt = (
    tab.excel_ref('B3')
       .expand(RIGHT)
       .is_not_blank()
)
wt

{<B3 '3 weeks or under'>, <F3 'Total'>, <D3 'Over 3 weeks'>}

In [7]:
# How each observation cell looks up its labels.
Dimensions = [
    # Intervention label from column A, on the observation's own row.
    HDim(intervention, 'Clients in treatment', DIRECTLY, LEFT),
    # Waiting-time band from row 3. Headers exist only over B, D and F, so
    # each observation takes the closest header to its left.
    HDim(wt, 'Basis of treatment', CLOSEST, LEFT),
    # 'n' / '%' marker from row 4, straight above the observation.
    HDim(measuretype, 'Measure Type', DIRECTLY, ABOVE),
    # Same unit for every observation.
    HDimConst('Unit', 'People'),
]

In [8]:
# Bind the observation cells to the dimension look-ups defined above.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional visual check of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a DataFrame: one row per observation,
# with the value in 'OBS' and one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Basis of treatment,Measure Type,Unit
0,17050.0,First Intervention,3 weeks or under,n,People
1,0.98,First Intervention,3 weeks or under,%,People
2,395.0,First Intervention,Over 3 weeks,n,People
3,0.02,First Intervention,Over 3 weeks,%,People
4,17445.0,First Intervention,Total,n,People
5,1.0,First Intervention,Total,%,People
6,671.0,Subsequent Intervention,3 weeks or under,n,People
7,0.96,Subsequent Intervention,3 weeks or under,%,People
8,25.0,Subsequent Intervention,Over 3 weeks,n,People
9,0.04,Subsequent Intervention,Over 3 weeks,%,People


In [10]:
# Drop zero-valued observations. The explicit .copy() makes the result an
# independent frame: later cells assign columns into new_table, and doing
# that on a boolean-mask selection raises pandas' SettingWithCopyWarning.
# NOTE(review): this would also discard genuine zero counts/percentages if
# the sheet ever contained any - confirm that is intended.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Call the observation column 'Value'; every other column keeps its name.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [12]:
# Spot-check that the observation column is now labelled 'Value'.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Basis of treatment,Measure Type,Unit
0,17050.0,First Intervention,3 weeks or under,n,People
1,0.98,First Intervention,3 weeks or under,%,People
2,395.0,First Intervention,Over 3 weeks,n,People
3,0.02,First Intervention,Over 3 weeks,%,People
4,17445.0,First Intervention,Total,n,People


In [13]:
# Swap the sheet's terse column markers for descriptive measure names;
# any value not listed is passed through unchanged.
measure_labels = {'n': 'Count', '%': 'Percentage'}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_labels.get(marker, marker))

In [14]:
# Check the relabelled 'Measure Type' values on the last rows.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Basis of treatment,Measure Type,Unit
13,0.98,Total Interventions,3 weeks or under,Percentage,People
14,420.0,Total Interventions,Over 3 weeks,Count,People
15,0.02,Total Interventions,Over 3 weeks,Percentage,People
16,18141.0,Total Interventions,Total,Count,People
17,1.0,Total Interventions,Total,Percentage,People


In [15]:
# Inspect the column dtypes; 'Value' is still float64 at this point.
new_table.dtypes

Value                   float64
Clients in treatment     object
Basis of treatment       object
Measure Type             object
Unit                     object
dtype: object

In [16]:
# Store the observation values as text rather than floats.
new_table = new_table.astype({'Value': str})

In [17]:
# Quick look at the first rows after the 'Value' conversion.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Basis of treatment,Measure Type,Unit
0,17050.0,First Intervention,3 weeks or under,Count,People
1,0.98,First Intervention,3 weeks or under,Percentage,People
2,395.0,First Intervention,Over 3 weeks,Count,People


In [18]:
# Rename the 'Total' band to 'All weeks'; other bands are left as they are.
basis_labels = {'Total': 'All weeks'}
new_table['Basis of treatment'] = new_table['Basis of treatment'].map(
    lambda band: basis_labels.get(band, band))

In [19]:
# Put every band under the 'Waiting times/' prefix (radd == prefix + value).
# Not idempotent: re-running this cell prepends the prefix a second time.
new_table['Basis of treatment'] = new_table['Basis of treatment'].radd('Waiting times/')

In [20]:
# Add the constant Period / Substance columns, then keep the output columns
# in their final order.
new_table = new_table.assign(Period='2016-17', Substance='All')[[
    'Period',
    'Basis of treatment',
    'Substance',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]]

In [21]:
if is_interactive():
    # Write the tidy table to out/table3.1.1.csv (without the index column),
    # creating the output folder if it does not exist yet.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table3.1.1.csv', index=False)

In [22]:
# Final look at the last rows of the reshaped table.
new_table.tail()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
13,2016-17,Waiting times/3 weeks or under,All,Total Interventions,Percentage,0.98,People
14,2016-17,Waiting times/Over 3 weeks,All,Total Interventions,Count,420.0,People
15,2016-17,Waiting times/Over 3 weeks,All,Total Interventions,Percentage,0.02,People
16,2016-17,Waiting times/All weeks,All,Total Interventions,Count,18141.0,People
17,2016-17,Waiting times/All weeks,All,Total Interventions,Percentage,1.0,People
