Table 5.1.1: Waiting times, first and subsequent interventions

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 5.1.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 5.1.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('C6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<H8 4878.0>, <G6 1.71>, <D7 279.0>, <G9 2.61>, <I6 0.970586141855582>, <E10 0.0169169835443288>, <H10 76200.0>, <E6 0.0123562475772064>, <J9 1191.0>, <C6 0.987643752422794>, <K10 0.0432424288081965>, <G10 2.17>, <C8 0.981071428571429>, <H7 1461.0>, <H6 54545.0>, <C7 0.983943370165746>, <K8 0.101822868716627>, <J6 1653.0>, <J10 3444.0>, <K6 0.029413858144418>, <D10 2567.0>, <D9 1152.0>, <E9 0.0217963029534747>, <K7 0.0311671087533156>, <D8 371.0>, <K9 0.072151208578179>, <E7 0.0160566298342541>, <C10 0.983083016455671>, <I10 0.956757571191803>, <G8 2.45>, <C9 0.978203697046525>, <I9 0.927848791421821>, <E8 0.0189285714285714>, <G7 2.13>, <I8 0.898177131283373>, <I7 0.968832891246684>, <H9 15316.0>, <J7 47.0>, <J8 553.0>, <D6 765.0>}

In [4]:
Source = tab.excel_ref('A6').expand(DOWN).is_not_blank()
Source

{<A6 'Opiate'>, <A9 'Alcohol only'>, <A7 'Non-opiate only'>, <A10 'Total'>, <A8 'Non-opiate and Alcohol'>}

In [5]:
intervention = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
intervention

{<B3 'First intervention'>, <H3 'Subsequent intervention'>}

In [6]:
Waitingtimes = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Waitingtimes

{<F4 'Average waiting time'>, <J4 'Over 3 weeks'>, <D4 'Over 3 weeks'>, <H4 '3 weeks or under'>, <B4 '3 weeks or under'>}

In [7]:
MeasureType = tab.excel_ref('B5').expand(RIGHT).is_not_blank()
MeasureType

{<B5 'n'>, <I5 '%'>, <C5 '%'>, <H5 'n'>, <K5 '%'>, <E5 '%'>, <G5 'days'>, <D5 'n'>, <J5 'n'>}

In [8]:
Dimensions = [
            HDim(Source,'Substance',DIRECTLY,LEFT),
            HDim(intervention,'Intervention',CLOSEST,LEFT),
            HDim(Waitingtimes,'Waiting time',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Intervention,Waiting time,Measure Type,Unit
0,0.987644,Opiate,First intervention,3 weeks or under,%,People
1,765.0,Opiate,First intervention,Over 3 weeks,n,People
2,0.012356,Opiate,First intervention,Over 3 weeks,%,People
3,1.71,Opiate,First intervention,Average waiting time,days,People
4,54545.0,Opiate,Subsequent intervention,3 weeks or under,n,People
5,0.970586,Opiate,Subsequent intervention,3 weeks or under,%,People
6,1653.0,Opiate,Subsequent intervention,Over 3 weeks,n,People
7,0.029414,Opiate,Subsequent intervention,Over 3 weeks,%,People
8,0.983943,Non-opiate only,First intervention,3 weeks or under,%,People
9,279.0,Non-opiate only,First intervention,Over 3 weeks,n,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.head()

Unnamed: 0,Value,Substance,Intervention,Waiting time,Measure Type,Unit
0,0.987644,Opiate,First intervention,3 weeks or under,%,People
1,765.0,Opiate,First intervention,Over 3 weeks,n,People
2,0.012356,Opiate,First intervention,Over 3 weeks,%,People
3,1.71,Opiate,First intervention,Average waiting time,days,People
4,54545.0,Opiate,Subsequent intervention,3 weeks or under,n,People


In [14]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

In [15]:
new_table.tail()

Unnamed: 0,Value,Substance,Intervention,Waiting time,Measure Type,Unit
35,2.17,Total,First intervention,Average waiting time,days,People
36,76200.0,Total,Subsequent intervention,3 weeks or under,Count,People
37,0.956758,Total,Subsequent intervention,3 weeks or under,Percentage,People
38,3444.0,Total,Subsequent intervention,Over 3 weeks,Count,People
39,0.043242,Total,Subsequent intervention,Over 3 weeks,Percentage,People


In [16]:
new_table.dtypes

Value           float64
Substance        object
Intervention     object
Waiting time     object
Measure Type     object
Unit             object
dtype: object

In [17]:
new_table['Value'] = new_table['Value'].astype(str)

In [18]:
new_table.head(3)

Unnamed: 0,Value,Substance,Intervention,Waiting time,Measure Type,Unit
0,0.987643752422794,Opiate,First intervention,3 weeks or under,Percentage,People
1,765.0,Opiate,First intervention,Over 3 weeks,Count,People
2,0.0123562475772064,Opiate,First intervention,Over 3 weeks,Percentage,People


In [19]:
new_table['Substance'] = new_table['Substance'].map(
    lambda x: {
        'Total' : 'All' 
        }.get(x, x))

In [20]:
new_table.head(3)

Unnamed: 0,Value,Substance,Intervention,Waiting time,Measure Type,Unit
0,0.987643752422794,Opiate,First intervention,3 weeks or under,Percentage,People
1,765.0,Opiate,First intervention,Over 3 weeks,Count,People
2,0.0123562475772064,Opiate,First intervention,Over 3 weeks,Percentage,People


In [21]:
new_table = new_table[['Substance','Intervention','Waiting time','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table5.1.1.csv'), index = False)