Table 5.4.1: Trends in treatment exit reasons (2005-06 to 2016-17)

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/664944/'\
                    'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls'
    inputFile = sourceFolder / 'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='5.4.1 Trends of Service Exits')[0]

Loading in\Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls which has size 281600 bytes
Table names: ['5.4.1 Trends of Service Exits']


In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<J8 183.0>, <E6 0.06090356211989574>, <N10 0.0>, <M10 0.001142367556761388>, <L11 260.0>, <L8 139.0>, <B12 '8,615*'>, <L5 10507.0>, <T5 9613.0>, <W6 0.0643264433357092>, <T6 773.0>, <E12 1.0>, <C9 0.0003482298316889147>, <X8 43.0>, <S12 1.0>, <P10 0.0>, <M6 0.05661859203198629>, <B8 200.0>, <X12 10834.0>, <B7 2525.0>, <I5 0.6528518670496513>, <I11 0.08548762139242237>, <H6 510.0>, <B10 102.0>, <D5 5726.0>, <G6 0.066614587032171>, <V10 0.0>, <U10 0.004555242670200431>, <R12 12510.0>, <Q11 0.00810998686954507>, <L7 1851.0>, <G5 0.5733257581137703>, <I10 0.004855696895089591>, <F8 339.0>, <T12 12074.0>, <J12 14665.0>, <R9 244.0>, <Q9 0.021472155711747896>, <O8 0.0073557291271706985>, <S6 0.06810551558752997>, <N11 175.0>, <C12 1.0>, <U8 0.004306774888189498>, <K11 0.0325946130242073>, <O9 0.024721316448016985>, <Q7 0.11817409438479956>, <N8 97.0>, <R8 62.0>, <Q6 0.058700857341469063>, <P5 10208.0>, <J6 856.0>, <Y8 0.003968986523906221>, <C6 0.06639582124201973>, <F11 1401.0>, <O11 0.0132

In [4]:
tr = tab.excel_ref('A5').expand(DOWN).is_not_blank() 
tr

{<A8 'Prison'>, <A11 'Other'>, <A9 'Treatment declined by client'>, <A5 'Complete'>, <A6 'Referred on'>, <A14 '* All numbers under 5 have been suppressed. Where totals could be derived, figures have been rounded to the nearest 5 and marked with an asterisk. '>, <A10 'Not known'>, <A12 'Total'>, <A7 'Dropped out / left'>}

In [5]:
period = tab.excel_ref('B3').expand(RIGHT).is_not_blank() 
period

{<H3 '2008-09'>, <D3 '2006-07'>, <X3 '2016-17'>, <P3 '2012-13'>, <F3 '2007-08'>, <L3 '2010-11'>, <T3 '2014-15'>, <N3 '2011-12'>, <B3 '2005-06'>, <V3 '2015-16'>, <R3 '2013-14'>, <J3 '2009-10'>}

In [6]:
mt = tab.excel_ref('B4').expand(RIGHT).is_not_blank() 
mt

{<Q4 '%'>, <X4 'n'>, <C4 '%'>, <J4 'n'>, <O4 '%'>, <E4 '%'>, <F4 'n'>, <K4 '%'>, <B4 'n'>, <T4 'n'>, <Y4 '%'>, <P4 'n'>, <L4 'n'>, <W4 '%'>, <V4 'n'>, <R4 'n'>, <M4 '%'>, <H4 'n'>, <S4 '%'>, <N4 'n'>, <I4 '%'>, <U4 '%'>, <G4 '%'>, <D4 'n'>}

In [7]:
Dimensions = [
            HDim(mt,'Measure Type',DIRECTLY,ABOVE),
            HDim(tr,'Clients in treatment',DIRECTLY,LEFT),
            HDimConst('Substance','All'),
            HDim(period,'Period',CLOSEST,LEFT),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Measure Type,Clients in treatment,Substance,Period,Unit
0,4105,,n,Complete,All,2005-06,People
1,0.476494,,%,Complete,All,2005-06,People
2,5726,,n,Complete,All,2006-07,People
3,0.49748,,%,Complete,All,2006-07,People
4,8073,,n,Complete,All,2007-08,People
5,0.573326,,%,Complete,All,2007-08,People
6,9546,,n,Complete,All,2008-09,People
7,0.652852,,%,Complete,All,2008-09,People
8,10160,,n,Complete,All,2009-10,People
9,0.692806,,%,Complete,All,2009-10,People


In [10]:
new_table = new_table[new_table['OBS'] != '' ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Measure Type,Clients in treatment,Substance,Period,Unit
0,4105.0,,n,Complete,All,2005-06,People
1,0.476494,,%,Complete,All,2005-06,People
2,5726.0,,n,Complete,All,2006-07,People
3,0.49748,,%,Complete,All,2006-07,People
4,8073.0,,n,Complete,All,2007-08,People


In [13]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Measure Type,Clients in treatment,Substance,Period,Unit
187,1,,%,Total,All,2014-15,People
188,11224,,n,Total,All,2015-16,People
189,1,,%,Total,All,2015-16,People
190,10834,,n,Total,All,2016-17,People
191,1,,%,Total,All,2016-17,People


In [14]:
new_table.dtypes

Value                   object
DATAMARKER              object
Measure Type            object
Clients in treatment    object
Substance               object
Period                  object
Unit                    object
dtype: object

In [15]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
new_table.head(3)

Unnamed: 0,Value,DATAMARKER,Measure Type,Clients in treatment,Substance,Period,Unit
0,4105.0,,n,Complete,All,2005-06,People
1,0.4764944863609983,,%,Complete,All,2005-06,People
2,5726.0,,n,Complete,All,2006-07,People


In [17]:
new_table['Basis of treatment'] =  'Treatment exit reasons' 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table['Substance'] = new_table['Substance'].str.rstrip('‡')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda x: {
        'Total' : 'All reasons', 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
new_table = new_table[['Period','Basis of treatment','Substance','Clients in treatment','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table5.4.1.csv'), index = False)

In [23]:
new_table.tail()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
187,2014-15,Treatment exit reasons,All,All reasons,Percentage,1.0,People
188,2015-16,Treatment exit reasons,All,All reasons,Count,11224.0,People
189,2015-16,Treatment exit reasons,All,All reasons,Percentage,1.0,People
190,2016-17,Treatment exit reasons,All,All reasons,Count,10834.0,People
191,2016-17,Treatment exit reasons,All,All reasons,Percentage,1.0,People
