Table 7.4.1: Trends in treatment exit reason

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 7.4.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 7.4.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<G141 6016.0>, <K165 2360.0>, <L116 0.0110046347987437>, <C149 652.0>, <H39 0.119629934963818>, <G8 3168.0>, <E60 45.0>, <L18 0.0148998436518648>, <E175 6335.0>, <I89 50641.0>, <E167 73.0>, <C39 3529.0>, <L63 0.0126561472715319>, <G33 710.0>, <L119 0.0>, <C215 531.0>, <K186 0.0>, <G63 60.0>, <D34 0.0458395897668357>, <L175 0.31727258419893>, <B97 'Died'>, <I131 649.0>, <G44 0.0>, <J217 0.00209263954283875>, <E40 3902.0>, <J197 0.00821316015662305>, <C71 546.0>, <I18 372.0>, <K160 67788.0>, <J190 0.0>, <B187 'Moved away'>, <D206 2.53074859543453e-05>, <E172 0.0>, <K112 11598.0>, <H61 0.00073495022382575>, <F179 0.0415879017013233>, <E160 10568.0>, <L19 0.0356259938797055>, <G139 0.0>, <L85 0.00234278973462132>, <E131 33.0>, <L20 0.056124971603437>, <C47 3387.0>, <G70 39.0>, <C128 6674.0>, <K212 42287.0>, <D10 0.0>, <B12 'Died'>, <K54 2925.0>, <B100 'Exit reason inconsistent'>, <J105 0.00165693842967426>, <H210 0.22710169855945>, <F91 0.258440276406713>, <K149 1125.0>, <D145 0.168123686

In [4]:
reason = tab.excel_ref('B5').expand(DOWN).is_not_blank() 
reason

{<B126 'Treatment completed free of dependence subtotal'>, <B83 'Exit reason inconsistent'>, <B38 'Total'>, <B218 'Treatment withdrawn'>, <B105 'Other'>, <B186 'Referred on'>, <B111 'Transferred – not in custody'>, <B15 'Exit reason inconsistent'>, <B213 'Transferred – not in custody'>, <B177 'Treatment completed free of dependence subtotal'>, <B27 'Transferred – in custody'>, <B123 'Total'>, <B144 'Dropped out/left'>, <B222 'No appropriate treatment'>, <B198 'Treatment declined'>, <B75 'Treatment completed free of dependence subtotal'>, <B178 'Dropped out/left'>, <B26 'Transferred – not in custody'>, <B210 'Completed free of dependence'>, <B116 'Treatment withdrawn'>, <B146 'Transferred – in custody'>, <B153 'Moved away'>, <B94 'Transferred – not in custody'>, <B86 'No appropriate treatment'>, <B59 'Dropped out/left'>, <B214 'Transferred – in custody'>, <B221 'Moved away'>, <B133 'Treatment withdrawn'>, <B87 'Not known'>, <B165 'Died'>, <B17 'Moved away'>, <B97 'Died'>, <B14 'Treatmen

In [5]:
clients = tab.excel_ref('C3').expand(RIGHT).is_not_blank()
clients

{<E3 'Non-opiate only'>, <K3 'Total'>, <I3 'Alcohol only'>, <G3 'Non-opiate and Alcohol'>, <C3 'Opiate'>}

In [6]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<I4 'n'>, <D4 '%'>, <J4 '%'>, <G4 'n'>, <K4 'n'>, <H4 '%'>, <C4 'n'>, <F4 '%'>, <L4 '%'>, <E4 'n'>}

In [7]:
period = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A228').expand(DOWN)
period

{<A22 '2006-07'>, <A124 '2012-13'>, <A90 '2010-11'>, <A141 '2013-14'>, <A107 '2011-12'>, <A56 '2008-09'>, <A158 '2014-15'>, <A209 '2017-18'>, <A73 '2009-10'>, <A39 '2007-08'>, <A175 '2015-16'>, <A5 '2005-06'>, <A192 '2016-17'>}

In [8]:
Dimensions = [
            HDim(clients,'Clients in treatment',CLOSEST,LEFT),
            HDim(reason,'Treatment exit reason',DIRECTLY,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDim(period, 'Period',CLOSEST,ABOVE),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,DATAMARKER,Clients in treatment,Treatment exit reason,Measure Type,Period,Unit
0,,Completed free of dependence – no drug or alco...,,Completed free of dependence – no drug or alco...,,2005-06,People
1,2512,,Opiate,Completed free of dependence – no drug or alco...,n,2005-06,People
2,0.0700952,,Opiate,Completed free of dependence – no drug or alco...,%,2005-06,People
3,1054,,Non-opiate only,Completed free of dependence – no drug or alco...,n,2005-06,People
4,0.0797277,,Non-opiate only,Completed free of dependence – no drug or alco...,%,2005-06,People
5,594,,Non-opiate and Alcohol,Completed free of dependence – no drug or alco...,n,2005-06,People
6,0.0811808,,Non-opiate and Alcohol,Completed free of dependence – no drug or alco...,%,2005-06,People
7,1194,,Alcohol only,Completed free of dependence – no drug or alco...,n,2005-06,People
8,0.0646839,,Alcohol only,Completed free of dependence – no drug or alco...,%,2005-06,People
9,5354,,Total,Completed free of dependence – no drug or alco...,n,2005-06,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.head()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Treatment exit reason,Measure Type,Period,Unit
0,,Completed free of dependence – no drug or alco...,,Completed free of dependence – no drug or alco...,,2005-06,People
1,2512.0,,Opiate,Completed free of dependence – no drug or alco...,n,2005-06,People
2,0.0700952,,Opiate,Completed free of dependence – no drug or alco...,%,2005-06,People
3,1054.0,,Non-opiate only,Completed free of dependence – no drug or alco...,n,2005-06,People
4,0.0797277,,Non-opiate only,Completed free of dependence – no drug or alco...,%,2005-06,People


In [14]:
new_table['Measure Type'].unique()

array([None, 'n', '%'], dtype=object)

In [15]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        None: 'Count'
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [16]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Treatment exit reason,Measure Type,Period,Unit
2426,1,,Non-opiate and Alcohol,Total,Percentage,2017-18,People
2427,49698,,Alcohol only,Total,Count,2017-18,People
2428,1,,Alcohol only,Total,Percentage,2017-18,People
2429,121338,,Total,Total,Count,2017-18,People
2430,1,,Total,Total,Percentage,2017-18,People


In [17]:
new_table.dtypes

Value                    object
DATAMARKER               object
Clients in treatment     object
Treatment exit reason    object
Measure Type             object
Period                   object
Unit                     object
dtype: object

In [18]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table.head(3)

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Treatment exit reason,Measure Type,Period,Unit
0,,Completed free of dependence – no drug or alco...,,Completed free of dependence – no drug or alco...,Count,2005-06,People
1,2512.0,,Opiate,Completed free of dependence – no drug or alco...,Count,2005-06,People
2,0.0700951530541061,,Opiate,Completed free of dependence – no drug or alco...,Percentage,2005-06,People


In [20]:
new_table['Treatment exit reason'] = new_table['Treatment exit reason'].map(
    lambda x: {
        'Total' : 'All reasons' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda x: {
        'Total' : 'All clients' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [22]:
new_table.tail()

Unnamed: 0,Value,DATAMARKER,Clients in treatment,Treatment exit reason,Measure Type,Period,Unit
2426,1.0,,Non-opiate and Alcohol,All reasons,Percentage,2017-18,People
2427,49698.0,,Alcohol only,All reasons,Count,2017-18,People
2428,1.0,,Alcohol only,All reasons,Percentage,2017-18,People
2429,121338.0,,All clients,All reasons,Count,2017-18,People
2430,1.0,,All clients,All reasons,Percentage,2017-18,People


In [23]:
new_table = new_table[['Period','Treatment exit reason','Clients in treatment','Measure Type','Value','Unit']]

In [24]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table7.4.1.csv'), index = False)