Table 6.2.1: Change in use of cited substance for clients with a review TOP/AOR in the year who reported using at the start of treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 6.2.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 6.2.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B7').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<B34 14613.0>, <C9 21.5026906200845>, <C30 26.6642981438515>, <I12 6.68045977011494>, <C18 13.9043367346939>, <C15 27.251503897055>, <E26 0.101445599797109>, <F25 0.28755364806867>, <D7 0.399067103865515>, <B28 4709.0>, <E27 0.0483460559796438>, <B9 12079.0>, <B20 594.0>, <D30 0.315110208816705>, <F23 0.263888888888889>, <C23 11.4166666666667>, <H35 0.0>, <H10 0.067042987407729>, <I26 2.43748414912504>, <C34 26.8200232669541>, <E21 0.173305526590198>, <C14 18.1752015173068>, <C27 12.1806615776081>, <B27 393.0>, <C10 15.6835432045159>, <H28 0.0339774899129327>, <D13 0.495811855670103>, <B29 10630.0>, <I28 9.79592270121042>, <F31 0.21>, <B33 33022.0>, <E10 0.18419452887538>, <I7 8.55207130651648>, <E28 0.11255043533659>, <H9 0.0490106796920275>, <B14 4218.0>, <E18 0.0931122448979592>, <H19 0.0186744782131088>, <I35 1.0>, <I9 9.7904627866545>, <H26 0.0187674359624651>, <I34 17.9380688428111>, <I31 2.9>, <H29 0.0292568203198495>, <B12 435.0>, <D16 0.540228224758053>, <D9 0.33140160609322>

In [4]:
reason = tab.excel_ref('A6').expand(DOWN).is_not_blank() 
reason

{<A32 'Alcohol only'>, <A19 'Cocaine use'>, <A21 'Cannabis use'>, <A34 'Tobacco use'>, <A9 '          Opiate use (in opiate and crack clients)'>, <A7 'Opiate use (all opiate clients)'>, <A24 'Non-opiate and Alcohol'>, <A35 'Injecting'>, <A10 'Crack use'>, <A13 'Cannabis use'>, <A12 'Amphetamines use'>, <A30 'Tobacco use'>, <A25 'Crack use'>, <A6 'Opiates'>, <A14 'Alcohol use'>, <A20 'Amphetamines use'>, <A11 'Cocaine use'>, <A26 'Cocaine use'>, <A8 '          Opiate use (in opiate only clients)'>, <A29 'Alcohol use'>, <A23 'Injecting'>, <A17 'Non-opiate only'>, <A18 'Crack use'>, <A28 'Cannabis use'>, <A22 'Tobacco use'>, <A16 'Injecting'>, <A33 'Alcohol use'>, <A27 'Amphetamines use'>, <A31 'Injecting'>, <A15 'Tobacco use'>}

In [5]:
clients = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
clients

{<F4 'Unchanged'>, <D4 'Abstinent'>, <C4 'Average days of use at start'>, <H4 'Deteriorated'>, <B4 'Reviewed clients using at start'>, <E4 'Improved'>, <I4 'Average days of use at review'>}

In [6]:
Treatmentstatus = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Treatmentstatus

{<D3 'AT SIX MONTH REVIEW'>, <B3 'START OF TREATMENT'>}

In [7]:
MeasureType = tab.excel_ref('B5').expand(RIGHT).is_not_blank()
MeasureType

{<B5 'n'>, <H5 '%'>, <C5 'mean'>, <E5 '%'>, <D5 '%'>, <F5 '%'>, <I5 'mean'>}

In [10]:
Dimensions = [
            HDim(clients,'Clients in treatment',CLOSEST,LEFT),
            HDim(Treatmentstatus,'Treatment Status',CLOSEST,LEFT),
            HDim(reason,'Substance',DIRECTLY,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [11]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [12]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Treatment Status,Substance,Measure Type,Unit
0,21653.000000,Reviewed clients using at start,START OF TREATMENT,Opiate use (all opiate clients),n,People
1,22.139057,Average days of use at start,START OF TREATMENT,Opiate use (all opiate clients),mean,People
2,0.399067,Abstinent,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
3,0.249296,Improved,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
4,0.312705,Unchanged,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
5,0.038932,Deteriorated,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
6,8.552071,Average days of use at review,AT SIX MONTH REVIEW,Opiate use (all opiate clients),mean,People
7,9574.000000,Reviewed clients using at start,START OF TREATMENT,Opiate use (in opiate only clients),n,People
8,22.941926,Average days of use at start,START OF TREATMENT,Opiate use (in opiate only clients),mean,People
9,0.484437,Abstinent,AT SIX MONTH REVIEW,Opiate use (in opiate only clients),%,People


In [13]:
new_table = new_table[new_table['OBS'] != 0 ]

In [14]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [15]:
new_table.head()

Unnamed: 0,Value,Clients in treatment,Treatment Status,Substance,Measure Type,Unit
0,21653.0,Reviewed clients using at start,START OF TREATMENT,Opiate use (all opiate clients),n,People
1,22.139057,Average days of use at start,START OF TREATMENT,Opiate use (all opiate clients),mean,People
2,0.399067,Abstinent,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
3,0.249296,Improved,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People
4,0.312705,Unchanged,AT SIX MONTH REVIEW,Opiate use (all opiate clients),%,People


In [18]:
new_table['Measure Type'].unique()

array(['Count', 'mean', 'Percentage'], dtype=object)

In [17]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage'
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [19]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Treatment Status,Substance,Measure Type,Unit
163,48.0,Reviewed clients using at start,START OF TREATMENT,Injecting,Count,People
164,14.166667,Average days of use at start,START OF TREATMENT,Injecting,mean,People
165,0.916667,Abstinent,AT SIX MONTH REVIEW,Injecting,Percentage,People
167,0.083333,Unchanged,AT SIX MONTH REVIEW,Injecting,Percentage,People
169,1.0,Average days of use at review,AT SIX MONTH REVIEW,Injecting,mean,People


In [20]:
new_table.dtypes

Value                   float64
Clients in treatment     object
Treatment Status         object
Substance                object
Measure Type             object
Unit                     object
dtype: object

In [21]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [22]:
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Treatment Status,Substance,Measure Type,Unit
0,21653.0,Reviewed clients using at start,START OF TREATMENT,Opiate use (all opiate clients),Count,People
1,22.1390569436107,Average days of use at start,START OF TREATMENT,Opiate use (all opiate clients),mean,People
2,0.399067103865515,Abstinent,AT SIX MONTH REVIEW,Opiate use (all opiate clients),Percentage,People


In [23]:
new_table['Treatment Status'] = new_table['Treatment Status'].map(
    lambda x: {
        'START OF TREATMENT' : 'Start of treatment',
        'AT SIX MONTH REVIEW' : 'At six month Review'
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [24]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Treatment Status,Substance,Measure Type,Unit
163,48.0,Reviewed clients using at start,Start of treatment,Injecting,Count,People
164,14.1666666666667,Average days of use at start,Start of treatment,Injecting,mean,People
165,0.916666666666667,Abstinent,At six month Review,Injecting,Percentage,People
167,0.0833333333333333,Unchanged,At six month Review,Injecting,Percentage,People
169,1.0,Average days of use at review,At six month Review,Injecting,mean,People


In [25]:
new_table = new_table[['Substance','Treatment Status','Clients in treatment','Measure Type','Value','Unit']]

In [26]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table6.2.1.csv'), index = False)