Table 5.2.4: Length of time in prescribing for clients in continuous prescribing treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 5.2.4')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 5.2.4']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<I7 0.01>, <F10 27.0>, <B9 6801.0>, <G6 0.05>, <K11 1.0>, <K7 0.1>, <B8 11363.0>, <H11 14805.0>, <I5 0.93>, <K9 0.05>, <C10 0.22>, <G8 0.01>, <B6 20289.0>, <H7 153.0>, <C5 0.37>, <G11 1.0>, <F8 29.0>, <J8 11495.0>, <F9 18.0>, <C7 0.12>, <B7 15405.0>, <E8 0.03>, <K10 0.19>, <J7 15713.0>, <G5 0.91>, <C11 1.0>, <E6 0.16>, <C9 0.05>, <J11 152001.0>, <K6 0.14>, <H10 27.0>, <B11 131875.0>, <F7 57.0>, <G10 0.01>, <G9 0.0>, <K5 0.44>, <B5 48827.0>, <F11 3784.0>, <J10 29328.0>, <I11 1.0>, <H9 41.0>, <C6 0.15>, <J5 67118.0>, <D11 1537.0>, <D8 45.0>, <D10 84.0>, <F5 3448.0>, <D9 28.0>, <H5 13803.0>, <C8 0.09>, <G7 0.02>, <E9 0.02>, <F6 205.0>, <B10 29190.0>, <E10 0.05>, <D6 242.0>, <E11 1.0>, <D5 1040.0>, <K8 0.08>, <I6 0.05>, <D7 98.0>, <E7 0.06>, <J9 6888.0>, <H6 723.0>, <I10 0.0>, <I9 0.0>, <E5 0.68>, <I8 0.0>, <J6 21459.0>, <H8 58.0>}

In [4]:
time = tab.excel_ref('A5').expand(DOWN).is_not_blank() 
time

{<A7 '2-3 years'>, <A14 '*Percentages may equal 0% or not sum to 100% due to rounding'>, <A6 '1-2 years'>, <A8 '3-4 years'>, <A10 '5 years +'>, <A5 'Less than 12 months'>, <A11 'Total'>, <A9 '4-5 years'>}

In [5]:
clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
clients

{<H3 'Alcohol only'>, <B3 'Opiate'>, <D3 'Non-opiate only'>, <F3 'Non-opiate and Alcohol'>, <J3 'Total'>}

In [6]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<F4 'n'>, <I4 '%'>, <C4 '%'>, <E4 '%'>, <H4 'n'>, <B4 'n'>, <K4 '%'>, <G4 '%'>, <J4 'n'>, <D4 'n'>}

In [7]:
Dimensions = [
            HDim(clients,'Clients in treatment',CLOSEST,LEFT),
            HDim(time,'Length of time in prescribing',DIRECTLY,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Length of time in prescribing,Measure Type,Unit
0,48827.00,Opiate,Less than 12 months,n,People
1,0.37,Opiate,Less than 12 months,%,People
2,1040.00,Non-opiate only,Less than 12 months,n,People
3,0.68,Non-opiate only,Less than 12 months,%,People
4,3448.00,Non-opiate and Alcohol,Less than 12 months,n,People
5,0.91,Non-opiate and Alcohol,Less than 12 months,%,People
6,13803.00,Alcohol only,Less than 12 months,n,People
7,0.93,Alcohol only,Less than 12 months,%,People
8,67118.00,Total,Less than 12 months,n,People
9,0.44,Total,Less than 12 months,%,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.head()

Unnamed: 0,Value,Clients in treatment,Length of time in prescribing,Measure Type,Unit
0,48827.0,Opiate,Less than 12 months,n,People
1,0.37,Opiate,Less than 12 months,%,People
2,1040.0,Non-opiate only,Less than 12 months,n,People
3,0.68,Non-opiate only,Less than 12 months,%,People
4,3448.0,Non-opiate and Alcohol,Less than 12 months,n,People


In [13]:
new_table['Measure Type'].unique()

array(['n', '%'], dtype=object)

In [14]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage'
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [15]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Length of time in prescribing,Measure Type,Unit
65,1.0,Non-opiate and Alcohol,Total,Percentage,People
66,14805.0,Alcohol only,Total,Count,People
67,1.0,Alcohol only,Total,Percentage,People
68,152001.0,Total,Total,Count,People
69,1.0,Total,Total,Percentage,People


In [16]:
new_table.dtypes

Value                            float64
Clients in treatment              object
Length of time in prescribing     object
Measure Type                      object
Unit                              object
dtype: object

In [17]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [18]:
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Length of time in prescribing,Measure Type,Unit
0,48827.0,Opiate,Less than 12 months,Count,People
1,0.37,Opiate,Less than 12 months,Percentage,People
2,1040.0,Non-opiate only,Less than 12 months,Count,People


In [19]:
new_table['Length of time in prescribing'] = new_table['Length of time in prescribing'].map(
    lambda x: {
        'Total' : 'All years' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda x: {
        'Total' : 'All' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Length of time in prescribing,Measure Type,Unit
65,1.0,Non-opiate and Alcohol,All years,Percentage,People
66,14805.0,Alcohol only,All years,Count,People
67,1.0,Alcohol only,All years,Percentage,People
68,152001.0,All,All years,Count,People
69,1.0,All,All years,Percentage,People


In [22]:
new_table = new_table[['Clients in treatment','Length of time in prescribing','Measure Type','Value','Unit']]

In [23]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table5.2.4.csv'), index = False)