Table 4.9.1: Substance breakdown of new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    # NOTE(review): is_interactive() comes from gssutils; presumably it is true
    # when the notebook is run on its own rather than by a driver - confirm.
    # The imports live inside the guard because only this block needs them.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session wrapped with an on-disk cache stored under '.cache',
    # using the LastModified caching heuristic.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Folder that receives the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of silently saving an error page
    # under an .xlsx name, which would only surface later as a parse failure.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook restricted to the 'Table 4.9.1' sheet and keep the first
# (only requested) tab.
tab = loadxlstabs(
    inputFile,
    sheetids='Table 4.9.1',
)[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.9.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: start at B6, extend down and then right, and keep only
# the cells that are not blank.
observations = (
    tab.excel_ref('B6')
       .expand(DOWN)
       .expand(RIGHT)
       .is_not_blank()
)
observations

{<J14 1464.0>, <K14 0.0114997604216579>, <B6 18767.0>, <E14 0.0386456089973678>, <K16 0.600100544353413>, <C8 0.0>, <B12 1065.0>, <E8 0.136336444125389>, <G17 1.0>, <B8 0.0>, <D13 910.0>, <H17 50656.0>, <F16 18757.0>, <I13 0.0>, <B7 22411.0>, <I11 0.0>, <J13 4057.0>, <E11 0.382986360373295>, <I7 0.0>, <D10 9566.0>, <G14 0.0170602974889375>, <H7 0.0>, <C6 0.455753072028753>, <H11 0.0>, <G6 0.0>, <H13 0.0>, <G16 1.0>, <G7 0.0>, <F14 320.0>, <D8 2279.0>, <I16 1.0>, <K7 0.176039023777169>, <G11 0.497894119528709>, <C10 0.13805915780271>, <G8 0.107799754758224>, <F17 18757.0>, <F6 0.0>, <H16 50656.0>, <E17 1.0>, <F8 2022.0>, <F13 663.0>, <E6 0.0>, <J6 18767.0>, <C12 0.0258633250764972>, <K13 0.0318678470154823>, <D6 0.0>, <E13 0.0544388609715243>, <C13 0.0603234736995483>, <E10 0.572266092366595>, <J11 17796.0>, <E16 0.0>, <G12 0.0572053100175934>, <D14 646.0>, <C14 0.0120938365146437>, <F11 9339.0>, <C7 0.544246927971247>, <J8 4301.0>, <D16 0.0>, <K11 0.139788071355071>, <K6 0.147415303164

In [4]:
# Substance labels: non-blank cells from A6 downwards, with everything from
# A19 downwards removed.
# NOTE(review): the A19+ cells are presumably footnotes below the table - confirm.
Substance = (
    tab.excel_ref('A6').expand(DOWN).is_not_blank()
    - tab.excel_ref('A19').expand(DOWN)
)
Substance

{<A7 'Both opiate and crack cocaine'>, <A17 'Total number of individuals*'>, <A13 'Benzodiazepine'>, <A9 'Other drug use'>, <A11 'Cocaine'>, <A14 'Other'>, <A8 'Crack cocaine (not opiate)'>, <A10 'Cannabis'>, <A15 'Alcohol'>, <A16 'Alcohol'>, <A6 'Opiate (not crack cocaine)'>, <A12 'Amphetamine (other than ecstasy)'>}

In [5]:
# Client-group headers: the non-blank cells of row 3, from column B rightwards.
Clients = (
    tab.excel_ref('B3')
       .expand(RIGHT)
       .is_not_blank()
)
Clients

{<J3 'Total'>, <D3 'Non-opiate only'>, <B3 'Opiate'>, <F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>}

In [6]:
# Measure markers ('n' / '%'): the non-blank cells of row 4, from column B
# rightwards.
MeasureType = (
    tab.excel_ref('B4')
       .expand(RIGHT)
       .is_not_blank()
)
MeasureType

{<H4 'n'>, <G4 '%'>, <J4 'n'>, <I4 '%'>, <C4 '%'>, <F4 'n'>, <K4 '%'>, <E4 '%'>, <D4 'n'>, <B4 'n'>}

In [7]:
# Header dimensions attached to every observation cell.
Dimensions = [
    # Row label, looked up DIRECTLY to the LEFT of the observation.
    HDim(Substance, 'Substance', DIRECTLY, LEFT),
    # Client-group header, looked up as the CLOSEST one to the LEFT.
    HDim(Clients, 'Clients in new treatment', CLOSEST, LEFT),
    # 'n' / '%' marker, looked up DIRECTLY ABOVE the observation.
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Constant unit applied to every row.
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with their header dimensions.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Convert the conversion segment into a pandas DataFrame and display it;
# the observation values arrive in a column named 'OBS'.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Clients in new treatment,Measure Type,Unit
0,18767.000000,Opiate (not crack cocaine),Opiate,n,People
1,0.455753,Opiate (not crack cocaine),Opiate,%,People
2,0.000000,Opiate (not crack cocaine),Non-opiate only,n,People
3,0.000000,Opiate (not crack cocaine),Non-opiate only,%,People
4,0.000000,Opiate (not crack cocaine),Non-opiate and Alcohol,n,People
5,0.000000,Opiate (not crack cocaine),Non-opiate and Alcohol,%,People
6,0.000000,Opiate (not crack cocaine),Alcohol only,n,People
7,0.000000,Opiate (not crack cocaine),Alcohol only,%,People
8,18767.000000,Opiate (not crack cocaine),Total,n,People
9,0.147415,Opiate (not crack cocaine),Total,%,People


In [10]:
# Drop observations whose value is exactly zero.
# The explicit .copy() makes new_table an independent frame rather than a
# slice of the original, so later column assignments on it do not raise
# pandas' SettingWithCopyWarning.
# NOTE(review): this also discards any genuine zero counts - confirm intended.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Rename the 'OBS' column to 'Value'. rename() returns a new frame and leaves
# every other column name untouched, instead of overwriting .columns in place.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [12]:
# Preview the first rows after the zero-value filter and the OBS -> Value rename.
new_table.head()

Unnamed: 0,Value,Substance,Clients in new treatment,Measure Type,Unit
0,18767.0,Opiate (not crack cocaine),Opiate,n,People
1,0.455753,Opiate (not crack cocaine),Opiate,%,People
8,18767.0,Opiate (not crack cocaine),Total,n,People
9,0.147415,Opiate (not crack cocaine),Total,%,People
10,22411.0,Both opiate and crack cocaine,Opiate,n,People


In [13]:
# Translate the spreadsheet's measure markers into readable labels; any value
# not listed here passes through unchanged.
measure_labels = {
    'n': 'Count',
    '%': 'Percentage',
}
# assign() builds a new frame instead of writing into a sliced one, which
# avoids pandas' SettingWithCopyWarning. The mapping is defined once rather
# than being rebuilt for every element.
new_table = new_table.assign(**{
    'Measure Type': new_table['Measure Type'].map(
        lambda marker: measure_labels.get(marker, marker))
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [14]:
# Check the last rows to confirm the measure markers now read
# 'Count' / 'Percentage'.
new_table.tail()

Unnamed: 0,Value,Substance,Clients in new treatment,Measure Type,Unit
95,1.0,Total number of individuals*,Non-opiate and Alcohol,Percentage,People
96,50656.0,Total number of individuals*,Alcohol only,Count,People
97,1.0,Total number of individuals*,Alcohol only,Percentage,People
98,127307.0,Total number of individuals*,Total,Count,People
99,1.0,Total number of individuals*,Total,Percentage,People


In [15]:
# Inspect the column dtypes; 'Value' is still numeric at this point.
new_table.dtypes

Value                       float64
Substance                    object
Clients in new treatment     object
Measure Type                 object
Unit                         object
dtype: object

In [16]:
# Store the values as text. assign() returns a new frame instead of writing
# into a sliced one, which avoids pandas' SettingWithCopyWarning.
new_table = new_table.assign(Value=new_table['Value'].astype(str))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [17]:
# Spot-check the first three rows after converting 'Value' to text.
new_table.head(3)

Unnamed: 0,Value,Substance,Clients in new treatment,Measure Type,Unit
0,18767.0,Opiate (not crack cocaine),Opiate,Count,People
1,0.455753072028753,Opiate (not crack cocaine),Opiate,Percentage,People
8,18767.0,Opiate (not crack cocaine),Total,Count,People


In [18]:
# Remove trailing '*' markers from the substance labels
# (e.g. 'Total number of individuals*'). assign() returns a new frame instead
# of writing into a sliced one, which avoids pandas' SettingWithCopyWarning.
new_table = new_table.assign(Substance=new_table['Substance'].str.rstrip('*'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
# Relabel the 'Total' client group as 'All Clients'; every other label passes
# through unchanged.
client_labels = {
    'Total': 'All Clients',
}
# assign() builds a new frame instead of writing into a sliced one, which
# avoids pandas' SettingWithCopyWarning. The mapping is defined once rather
# than being rebuilt for every element.
new_table = new_table.assign(**{
    'Clients in new treatment': new_table['Clients in new treatment'].map(
        lambda label: client_labels.get(label, label))
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [20]:
# Spot-check the first three rows after relabelling 'Total' as 'All Clients'.
new_table.head(3)

Unnamed: 0,Value,Substance,Clients in new treatment,Measure Type,Unit
0,18767.0,Opiate (not crack cocaine),Opiate,Count,People
1,0.455753072028753,Opiate (not crack cocaine),Opiate,Percentage,People
8,18767.0,Opiate (not crack cocaine),All Clients,Count,People


In [21]:
# Select the output columns in their final order.
new_table = new_table.loc[:, [
    'Substance',
    'Clients in new treatment',
    'Measure Type',
    'Value',
    'Unit',
]]

In [22]:
if is_interactive():
    # Write the tidy table to out/table4.9.1.csv, creating the folder if needed.
    # The folder variable was previously named 'SubstancetinationFolder', which
    # looks like a search-and-replace artefact of 'destinationFolder'.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table4.9.1.csv', index=False)