Table 4.1.2: Club drug and new psychoactive substances breakdown of all clients in treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session wrapped by CacheControl, backed by a file cache in
    # ./.cache and configured with the LastModified heuristic.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Local folder that receives the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of saving an error page as the
    # .xlsx, which would only surface later as a confusing parse failure.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook restricted to the 'Table 4.1.2' sheet and take the first
# (per the log output, only) tab that comes back.
tab = loadxlstabs(inputFile, sheetids='Table 4.1.2')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.1.2']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: anchor at B5, extend the selection down and to the right,
# then discard blank cells.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)

In [4]:
# Display the selected cells to sanity-check the observation range.
observations

{<C7 0.00225230010836538>, <C13 0.000630360722152576>, <C10 0.000651608836382438>, <H17 33.0>, <B20 141189.0>, <E17 0.000463548251158871>, <D13 65.0>, <B12 631.0>, <G10 0.00350382892645571>, <H20 268390.0>, <H15 112.0>, <B13 89.0>, <H14 526.0>, <B10 92.0>, <C5 0.00223105199413552>, <F7 623.0>, <C16 0.000247894666015058>, <B18 2066.0>, <B15 63.0>, <F18 1604.0>, <E16 0.00126422250316056>, <D17 11.0>, <H12 1164.0>, <D15 34.0>, <F8 340.0>, <F17 11.0>, <B5 315.0>, <G7 0.0225039734142465>, <F16 19.0>, <E13 0.00273914875684787>, <G13 0.00130039011703511>, <C17 7.7909752176161e-05>, <D12 356.0>, <E9 0.012810788032027>, <G20 1.0>, <G8 0.0122814622164427>, <D8 600.0>, <H6 2074.0>, <H9 430.0>, <B8 196.0>, <I8 0.00423264652185253>, <E10 0.0156763590391909>, <I18 0.0244606729013749>, <E7 0.0309313105773283>, <G5 0.00429851177575495>, <E8 0.0252844500632111>, <I15 0.000417303178210813>, <H7 1675.0>, <F19 1516.0>, <G6 0.0123175841641381>, <F14 88.0>, <B6 1103.0>, <G19 0.0547608727062563>, <D5 255.0>,

In [5]:
# Row headers: non-blank cells in column A from A5 downwards. As the preview
# shows, this also picks up the sub-heading and footnote rows in column A.
Substance = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
Substance

{<A20 'Total number in treatment'>, <A10 'Methamphetamine'>, <A11 'Further breakdown of new psychoactive substances:'>, <A12 'Predominantly cannabinoid'>, <A24 '** This is a count of individuals as clients may cited multiple NPS substances in the same treatment journey.'>, <A16 'Predominantly hallucinogenic'>, <A5 'Mephedrone'>, <A19 'Total number of individuals'>, <A9 'GHB/GBL'>, <A25 'Percentages may equal 0% or not sum to 100% due to rounding'>, <A8 'Ketamine'>, <A18 'Total number of citations'>, <A14 'Other'>, <A23 '* This total is for the substances listed in the top part of the table (excluding NPS) plus the individual citations of the NPS substances in the bottom half of the table as clients may have multiple citations for different NPS substances. '>, <A15 'Predominantly sedative/opioid'>, <A13 'Predominantly stimulant'>, <A6 'New psychoactive substances'>, <A7 'Ecstasy'>, <A17 'Predominantly dissociative'>}

In [6]:
# Client-group headings: non-blank cells in row 3 from column B rightwards.
Clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
Clients

{<D3 'Non-opiate only'>, <B3 'Opiate'>, <H3 'Total'>, <F3 'Non-opiate and Alcohol'>}

In [7]:
# Per-column measure markers ('n' / '%'): non-blank cells in row 4 from
# column B rightwards.
MeasureType = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
MeasureType

{<D4 'n'>, <H4 'n'>, <B4 'n'>, <C4 '%'>, <F4 'n'>, <G4 '%'>, <E4 '%'>, <I4 '%'>}

In [8]:
# How each observation cell is tied to its headers.
Dimensions = [
    # Substance label looked up DIRECTLY to the LEFT of the observation.
    HDim(Substance, 'Substance', DIRECTLY, LEFT),
    # Client-group headings appear only in columns B, D, F and H (see the
    # Clients preview), so the CLOSEST heading to the LEFT is used.
    HDim(Clients, 'Clients', CLOSEST, LEFT),
    # 'n' / '%' marker looked up DIRECTLY ABOVE the observation.
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Constant unit applied to every observation.
    HDimConst('Unit', 'People'),
]

In [9]:
# Bind the observation cells to their dimensions.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional HTML preview of the segment, left disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
# Convert the conversion segment to a pandas DataFrame; per the preview it has
# an OBS column plus one column per dimension declared in Dimensions.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Clients,Measure Type,Unit
0,315.000000,Mephedrone,Opiate,n,People
1,0.002231,Mephedrone,Opiate,%,People
2,255.000000,Mephedrone,Non-opiate only,n,People
3,0.010746,Mephedrone,Non-opiate only,%,People
4,119.000000,Mephedrone,Non-opiate and Alcohol,n,People
5,0.004299,Mephedrone,Non-opiate and Alcohol,%,People
6,689.000000,Mephedrone,Total,n,People
7,0.002567,Mephedrone,Total,%,People
8,1103.000000,New psychoactive substances,Opiate,n,People
9,0.007812,New psychoactive substances,Opiate,%,People


In [11]:
# Keep only observations whose value is not exactly zero.
# .copy() makes the result an independent frame: later cells assign columns on
# new_table, and doing that on a boolean-mask slice raises
# SettingWithCopyWarning and leaves it ambiguous which frame is modified.
new_table = new_table[new_table['OBS'] != 0].copy()

In [12]:
# Relabel the OBS column as 'Value'. rename() returns a new frame rather than
# mutating the existing column index in place; all other columns keep their
# names and order.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Expand the spreadsheet's 'n' / '%' markers into full measure-type labels;
# any other value passes through unchanged.
measureTypeLabels = {
    'n': 'Count',
    '%': 'Percentage',
}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measureTypeLabels.get(marker, marker))


In [14]:
# Spot-check the renamed Value column and the expanded Measure Type labels.
new_table.head()

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,315.0,Mephedrone,Opiate,Count,People
1,0.002231,Mephedrone,Opiate,Percentage,People
2,255.0,Mephedrone,Non-opiate only,Count,People
3,0.010746,Mephedrone,Non-opiate only,Percentage,People
4,119.0,Mephedrone,Non-opiate and Alcohol,Count,People


In [15]:
# Inspect column dtypes; Value is still float64 at this point.
new_table.dtypes

Value           float64
Substance        object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [16]:
# Store Value as text. Every row is converted the same way, so counts keep the
# float rendering (e.g. '315.0' in the preview that follows).
# NOTE(review): confirm downstream consumers expect counts in this form.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# NOTE(review): disabled experiment, kept for reference. As written it would
# return str(value) for 'Count' rows and the value unchanged for all others.
# def user_perc(x,y):
    
#     if x == 'Count':
#         return str(y)
#     else:
#         return y
    
# new_table['Value'] = new_table.apply(lambda row: user_perc(row['Measure Type'], row['Value']), axis = 1)

In [18]:
# Preview the first rows after Value has been converted to strings.
new_table.head(3)

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,315.0,Mephedrone,Opiate,Count,People
1,0.0022310519941355,Mephedrone,Opiate,Percentage,People
2,255.0,Mephedrone,Non-opiate only,Count,People


In [19]:
# Relabel the spreadsheet's 'Total' client group as 'All Clients'; every other
# label passes through unchanged.
clientLabels = {'Total': 'All Clients'}
new_table['Clients'] = new_table['Clients'].map(
    lambda label: clientLabels.get(label, label))

In [20]:
# Column order of the final tidy table.
outputColumns = [
    'Period',
    'Basis of treatment',
    'Substance',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]

# Relabel 'Clients' and leave every other column name untouched.
new_table.columns = [
    {'Clients': 'Clients in treatment'}.get(name, name)
    for name in new_table.columns
]

# Constant dimensions that apply to every row of this table.
new_table['Period'] = '2017-18'
new_table['Basis of treatment'] = 'All'

new_table = new_table[outputColumns]

In [21]:
if is_interactive():
    # Write the tidy table to out/table4.1.2.csv without the DataFrame index.
    # NOTE(review): the folder variable's name looks like a search-and-replace
    # artefact of 'destinationFolder'; it is kept unchanged here in case other
    # cells refer to it.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    outputFile = SubstancetinationFolder / 'table4.1.2.csv'
    new_table.to_csv(outputFile, index=False)