Table 4.1.2: Club drug and new psychoactive substances breakdown of all clients in treatment

In [1]:
from gssutils import *

# Fetch the source workbook only when this notebook is run interactively.
# NOTE(review): when not interactive, `tabs` is presumably supplied by whatever
# runs this notebook (e.g. a parent notebook) - confirm.
if is_interactive():
    scraper = Scraper(
        'https://www.gov.uk/government/collections/alcohol-and-drug-misuse-and-treatment-statistics'
    )
    # Pick the latest "Substance misuse treatment for adults" release.
    scraper.select_dataset(
        title=lambda title: title.startswith('Substance misuse treatment for adults'),
        latest=True,
    )
    # Load the "Data tables" distribution and index its sheets by name.
    data_tables = scraper.distribution(title=lambda title: title.startswith('Data tables'))
    tabs = {sheet.name: sheet for sheet in data_tables.as_databaker()}

In [2]:
# Work on the sheet named 'Table 4.1.2' from the loaded workbook tabs.
tab = tabs['Table 4.1.2']

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell in the block that starts at B5 and
# extends down and to the right.
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()

In [4]:
# Display the selected observation cells as a visual check.
observations

{<I8 0.00423264652185253>, <E7 0.0309313105773283>, <C19 0.0143424771051569>, <C8 0.00138821012968432>, <F14 88.0>, <H18 6565.0>, <D6 630.0>, <D5 255.0>, <B7 318.0>, <H13 190.0>, <E20 1.0>, <I20 1.0>, <F5 119.0>, <I12 0.00433697231640523>, <C18 0.0146328679996317>, <I6 0.00772756063936808>, <I13 0.000707925034464771>, <D16 30.0>, <F10 97.0>, <D14 145.0>, <H5 689.0>, <F15 15.0>, <E18 0.121997471554994>, <E14 0.00611040876527602>, <E19 0.102107037505268>, <C7 0.00225230010836538>, <I19 0.0222213942397258>, <G9 0.00303424360641526>, <B19 2025.0>, <C14 0.0020752324897832>, <B20 141189.0>, <B17 11.0>, <D13 65.0>, <F6 341.0>, <G10 0.00350382892645571>, <G14 0.00317873139719694>, <H20 268390.0>, <E6 0.0265486725663717>, <B13 89.0>, <G17 0.000397341424649617>, <D9 304.0>, <I14 0.00195983456909721>, <B10 92.0>, <C16 0.000247894666015058>, <B18 2066.0>, <B15 63.0>, <F13 36.0>, <D19 2423.0>, <B16 35.0>, <B9 42.0>, <E16 0.00126422250316056>, <H12 1164.0>, <D15 34.0>, <H19 5964.0>, <B5 315.0>, <F17

In [5]:
# Substance labels: non-blank cells in column A from A5 downwards.
# The selection also includes the sub-heading in A11, the total rows and the
# footnotes in A23-A25 (visible in the output below).
# NOTE(review): presumably harmless because only labels directly left of an
# observation are looked up - confirm.
Substance = tab.excel_ref('A5').expand(DOWN).is_not_blank()
Substance

{<A13 'Predominantly stimulant'>, <A8 'Ketamine'>, <A24 '** This is a count of individuals as clients may cited multiple NPS substances in the same treatment journey.'>, <A25 'Percentages may equal 0% or not sum to 100% due to rounding'>, <A15 'Predominantly sedative/opioid'>, <A19 'Total number of individuals'>, <A16 'Predominantly hallucinogenic'>, <A12 'Predominantly cannabinoid'>, <A23 '* This total is for the substances listed in the top part of the table (excluding NPS) plus the individual citations of the NPS substances in the bottom half of the table as clients may have multiple citations for different NPS substances. '>, <A14 'Other'>, <A10 'Methamphetamine'>, <A17 'Predominantly dissociative'>, <A20 'Total number in treatment'>, <A5 'Mephedrone'>, <A18 'Total number of citations'>, <A7 'Ecstasy'>, <A9 'GHB/GBL'>, <A11 'Further breakdown of new psychoactive substances:'>, <A6 'New psychoactive substances'>}

In [6]:
# Client-group headers: non-blank cells in row 3 from B3 rightwards.
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<D3 'Non-opiate only'>, <H3 'Total'>, <F3 'Non-opiate and Alcohol'>, <B3 'Opiate'>}

In [7]:
# Measure-type headers ('n' or '%'): non-blank cells in row 4 from B4 rightwards.
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<H4 'n'>, <C4 '%'>, <D4 'n'>, <E4 '%'>, <G4 '%'>, <I4 '%'>, <F4 'n'>, <B4 'n'>}

In [8]:
# Dimension lookups attached to each observation cell.
Dimensions = [
    # Substance label taken from column A, directly left of the observation.
    HDim(Substance, 'Substance', DIRECTLY, LEFT),
    # Client-group headers appear only in B3, D3, F3 and H3, each above an
    # 'n'/'%' column pair, so the closest header to the left is used.
    HDim(Clients, 'Clients', CLOSEST, LEFT),
    # 'n' / '%' marker taken from row 4, directly above the observation.
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Constant unit applied to every observation.
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with their dimension lookups into one segment.
# NOTE(review): Dimensions defines no time dimension, so processTIMEUNIT=True
# presumably has nothing to act on here - confirm.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional HTML preview of the segment, currently disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
# Convert the segment to a pandas DataFrame: an OBS column plus one column per
# dimension declared in Dimensions.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Clients,Measure Type,Unit
0,315.000000,Mephedrone,Opiate,n,People
1,0.002231,Mephedrone,Opiate,%,People
2,255.000000,Mephedrone,Non-opiate only,n,People
3,0.010746,Mephedrone,Non-opiate only,%,People
4,119.000000,Mephedrone,Non-opiate and Alcohol,n,People
5,0.004299,Mephedrone,Non-opiate and Alcohol,%,People
6,689.000000,Mephedrone,Total,n,People
7,0.002567,Mephedrone,Total,%,People
8,1103.000000,New psychoactive substances,Opiate,n,People
9,0.007812,New psychoactive substances,Opiate,%,People


In [11]:
# Drop observations whose value is exactly zero.
# The explicit .copy() makes the filtered result an independent DataFrame, so
# the column assignments in later cells modify it directly instead of a slice
# of the unfiltered table (which triggers pandas' SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != 0].copy()

In [12]:
# Rename the OBS column to Value; every other column name is left unchanged.
new_table.columns = [{'OBS': 'Value'}.get(column, column) for column in new_table.columns]

In [13]:
# Replace the spreadsheet's column markers with descriptive measure types.
# Any value not listed in the mapping is passed through unchanged.
measure_type_labels = {'n': 'Count', '%': 'Percentage'}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_type_labels.get(marker, marker))


In [14]:
# Preview the first rows to check the renamed Value column and measure types.
new_table.head()

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,315.0,Mephedrone,Opiate,Count,People
1,0.002231,Mephedrone,Opiate,Percentage,People
2,255.0,Mephedrone,Non-opiate only,Count,People
3,0.010746,Mephedrone,Non-opiate only,Percentage,People
4,119.0,Mephedrone,Non-opiate and Alcohol,Count,People


In [15]:
# Inspect column dtypes; Value is still float64 at this point.
new_table.dtypes

Value           float64
Substance        object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [16]:
# Store every value as text. Counts keep their float formatting, so 315
# becomes '315.0' (see the preview two cells below).
# NOTE(review): confirm that '315.0' rather than '315' is the intended form
# for counts.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# def user_perc(x,y):
    
#     if x == 'Count':
#         return str(y)
#     else:
#         return y
    
# new_table['Value'] = new_table.apply(lambda row: user_perc(row['Measure Type'], row['Value']), axis = 1)

In [18]:
# Preview the first three rows to check the string-formatted values.
new_table.head(3)

Unnamed: 0,Value,Substance,Clients,Measure Type,Unit
0,315.0,Mephedrone,Opiate,Count,People
1,0.0022310519941355,Mephedrone,Opiate,Percentage,People
2,255.0,Mephedrone,Non-opiate only,Count,People


In [19]:
# Relabel the 'Total' client group as 'All Clients'; other groups are kept as-is.
client_group_labels = {'Total': 'All Clients'}
new_table['Clients'] = new_table['Clients'].map(
    lambda group: client_group_labels.get(group, group))

In [20]:
# Final shaping: rename the client column, add the constant Period and
# Basis of treatment columns, then put the columns in the output order.
output_columns = [
    'Period',
    'Basis of treatment',
    'Substance',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]
new_table.columns = [
    {'Clients': 'Clients in treatment'}.get(column, column)
    for column in new_table.columns
]
new_table['Period'] = '2017-18'
new_table['Basis of treatment'] = 'All'
new_table = new_table.loc[:, output_columns]