Table 4.10.1: Injecting status of new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    # Only scrape when the notebook is run interactively.
    # NOTE(review): presumably `tabs` is supplied by a calling notebook
    # otherwise — confirm, since nothing below defines it on that path.
    scraper = Scraper(
        'https://www.gov.uk/government/collections/alcohol-and-drug-misuse-and-treatment-statistics'
    )
    scraper.select_dataset(
        title=lambda title: title.startswith('Substance misuse treatment for adults'),
        latest=True,
    )
    # Index every worksheet of the "Data tables" distribution by its sheet name.
    tabs = {
        sheet.name: sheet
        for sheet in scraper.distribution(
            title=lambda title: title.startswith('Data tables')
        ).as_databaker()
    }

In [2]:
# Pick the worksheet for this table out of the name-keyed `tabs` mapping.
tab = tabs['Table 4.10.1']

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell reached by expanding from B5
# downwards and then to the right.
# NOTE(review): the saved output shows this includes row 11 (e.g. B11, D11),
# which lies below the last row kept in the `status` header bag.
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<E9 1.0>, <G5 0.908902922930755>, <D6 1092.0>, <B5 16165.0>, <F8 38.0>, <G6 0.0786341027217348>, <B9 40967.0>, <I8 0.0023187015271448>, <H7 108.0>, <F7 185.0>, <G7 0.0103392388084726>, <H11 50656.0>, <J6 17824.0>, <K7 0.0909958612724181>, <B8 110.0>, <K5 0.763723571497321>, <B10 211.0>, <K9 1.0>, <C8 0.00268508799765665>, <C6 0.34227549002856>, <F6 1407.0>, <I9 1.0>, <C5 0.394585886201089>, <I7 0.00215879107699688>, <K8 0.00231800827745516>, <F5 16263.0>, <D5 14289.0>, <G9 1.0>, <C7 0.260453535772695>, <H6 1303.0>, <D11 16716.0>, <F11 18757.0>, <J7 11345.0>, <D7 382.0>, <F9 17893.0>, <J11 127307.0>, <J5 95218.0>, <B7 10670.0>, <E5 0.905054471750697>, <I6 0.026045414567842>, <E8 0.00158348112490499>, <C9 1.0>, <F10 864.0>, <I5 0.969477092828016>, <H10 628.0>, <H5 48501.0>, <K6 0.142962558952806>, <B11 41178.0>, <D8 25.0>, <E6 0.06916645553585>, <E7 0.0241955915885483>, <J9 124676.0>, <D9 15788.0>, <J10 2631.0>, <H9 50028.0>, <G8 0.00212373553903761>, <D10 928.0>, <J8 289.0>, <B6 14022.

In [4]:
# Row headers: non-blank cells in column A from A5 downwards, with A11 and
# everything below it removed.
# NOTE(review): the observations include row 11 but this bag ends at A10, so a
# DIRECTLY/LEFT lookup has no header for that row — consistent with the
# "No header found for <B11 ...>" error in the saved output. Confirm what A11
# contains and whether row 11 should get its own label here.
status = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A11').expand(DOWN)
status

{<A6 'Previously injected'>, <A8 'Declined to answer'>, <A5 'Never injected'>, <A9 'Total'>, <A7 'Currently injecting'>, <A10 'Missing/inconsistent'>}

In [5]:
# Client-group column headers: non-blank cells in row 3 from B3 rightwards.
# The saved output shows one header every second column (B, D, F, H, J).
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<B3 'Opiate'>, <J3 'Total'>, <F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>, <D3 'Non-opiate only'>}

In [6]:
# Measure-type column headers: non-blank cells in row 4 from B4 rightwards.
# The saved output shows alternating 'n' and '%' markers.
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<B4 'n'>, <F4 'n'>, <G4 '%'>, <D4 'n'>, <H4 'n'>, <E4 '%'>, <K4 '%'>, <I4 '%'>, <J4 'n'>, <C4 '%'>}

In [7]:
# Dimension definitions passed to ConversionSegment:
#   - 'Injecting Status': header looked up DIRECTLY to the LEFT (column A bag).
#   - 'Clients in new treatment': CLOSEST header to the LEFT in row 3; the
#     headers sit on every second column, so each presumably covers its own
#     'n' column and the following '%' column — TODO confirm.
#   - 'Measure Type': header DIRECTLY ABOVE in row 4 ('n' or '%').
#   - 'Unit': constant 'People' for every observation.
# NOTE(review): `status` has no cell on row 11 while the observations do, so
# the DIRECTLY/LEFT lookup cannot resolve for that row; this matches the
# NoLookupError shown in the saved output of the topandas() cell.
Dimensions = [
            HDim(status,'Injecting Status',DIRECTLY,LEFT),
            HDim(Clients,'Clients in new treatment',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
# Combine the observation cells with their dimension definitions.
# NOTE(review): processTIMEUNIT=True is passed although no time dimension is
# defined in `Dimensions` — confirm it is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check, left disabled (presumably renders an HTML preview of
# the segment — verify before re-enabling):
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a pandas DataFrame.
# NOTE(review): the saved output shows this call raising
# "NoLookupError: No header found for <B11 41178.0>", so none of the cells
# below have been executed in the saved notebook.
new_table = c1.topandas()
new_table

NoLookupError: No header found for <B11 41178.0>

In [None]:
# Drop observations whose value is exactly 0. The explicit .copy() makes the
# filtered result an independent frame, so the column assignments in later
# cells do not operate on a slice of the original (SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != 0].copy()

In [None]:
# Rename the 'OBS' column to 'Value'; every other column keeps its name.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [None]:
# Preview the first rows after renaming 'OBS' to 'Value'.
new_table.head()

In [None]:
# Spell out the spreadsheet's measure markers: 'n' -> 'Count', '%' -> 'Percentage'.
# Any other value is passed through unchanged.
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: 'Count' if marker == 'n'
    else 'Percentage' if marker == '%'
    else marker
)

In [None]:
# Check the last rows after relabelling 'Measure Type'.
new_table.tail()

In [None]:
# Inspect column dtypes before 'Value' is converted to strings.
new_table.dtypes

In [None]:
# Store every 'Value' entry as a string; the other columns keep their dtypes.
new_table = new_table.astype({'Value': str})

In [None]:
# Quick look at the first three rows after the string conversion of 'Value'.
new_table.head(3)

In [None]:
# Relabel the 'Total' client group as 'All Clients'; other groups are unchanged.
new_table['Clients in new treatment'] = new_table['Clients in new treatment'].map(
    lambda group: 'All Clients' if group == 'Total' else group
)

In [None]:
# Quick look at the first three rows after relabelling the client groups.
new_table.head(3)

In [None]:
# List the distinct 'Injecting Status' labels before they are relabelled.
new_table['Injecting Status'].unique()

In [None]:
# Relabel the 'Total' status row as 'All'; other statuses are unchanged.
new_table['Injecting Status'] = new_table['Injecting Status'].map(
    lambda label: 'All' if label == 'Total' else label
)

In [None]:
def user_perc(x):
    """Substitute a fallback label for a missing 'Injecting Status' value.

    Parameters
    ----------
    x : object
        A single 'Injecting Status' entry.

    Returns
    -------
    object
        The fallback label when ``x`` is missing (``None`` or a float NaN),
        otherwise ``x`` unchanged.
    """
    # A missing header can reach pandas as either None or NaN. NaN is the only
    # float that is not equal to itself, so `x != x` detects it without needing
    # an extra import. `is None` replaces the original `== None` comparison.
    if x is None or (isinstance(x, float) and x != x):
        # NOTE(review): 'inclusice' looks like a typo for 'inclusive'. It is
        # left as-is because this label is written to the output data —
        # confirm nothing downstream relies on the spelling before fixing it.
        return 'All inclusice Inconsistent/missing'
    return x
    
# Fill in the fallback label for missing 'Injecting Status' entries. Only this
# one column is needed, so map it directly instead of using a row-wise
# DataFrame.apply(axis=1), which builds a Series for every row.
new_table['Injecting Status'] = new_table['Injecting Status'].map(user_perc)

In [None]:
# Prepend the 'Injecting Status/' prefix to every label
# (radd computes prefix + value element-wise).
new_table['Injecting Status'] = new_table['Injecting Status'].radd('Injecting Status/')

In [None]:
# Prepend the 'New clients/' prefix to every client-group label
# (radd computes prefix + value element-wise).
new_table['Clients in new treatment'] = new_table['Clients in new treatment'].radd('New clients/')

In [None]:
# Final output layout: rename the two dimension columns, add the constant
# 'Period' and 'Substance' columns, then keep only the output columns in order.
new_table = (
    new_table
    .rename(columns={
        'Clients in new treatment': 'Clients in treatment',
        'Injecting Status': 'Basis of treatment',
    })
    .assign(Period='2017-18', Substance='All')
    .loc[:, [
        'Period',
        'Basis of treatment',
        'Substance',
        'Clients in treatment',
        'Measure Type',
        'Value',
        'Unit',
    ]]
)

In [None]:
# Preview the first rows of the final output table.
new_table.head()