Table 4.2.1: Age of all clients in treatment

In [1]:
from gssutils import *

# Scrape the publication only when this notebook is run interactively.
# NOTE(review): when not interactive, `tabs` is presumably supplied by a
# calling notebook - confirm, since nothing here defines it in that case.
if is_interactive():
    scraper = Scraper(
        'https://www.gov.uk/government/collections/alcohol-and-drug-misuse-and-treatment-statistics'
    )
    # Latest dataset whose title starts with the adult substance misuse heading.
    scraper.select_dataset(
        title=lambda title: title.startswith('Substance misuse treatment for adults'),
        latest=True,
    )
    # Index every worksheet of the "Data tables" distribution by its tab name.
    tabs = {
        sheet.name: sheet
        for sheet in scraper.distribution(
            title=lambda title: title.startswith('Data tables')
        ).as_databaker()
    }

In [2]:
# Pick this table's worksheet out of `tabs`, which is keyed by worksheet name.
tab = tabs['Table 4.2.1']

https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observations: every non-blank cell in the block that extends down and to
# the right from B5.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)

In [4]:
# Display the selected observation cells as a visual check of the selection.
observations

{<H17 1605.0>, <B6 184.0>, <I11 0.143560241202317>, <J14 15524.0>, <B14 5784.0>, <I18 1.0>, <C5 0.000821593750221335>, <C14 0.0409663642351741>, <C17 0.00165027020518596>, <F9 5106.0>, <C15 0.0166443561467253>, <G13 0.0639358474208929>, <E16 0.00476190476190476>, <F8 4698.0>, <I14 0.111668228060221>, <F11 3288.0>, <K6 0.00622974030328999>, <B5 116.0>, <D10 3234.0>, <H15 4761.0>, <B7 2772.0>, <J15 7571.0>, <I5 0.00269175452254344>, <C18 1.0>, <K5 0.00741085733447595>, <J5 1989.0>, <E15 0.00695322376738306>, <J18 268390.0>, <K11 0.168124743842915>, <G10 0.160056350238405>, <J7 12079.0>, <H10 9369.0>, <K18 1.0>, <B18 141189.0>, <B15 2350.0>, <B9 21067.0>, <H13 11602.0>, <C16 0.00602029903179426>, <K7 0.0450054025857893>, <B8 10114.0>, <E9 0.182764433206911>, <F6 483.0>, <H8 4516.0>, <H9 7184.0>, <J8 24329.0>, <K15 0.0282089496628041>, <G18 1.0>, <C9 0.149211340826835>, <G6 0.0174469007368877>, <D14 408.0>, <H12 12624.0>, <D15 165.0>, <C11 0.205171791003548>, <K9 0.140444874995343>, <I16 0

In [5]:
# Age bands: the non-blank cells of column A from A5 downwards.
# The selection is not limited to age labels - any other non-blank text lower
# in column A (e.g. a footnote) is selected too.
Age = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
Age

{<A11 '40-44'>, <A15 '60-64'>, <A9 '30-34'>, <A14 '55-59'>, <A16 '65-69'>, <A5 '18'>, <A7 '20-24'>, <A10 '35-39'>, <A6 '19'>, <A8 '25-29'>, <A13 '50-54'>, <A21 '*Percentages may equal 0% or not sum to 100% due to rounding'>, <A17 '70+'>, <A12 '45-49'>, <A18 'Total'>}

In [6]:
# Client-group headings: the non-blank cells of row 3 from B3 rightwards.
Clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
Clients

{<F3 'Non-opiate and Alcohol'>, <H3 'Alcohol only'>, <B3 'Opiate'>, <D3 'Non-opiate only'>, <J3 'Total'>}

In [7]:
# Measure markers ('n' / '%'): the non-blank cells of row 4 from B4 rightwards.
MeasureType = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
MeasureType

{<H4 'n'>, <G4 '%'>, <K4 '%'>, <E4 '%'>, <B4 'n'>, <I4 '%'>, <C4 '%'>, <D4 'n'>, <J4 'n'>, <F4 'n'>}

In [8]:
# How each observation cell is matched to its header values:
#   Age          - looked up DIRECTLY to the LEFT of the observation
#   Clients      - the CLOSEST client-group heading to the LEFT
#   Measure Type - the 'n' / '%' marker DIRECTLY ABOVE the observation
#   Unit         - the constant 'People' for every observation
Dimensions = [
    HDim(Age, 'Age', DIRECTLY, LEFT),
    HDim(Clients, 'Clients', CLOSEST, LEFT),
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    HDimConst('Unit', 'People'),
]

In [9]:
# Tie the observation cells to the dimension lookups defined in `Dimensions`.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional visual preview of the selection; currently disabled.
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
# Convert the conversion segment into a pandas DataFrame and display it.
# The displayed frame has an OBS column plus one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Age,Clients,Measure Type,Unit
0,116.000000,18,Opiate,n,People
1,0.000822,18,Opiate,%,People
2,948.000000,18,Non-opiate only,n,People
3,0.039949,18,Non-opiate only,%,People
4,721.000000,18,Non-opiate and Alcohol,n,People
5,0.026044,18,Non-opiate and Alcohol,%,People
6,204.000000,18,Alcohol only,n,People
7,0.002692,18,Alcohol only,%,People
8,1989.000000,18,Total,n,People
9,0.007411,18,Total,%,People


In [11]:
# Keep only observations whose value is not exactly 0.
# NOTE(review): this also discards genuine zero counts/percentages - confirm
# that dropping them is intended.
# .copy() hands later cells an independent frame: they add and overwrite
# columns on `new_table`, and doing that on a filtered slice makes pandas emit
# SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [12]:
# Rename the observation column from 'OBS' to 'Value'; other columns are untouched.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Spell out the spreadsheet's measure markers; any other value passes through
# unchanged.
measure_type_labels = {
    'n': 'Count',
    '%': 'Percentage',
}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_type_labels.get(marker, marker)
)

In [14]:
# Preview the first rows after the column rename and measure-type relabelling.
new_table.head()

Unnamed: 0,Value,Age,Clients,Measure Type,Unit
0,116.0,18,Opiate,Count,People
1,0.000822,18,Opiate,Percentage,People
2,948.0,18,Non-opiate only,Count,People
3,0.039949,18,Non-opiate only,Percentage,People
4,721.0,18,Non-opiate and Alcohol,Count,People


In [15]:
# Check the column dtypes before 'Value' is converted to text.
new_table.dtypes

Value           float64
Age              object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [16]:
# Store the values as text rather than floats.
# NOTE(review): the conversion is applied to the float column as-is, so counts
# come out as e.g. '116.0' rather than '116' and long fractions may be
# shortened by the string conversion - confirm this is the format wanted.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# Preview the first three rows to check how the values look as text.
new_table.head(3)

Unnamed: 0,Value,Age,Clients,Measure Type,Unit
0,116.0,18,Opiate,Count,People
1,0.0008215937502213,18,Opiate,Percentage,People
2,948.0,18,Non-opiate only,Count,People


In [18]:
# Prefix every age label with 'Ag/' (e.g. '18' -> 'Ag/18', 'Total' -> 'Ag/Total').
# NOTE: not idempotent - running this cell a second time prefixes the labels again.
new_table['Age'] = 'Ag/' + new_table['Age']

In [19]:
# Relabel the total row of the age column as 'Ag/All years'.
# By the time this cell runs, the age labels already carry the 'Ag/' prefix,
# so the total row reads 'Ag/Total'; looking up the bare 'Total' alone never
# matched and left the total row unrenamed. The bare key is kept as well so the
# cell still works if the labels have not been prefixed.
# Any other label passes through unchanged.
new_table['Age'] = new_table['Age'].map(
    lambda x: {
        'Ag/Total' : 'Ag/All years',
        'Total' : 'Ag/All years',
        }.get(x, x))

In [20]:
# Relabel the 'Total' client group as 'All Clients'; other groups are unchanged.
client_labels = {'Total': 'All Clients'}
new_table['Clients'] = new_table['Clients'].map(
    lambda group: client_labels.get(group, group)
)

In [21]:
# Shape the frame into its final column layout:
#   - 'Clients' becomes 'Clients in treatment'; 'Age' becomes 'Basis of treatment'
#   - constant 'Period' and 'Substance' columns are added
#   - columns are put into the final order
# NOTE(review): the age bands end up under 'Basis of treatment', presumably to
# fit a schema shared with other tables - confirm. The period is hard-coded.
new_table = new_table.rename(
    columns={
        'Clients': 'Clients in treatment',
        'Age': 'Basis of treatment',
    }
).assign(
    Period='2017-18',
    Substance='All',
)[
    [
        'Period',
        'Basis of treatment',
        'Substance',
        'Clients in treatment',
        'Measure Type',
        'Value',
        'Unit',
    ]
]