Table 4.9.2: Age and presenting substance of new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.9.2')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.9.2']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('C6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<C12 0.0344112769485904>, <Y12 3685.0>, <Y13 4057.0>, <W16 2857.0>, <Z12 0.0289457767444053>, <R14 0.00720992622401073>, <E16 0.449115580324691>, <C14 0.0107794361525705>, <C13 0.0427031509121061>, <F6 2094.0>, <G12 0.0354562031471601>, <V8 0.00760249796361662>, <Z17 1.0>, <R8 0.02280348759222>, <Z6 0.147415303164791>, <T8 0.0199617172545803>, <M11 1708.0>, <K16 0.510018467220683>, <O6 2453.0>, <J10 3725.0>, <V6 0.0581048058647841>, <M17 18094.0>, <F10 4544.0>, <E7 0.090380421613763>, <G17 1.0>, <N17 1.0>, <T13 0.0125786163522013>, <W6 140.0>, <G7 0.16471938947795>, <K14 0.0102031394275162>, <P11 0.067613460944736>, <J11 3010.0>, <E14 0.0144172522413375>, <P12 0.0245137388082742>, <S12 105.0>, <D14 119.0>, <H16 9874.0>, <X7 0.013724864347271>, <F8 759.0>, <N11 0.0943959323532663>, <J17 21660.0>, <X14 0.016597510373444>, <M10 2568.0>, <G16 0.491119065306949>, <O8 418.0>, <U12 30.0>, <Y8 4301.0>, <D12 241.0>, <J6 4297.0>, <U7 133.0>, <W8 6.0>, <M16 10695.0>, <F12 525.0>, <U17 3683.0>, <

In [4]:
Substance = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A19').expand(DOWN)
Substance

{<A12 'Amphetamine (other than ecstasy)'>, <A5 'Opiate and/or crack cocaine use'>, <A9 'Other drug use'>, <A6 'Opiate (not crack cocaine)'>, <A8 'Crack cocaine (not opiate)'>, <A7 'Both opiate and crack cocaine'>, <A14 'Other'>, <A17 'Total number of individuals*'>, <A10 'Cannabis'>, <A13 'Benzodiazepine'>, <A16 'Alcohol'>, <A11 'Cocaine'>, <A15 'Alcohol'>}

In [5]:
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<F3 '25-29'>, <U3 '60-64'>, <J3 '35-39'>, <Y3 'Total'>, <H3 '30-34'>, <M3 '40-44'>, <O3 '45-49'>, <B3 '18-19'>, <D3 '20-24'>, <Q3 '50-54'>, <W3 '65+'>, <S3 '55-59'>}

In [6]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<M4 'n'>, <B4 'n'>, <U4 'n'>, <Z4 '%'>, <T4 '%'>, <Q4 'n'>, <P4 '%'>, <H4 'n'>, <E4 '%'>, <X4 '%'>, <J4 'n'>, <O4 'n'>, <D4 'n'>, <W4 'n'>, <I4 '%'>, <Y4 'n'>, <C4 '%'>, <S4 'n'>, <G4 '%'>, <R4 '%'>, <N4 '%'>, <F4 'n'>, <K4 '%'>, <V4 '%'>}

In [7]:
Dimensions = [
            HDim(Substance,'Substance',DIRECTLY,LEFT),
            HDim(Clients,'Age of clients in new treatment',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Substance,Age of clients in new treatment,Measure Type,Unit
0,0.039801,Opiate (not crack cocaine),18-19,%,People
1,791.000000,Opiate (not crack cocaine),20-24,n,People
2,0.095832,Opiate (not crack cocaine),20-24,%,People
3,2094.000000,Opiate (not crack cocaine),25-29,n,People
4,0.141420,Opiate (not crack cocaine),25-29,%,People
5,3390.000000,Opiate (not crack cocaine),30-34,n,People
6,0.170979,Opiate (not crack cocaine),30-34,%,People
7,4297.000000,Opiate (not crack cocaine),35-39,n,People
8,0.198384,Opiate (not crack cocaine),35-39,%,People
9,3311.000000,Opiate (not crack cocaine),40-44,n,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.head()

Unnamed: 0,Value,Substance,Age of clients in new treatment,Measure Type,Unit
0,0.039801,Opiate (not crack cocaine),18-19,%,People
1,791.0,Opiate (not crack cocaine),20-24,n,People
2,0.095832,Opiate (not crack cocaine),20-24,%,People
3,2094.0,Opiate (not crack cocaine),25-29,n,People
4,0.14142,Opiate (not crack cocaine),25-29,%,People


In [13]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

In [14]:
new_table.tail()

Unnamed: 0,Value,Substance,Age of clients in new treatment,Measure Type,Unit
225,1.0,Total number of individuals*,60-64,Percentage,People
226,3133.0,Total number of individuals*,65+,Count,People
227,1.0,Total number of individuals*,65+,Percentage,People
228,127307.0,Total number of individuals*,Total,Count,People
229,1.0,Total number of individuals*,Total,Percentage,People


In [15]:
new_table.dtypes

Value                              float64
Substance                           object
Age of clients in new treatment     object
Measure Type                        object
Unit                                object
dtype: object

In [16]:
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
new_table.head(3)

Unnamed: 0,Value,Substance,Age of clients in new treatment,Measure Type,Unit
0,0.0398009950248756,Opiate (not crack cocaine),18-19,Percentage,People
1,791.0,Opiate (not crack cocaine),20-24,Count,People
2,0.0958323237218318,Opiate (not crack cocaine),20-24,Percentage,People


In [18]:
new_table['Age of clients in new treatment'] = new_table['Age of clients in new treatment'].map(
    lambda x: {
        'Total' : 'All years' 
        }.get(x, x))

In [19]:
new_table['Substance'] = new_table['Substance'].str.rstrip('*')

In [20]:
new_table.head(3)

Unnamed: 0,Value,Substance,Age of clients in new treatment,Measure Type,Unit
0,0.0398009950248756,Opiate (not crack cocaine),18-19,Percentage,People
1,791.0,Opiate (not crack cocaine),20-24,Count,People
2,0.0958323237218318,Opiate (not crack cocaine),20-24,Percentage,People


In [21]:
new_table = new_table[['Substance','Age of clients in new treatment','Measure Type','Value','Unit']]

In [22]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.9.2.csv'), index = False)