Trends in age group and presenting substances among all clients in treatment

In [1]:
from gssutils import *

if is_interactive():
    # Only fetch the source workbook when this notebook is run interactively.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session whose responses are cached on disk under .cache.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Downloaded workbooks are kept in ./in (created on first run).
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of saving an error page as the .xlsx.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the 'Trends all clients in treatment' sheet and take the first
# (single) tab returned.
# NOTE(review): inputFile is assigned in the first cell only when
# is_interactive() is true; presumably a calling notebook supplies it
# otherwise — confirm.
tab = loadxlstabs(inputFile, sheetids='Trends all clients in treatment')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Trends all clients in treatment']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell in the region extending down and
# right from C6.
observations = tab.excel_ref('C6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<K157 674.0>, <T33 385.0>, <W99 0.00545004128819158>, <Q90 0.0395378016398704>, <C138 70.0>, <K34 1575.0>, <Z103 15506.0>, <I47 1747.0>, <I34 1730.0>, <H163 0.202310000411032>, <I134 48920.0>, <J72 0.284395017793594>, <P45 7494.0>, <Q56 1.0>, <N91 4271.0>, <H94 0.391230858497944>, <V26 123.0>, <N104 4388.0>, <C84 538.0>, <G29 9525.0>, <V117 260.0>, <N88 8260.0>, <N42 15695.0>, <H156 0.0549916341327384>, <K147 51740.0>, <K167 5293.0>, <L102 0.0809900047596383>, <K32 19324.0>, <Y160 1.0>, <X52 106.0>, <V160 7718.0>, <E46 6422.0>, <S45 0.234397723680051>, <W137 0.0660501981505945>, <S82 1.0>, <D99 0.0147880973850316>, <F26 0.067444613563335>, <I131 759.0>, <W85 0.0430469144106272>, <D26 0.0347388294524858>, <L7 0.213245566899516>, <H125 0.0225535289452815>, <J118 0.0159760076956448>, <P79 367.0>, <T102 292.0>, <U144 0.0100318251003183>, <X123 508.0>, <J134 1.0>, <R112 361.0>, <C123 275.0>, <S137 0.160404191616766>, <E128 3675.0>, <K53 849.0>, <F7 0.214115114205941>, <W40 0.01590159015901

In [4]:
# Period labels: non-blank cells in column A from A5 down, excluding everything
# from A174 downward.
# NOTE(review): rows from 174 on are presumably notes below the table —
# confirm against the sheet.
Period = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A174').expand(DOWN)
Period

{<A5 '2005-06'>, <A148 '2016-17'>, <A70 '2010-11'>, <A57 '2009-10'>, <A122 '2014-15'>, <A31 '2007-08'>, <A161 '2017-18'>, <A109 '2013-14'>, <A96 '2012-13'>, <A135 '2015-16'>, <A83 '2011-12'>, <A18 '2006-07'>, <A44 '2008-09'>}

In [5]:
# Substance labels: non-blank cells in column B from B5 down, with the same
# cut-off at row 174 as the Period selection.
substance = tab.excel_ref('B5').expand(DOWN).is_not_blank()- tab.excel_ref('B174').expand(DOWN)
substance

{<B17 'Total number of individuals*'>, <B158 'Alcohol'>, <B149 'Opiate (not crack cocaine)'>, <B25 'Amphetamine (other than ecstasy)'>, <B146 'Alcohol'>, <B128 'Cocaine'>, <B39 'Benzodiazepine'>, <B41 'Alcohol'>, <B59 'Both opiate and crack cocaine'>, <B118 'Other'>, <B168 'Amphetamine (other than ecstasy)'>, <B130 'Benzodiazepine'>, <B71 'Opiate (not crack cocaine)'>, <B7 'Both opiate and crack cocaine'>, <B139 'Other drug use'>, <B166 'Cannabis'>, <B87 'Other drug use'>, <B76 'Cocaine'>, <B93 'Alcohol'>, <B34 'Crack cocaine (not opiate)'>, <B5 'Opiate and/or crack cocaine use'>, <B173 'Total number of individuals*'>, <B121 'Total number of individuals*'>, <B58 'Opiate (not crack cocaine)'>, <B50 'Cocaine'>, <B112 'Crack cocaine (not opiate)'>, <B161 'Opiate and/or crack cocaine use'>, <B142 'Amphetamine (other than ecstasy)'>, <B124 'Both opiate and crack cocaine'>, <B154 'Cocaine'>, <B86 'Crack cocaine (not opiate)'>, <B91 'Benzodiazepine'>, <B132 'Alcohol'>, <B53 'Other'>, <B143 'B

In [6]:
# Age-band headers: non-blank cells in row 3 from C3 rightwards (includes the
# 'Total' column header).
age = tab.excel_ref('C3').expand(RIGHT).is_not_blank() 
age

{<G3 '25-29'>, <P3 '45-49'>, <K3 '35-39'>, <R3 '50-54'>, <X3 '65+'>, <Z3 'Total'>, <T3 '55-59'>, <E3 '20-24'>, <C3 '18-19'>, <V3 '60-64'>, <N3 '40-44'>, <I3 '30-34'>}

In [7]:
# Measure-type headers ('n' / '%'): non-blank cells in row 4 from C4 rightwards.
measuretype = tab.excel_ref('C4').expand(RIGHT).is_not_blank() 
measuretype

{<R4 'n'>, <H4 '%'>, <U4 '%'>, <I4 'n'>, <N4 'n'>, <D4 '%'>, <Q4 '%'>, <E4 'n'>, <X4 'n'>, <J4 '%'>, <T4 'n'>, <F4 '%'>, <AA4 '%'>, <K4 'n'>, <P4 'n'>, <W4 '%'>, <G4 'n'>, <Z4 'n'>, <S4 '%'>, <C4 'n'>, <V4 'n'>, <L4 '%'>, <Y4 '%'>, <O4 '%'>}

In [8]:
# How each observation cell finds its header cells.
Dimensions = [
    # Age headers sit in every other column of row 3, so use the closest one to the left.
    HDim(age, 'Age', CLOSEST, LEFT),
    # Every data column has its own 'n' / '%' header in row 4.
    HDim(measuretype, 'Measure Type', DIRECTLY, ABOVE),
    # Substance label comes from column B on the same row.
    HDim(substance, 'Substance', DIRECTLY, LEFT),
    # Period labels appear once per group of rows in column A.
    HDim(Period, 'Period', CLOSEST, ABOVE),
    # Constant unit for every observation.
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with their dimension lookups.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional debugging aid (left disabled): render an HTML preview of the segment.
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
# Convert the segment to a pandas DataFrame and display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Age,Measure Type,Substance,Period,Unit
0,1707.000000,18-19,n,Opiate (not crack cocaine),2005-06,People
1,0.240898,18-19,%,Opiate (not crack cocaine),2005-06,People
2,13774.000000,20-24,n,Opiate (not crack cocaine),2005-06,People
3,0.463350,20-24,%,Opiate (not crack cocaine),2005-06,People
4,23100.000000,25-29,n,Opiate (not crack cocaine),2005-06,People
5,0.527795,25-29,%,Opiate (not crack cocaine),2005-06,People
6,22869.000000,30-34,n,Opiate (not crack cocaine),2005-06,People
7,0.509355,30-34,%,Opiate (not crack cocaine),2005-06,People
8,17177.000000,35-39,n,Opiate (not crack cocaine),2005-06,People
9,0.461499,35-39,%,Opiate (not crack cocaine),2005-06,People


In [11]:
# Drop observations whose value is exactly 0. Take an explicit copy so the
# column assignments in later cells modify an independent frame rather than a
# slice of the original (avoids pandas' SettingWithCopyWarning).
new_table = new_table[new_table['OBS'] != 0].copy()

In [12]:
# Rename the 'OBS' column to 'Value'; every other column name is kept as is.
new_table.columns = [{'OBS': 'Value'}.get(col, col) for col in new_table.columns]

In [13]:
# Preview the first rows to check the OBS -> Value rename.
new_table.head()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,1707.0,18-19,n,Opiate (not crack cocaine),2005-06,People
1,0.240898,18-19,%,Opiate (not crack cocaine),2005-06,People
2,13774.0,20-24,n,Opiate (not crack cocaine),2005-06,People
3,0.46335,20-24,%,Opiate (not crack cocaine),2005-06,People
4,23100.0,25-29,n,Opiate (not crack cocaine),2005-06,People


In [14]:
# List the distinct Measure Type codes before relabelling them.
new_table['Measure Type'].unique()

array(['n', '%'], dtype=object)

In [15]:
# Spell out the sheet's measure codes: 'n' -> 'Count', '%' -> 'Percentage'.
# Any other value is left unchanged.
new_table['Measure Type'] = new_table['Measure Type'].replace(
    {'n': 'Count', '%': 'Percentage'}
)

In [16]:
# Inspect the last rows after relabelling Measure Type.
new_table.tail()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
3115,1.0,60-64,Percentage,Total number of individuals*,2017-18,People
3116,5600.0,65+,Count,Total number of individuals*,2017-18,People
3117,1.0,65+,Percentage,Total number of individuals*,2017-18,People
3118,268390.0,Total,Count,Total number of individuals*,2017-18,People
3119,1.0,Total,Percentage,Total number of individuals*,2017-18,People


In [17]:
# Check column dtypes before casting Value to text.
new_table.dtypes

Value           float64
Age              object
Measure Type     object
Substance        object
Period           object
Unit             object
dtype: object

In [18]:
# Store Value as text. The column is float64 at this point, so counts keep a
# trailing '.0' (e.g. '1707.0') and percentages are written at full float
# precision.
new_table['Value'] = new_table['Value'].astype(str)

In [19]:
# Spot-check the first three rows after the Value cast.
new_table.head(3)

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,1707.0,18-19,Count,Opiate (not crack cocaine),2005-06,People
1,0.240897544453853,18-19,Percentage,Opiate (not crack cocaine),2005-06,People
2,13774.0,20-24,Count,Opiate (not crack cocaine),2005-06,People


In [20]:
# Relabel any Period value equal to 'Total' as 'All years'; other values pass
# through unchanged.
# NOTE(review): the Period cells selected earlier are all year ranges such as
# '2005-06', while 'Total' occurs in the Age header row, so this mapping looks
# like a no-op here — confirm whether it was meant for the Age column.
new_table['Period'] = new_table['Period'].map(
    lambda x: {
        'Total' : 'All years'
       }.get(x, x))

In [21]:
# Inspect the last rows after the Period relabelling.
new_table.tail()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
3115,1.0,60-64,Percentage,Total number of individuals*,2017-18,People
3116,5600.0,65+,Count,Total number of individuals*,2017-18,People
3117,1.0,65+,Percentage,Total number of individuals*,2017-18,People
3118,268390.0,Total,Count,Total number of individuals*,2017-18,People
3119,1.0,Total,Percentage,Total number of individuals*,2017-18,People


In [22]:
# Strip trailing '*' characters from Substance labels
# (e.g. 'Total number of individuals*').
new_table['Substance'] = new_table['Substance'].str.rstrip('*')

In [23]:
# Check the first rows after stripping '*' from Substance.
new_table.head()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,1707.0,18-19,Count,Opiate (not crack cocaine),2005-06,People
1,0.240897544453853,18-19,Percentage,Opiate (not crack cocaine),2005-06,People
2,13774.0,20-24,Count,Opiate (not crack cocaine),2005-06,People
3,0.463349816664985,20-24,Percentage,Opiate (not crack cocaine),2005-06,People
4,23100.0,25-29,Count,Opiate (not crack cocaine),2005-06,People


In [24]:
# Constant column: every row of this table is labelled 'All clients in treatment'.
new_table['Clients in treatment'] = 'All clients in treatment'

In [25]:
# Select the output columns in their final order.
new_table = new_table[['Period','Age','Substance','Clients in treatment','Measure Type','Value','Unit']]

In [26]:
if is_interactive():
    # Write the tidied table to out/ as CSV, without the DataFrame index.
    # Path is imported in the first cell under the same is_interactive() guard.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'Trendsallclientsintreatment.csv', index=False)