Trends in age group and presenting substances among new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Trends new presentations')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Trends new presentations']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('C6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<E102 2942.0>, <V138 29.0>, <L59 0.177408498106857>, <Z63 15229.0>, <F129 0.0845685186869146>, <Q21 0.0392737054472091>, <L66 0.00837189734960034>, <H73 0.037010540372967>, <V117 60.0>, <O6 0.285916784791128>, <G125 512.0>, <C173 2412.0>, <E123 1537.0>, <V129 23.0>, <E108 12980.0>, <R34 116.0>, <L47 0.0390062111801242>, <F81 0.455433537694536>, <AA125 0.020367677167022>, <I142 1216.0>, <Q25 0.0404841963685272>, <H42 0.303696303696304>, <K95 21747.0>, <E151 317.0>, <D21 0.0483975048397505>, <Y167 0.004149377593361>, <X144 70.0>, <R105 67.0>, <E162 791.0>, <F116 0.0783908411512164>, <P59 1316.0>, <R112 198.0>, <X133 3396.0>, <O11 0.0777953806387059>, <D37 0.26027656753173>, <H33 0.265603961256135>, <AA104 0.0428396485682145>, <X173 3133.0>, <G34 1129.0>, <X123 101.0>, <R150 1266.0>, <J82 1.0>, <G90 1306.0>, <L162 0.198384118190212>, <Y33 0.00838769804287046>, <U24 0.0292326431181486>, <T16 1992.0>, <Z98 18181.0>, <S108 1.0>, <O66 0.00757975533701127>, <AA53 0.00979143232731166>, <I21 10

In [4]:
Period = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A174').expand(DOWN)
Period

{<A135 '2015-16'>, <A57 '2009-10'>, <A83 '2011-12'>, <A161 '2017-18'>, <A109 '2013-14'>, <A5 '2005-06'>, <A96 '2012-13'>, <A70 '2010-11'>, <A148 '2016-17'>, <A44 '2008-09'>, <A31 '2007-08'>, <A122 '2014-15'>, <A18 '2006-07'>}

In [5]:
substance = tab.excel_ref('B5').expand(DOWN).is_not_blank()- tab.excel_ref('B174').expand(DOWN)
substance

{<B58 'Opiate (not crack cocaine)'>, <B138 'Crack cocaine (not opiate)'>, <B37 'Cocaine'>, <B10 'Cannabis'>, <B56 'Total number of individuals*'>, <B120 'Alcohol'>, <B167 'Cocaine'>, <B9 'Other drug use'>, <B134 'Total number of individuals*'>, <B113 'Other drug use'>, <B130 'Benzodiazepine'>, <B84 'Opiate (not crack cocaine)'>, <B86 'Crack cocaine (not opiate)'>, <B26 'Benzodiazepine'>, <B76 'Cocaine'>, <B139 'Other drug use'>, <B169 'Benzodiazepine'>, <B54 'Alcohol'>, <B36 'Cannabis'>, <B148 'Opiate and/or crack cocaine use'>, <B29 'Alcohol'>, <B102 'Cocaine'>, <B135 'Opiate and/or crack cocaine use'>, <B123 'Opiate (not crack cocaine)'>, <B129 'Amphetamine (other than ecstasy)'>, <B52 'Benzodiazepine'>, <B74 'Other drug use'>, <B162 'Opiate (not crack cocaine)'>, <B19 'Opiate (not crack cocaine)'>, <B68 'Alcohol'>, <B91 'Benzodiazepine'>, <B11 'Cocaine'>, <B55 'Alcohol'>, <B21 'Crack cocaine (not opiate)'>, <B32 'Opiate (not crack cocaine)'>, <B69 'Total number of individuals*'>, <B

In [6]:
age = tab.excel_ref('C3').expand(RIGHT).is_not_blank() 
age

{<Z3 'Total'>, <T3 '55-59'>, <C3 '18-19'>, <P3 '45-49'>, <G3 '25-29'>, <R3 '50-54'>, <N3 '40-44'>, <K3 '35-39'>, <X3 '65+'>, <E3 '20-24'>, <V3 '60-64'>, <I3 '30-34'>}

In [7]:
measuretype = tab.excel_ref('C4').expand(RIGHT).is_not_blank() 
measuretype

{<F4 '%'>, <J4 '%'>, <S4 '%'>, <R4 'n'>, <C4 'n'>, <P4 'n'>, <X4 'n'>, <Q4 '%'>, <AA4 '%'>, <D4 '%'>, <O4 '%'>, <Z4 'n'>, <G4 'n'>, <N4 'n'>, <K4 'n'>, <W4 '%'>, <V4 'n'>, <Y4 '%'>, <H4 '%'>, <L4 '%'>, <E4 'n'>, <I4 'n'>, <U4 '%'>, <T4 'n'>}

In [8]:
Dimensions = [
            HDim(age,'Age',CLOSEST,LEFT),
            HDim(measuretype,'Measure Type',DIRECTLY,ABOVE),
            HDim(substance, 'Substance',DIRECTLY,LEFT),
            HDim(Period, 'Period',CLOSEST,ABOVE),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Age,Measure Type,Substance,Period,Unit
0,935.000000,18-19,n,Opiate (not crack cocaine),2005-06,People
1,0.203615,18-19,%,Opiate (not crack cocaine),2005-06,People
2,6552.000000,20-24,n,Opiate (not crack cocaine),2005-06,People
3,0.392547,20-24,%,Opiate (not crack cocaine),2005-06,People
4,10102.000000,25-29,n,Opiate (not crack cocaine),2005-06,People
5,0.450560,25-29,%,Opiate (not crack cocaine),2005-06,People
6,9245.000000,30-34,n,Opiate (not crack cocaine),2005-06,People
7,0.423985,30-34,%,Opiate (not crack cocaine),2005-06,People
8,6315.000000,35-39,n,Opiate (not crack cocaine),2005-06,People
9,0.349165,35-39,%,Opiate (not crack cocaine),2005-06,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.head()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,935.0,18-19,n,Opiate (not crack cocaine),2005-06,People
1,0.203615,18-19,%,Opiate (not crack cocaine),2005-06,People
2,6552.0,20-24,n,Opiate (not crack cocaine),2005-06,People
3,0.392547,20-24,%,Opiate (not crack cocaine),2005-06,People
4,10102.0,25-29,n,Opiate (not crack cocaine),2005-06,People


In [14]:
new_table['Measure Type'].unique()

array(['n', '%'], dtype=object)

In [15]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count',
        '%'     : 'Percentage'
        }.get(x, x))

In [16]:
new_table.tail()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
3115,1.0,60-64,Percentage,Total number of individuals*,2017-18,People
3116,3133.0,65+,Count,Total number of individuals*,2017-18,People
3117,1.0,65+,Percentage,Total number of individuals*,2017-18,People
3118,127307.0,Total,Count,Total number of individuals*,2017-18,People
3119,1.0,Total,Percentage,Total number of individuals*,2017-18,People


In [17]:
new_table.dtypes

Value           float64
Age              object
Measure Type     object
Substance        object
Period           object
Unit             object
dtype: object

In [18]:
new_table['Value'] = new_table['Value'].astype(str)

In [19]:
new_table.head(3)

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,935.0,18-19,Count,Opiate (not crack cocaine),2005-06,People
1,0.203614982578397,18-19,Percentage,Opiate (not crack cocaine),2005-06,People
2,6552.0,20-24,Count,Opiate (not crack cocaine),2005-06,People


In [20]:
new_table['Period'] = new_table['Period'].map(
    lambda x: {
        'Total' : 'All years'
       }.get(x, x))

In [21]:
new_table.tail()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
3115,1.0,60-64,Percentage,Total number of individuals*,2017-18,People
3116,3133.0,65+,Count,Total number of individuals*,2017-18,People
3117,1.0,65+,Percentage,Total number of individuals*,2017-18,People
3118,127307.0,Total,Count,Total number of individuals*,2017-18,People
3119,1.0,Total,Percentage,Total number of individuals*,2017-18,People


In [22]:
new_table['Substance'] = new_table['Substance'].str.rstrip('*')

In [23]:
new_table.head()

Unnamed: 0,Value,Age,Measure Type,Substance,Period,Unit
0,935.0,18-19,Count,Opiate (not crack cocaine),2005-06,People
1,0.203614982578397,18-19,Percentage,Opiate (not crack cocaine),2005-06,People
2,6552.0,20-24,Count,Opiate (not crack cocaine),2005-06,People
3,0.392546881552933,20-24,Percentage,Opiate (not crack cocaine),2005-06,People
4,10102.0,25-29,Count,Opiate (not crack cocaine),2005-06,People


In [24]:
new_table['Clients in treatment'] = 'All clients to new treatment'

In [25]:
new_table = new_table[['Period','Age','Substance','Clients in treatment','Measure Type','Value','Unit']]

In [26]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('Trendsnewpresentations.csv'), index = False)