Table 7.2.1: New treatment presentations by year for clients under 25

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 7.2.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 7.2.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<D21 0.0163197729422895>, <G13 0.444832008207233>, <J12 8188.0>, <K13 0.519748906756947>, <N19 0.040061765462812>, <N14 3113.0>, <G24 0.468735573223904>, <C21 0.0151715039577836>, <I19 0.0371194802069546>, <B21 0.0229760841986562>, <F7 2581.0>, <N21 0.0152697949729776>, <C12 7147.0>, <I9 409.0>, <F15 0.209677419354839>, <H13 0.483048472961614>, <H7 1742.0>, <B18 1104.0>, <D6 0.231740775780511>, <O17 0.0303768985561598>, <O19 0.0440652540783799>, <E7 3353.0>, <B8 0.181553352440915>, <B16 1432.0>, <N6 0.0947070429784679>, <C10 0.0571341587172722>, <O20 145.0>, <I12 8399.0>, <D10 0.0564333017975402>, <H24 0.480190529560101>, <J17 0.0882810071495182>, <N5 1104.0>, <I17 0.0987847431115389>, <M20 298.0>, <G19 0.04688381636317>, <K24 0.443645083932854>, <M6 0.106038848159625>, <O9 417.0>, <N17 0.0425495410482972>, <C5 5884.0>, <M14 3137.0>, <I18 617.0>, <K19 0.0377345182677387>, <J19 0.0339446689462232>, <D17 0.0748344370860927>, <M23 5779.0>, <O8 0.0780048753047065>, <M10 0.0212380016627617

In [4]:
period = tab.excel_ref('A5').expand(DOWN).is_not_blank() 
period

{<A12 'Cannabis'>, <A11 'Other drug use'>, <A27 '*The number of individuals will be less than the total of the reported substances as an individual may present with more than one problematic substance '>, <A9 'Crack cocaine (not opiate)'>, <A7 'Both opiate and crack cocaine'>, <A16 'Amphetamine (other than ecstasy)'>, <A28 '**Other includes all citations for other substances not specifically stated in the table above'>, <A23 'Alcohol'>, <A20 'Other'>, <A14 'Cocaine'>, <A22 'Alcohol'>, <A25 'Total number of individuals*'>, <A18 'Benzodiazepine'>, <A29 '**Percentages may equal 0% or not sum to 100% due to rounding'>, <A5 'Opiate (not crack cocaine)'>}

In [5]:
measuretype = tab.excel_ref('B4').expand(RIGHT).is_not_blank() 
measuretype

set()

In [6]:
clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
clients

{<D3 '2007-08'>, <B3 '2005-06'>, <K3 '2014-15'>, <G3 '2010-11'>, <J3 '2013-14'>, <E3 '2008-09'>, <N3 '2016-17'>, <H3 '2011-12'>, <F3 '2009-10'>, <I3 '2012-13'>, <C3 '2006-07'>, <M3 '2015-16'>, <O3 '2017-18'>}

In [7]:
Dimensions = [
            HDim(clients,'Clients in treatment',CLOSEST,LEFT),
            HDim(period,'Period',DIRECTLY,LEFT),
            HDim(measuretype,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [8]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Period,Measure Type,Unit
0,7487.000000,2005-06,Opiate (not crack cocaine),,People
1,5884.000000,2006-07,Opiate (not crack cocaine),,People
2,4899.000000,2007-08,Opiate (not crack cocaine),,People
3,4456.000000,2008-09,Opiate (not crack cocaine),,People
4,4279.000000,2009-10,Opiate (not crack cocaine),,People
5,3347.000000,2010-11,Opiate (not crack cocaine),,People
6,2633.000000,2011-12,Opiate (not crack cocaine),,People
7,2164.000000,2012-13,Opiate (not crack cocaine),,People
8,1951.000000,2013-14,Opiate (not crack cocaine),,People
9,1718.000000,2014-15,Opiate (not crack cocaine),,People


In [10]:
new_table = new_table[new_table['OBS'] != 0 ]

In [11]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [12]:
new_table.head()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
0,7487.0,2005-06,Opiate (not crack cocaine),,People
1,5884.0,2006-07,Opiate (not crack cocaine),,People
2,4899.0,2007-08,Opiate (not crack cocaine),,People
3,4456.0,2008-09,Opiate (not crack cocaine),,People
4,4279.0,2009-10,Opiate (not crack cocaine),,People


In [13]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        '%' : 'Percentage',
        'n'  : 'Count'
        }.get(x, x))

In [14]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
242,16085.0,2013-14,Total number of individuals*,,People
243,14178.0,2014-15,Total number of individuals*,,People
244,13231.0,2015-16,Total number of individuals*,,People
245,11657.0,2016-17,Total number of individuals*,,People
246,10666.0,2017-18,Total number of individuals*,,People


In [15]:
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda x: {
        'Total' : 'All Clients'
        }.get(x, x))

In [16]:
new_table.dtypes

Value                   float64
Clients in treatment     object
Period                   object
Measure Type             object
Unit                     object
dtype: object

In [17]:
new_table['Value'] = new_table['Value'].astype(str)

In [18]:
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
0,7487.0,2005-06,Opiate (not crack cocaine),,People
1,5884.0,2006-07,Opiate (not crack cocaine),,People
2,4899.0,2007-08,Opiate (not crack cocaine),,People


In [19]:
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
242,16085.0,2013-14,Total number of individuals*,,People
243,14178.0,2014-15,Total number of individuals*,,People
244,13231.0,2015-16,Total number of individuals*,,People
245,11657.0,2016-17,Total number of individuals*,,People
246,10666.0,2017-18,Total number of individuals*,,People


In [20]:
new_table = new_table[['Period','Clients in treatment','Measure Type','Value','Unit']]

In [21]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table7.2.1.csv'), index = False)