Table 7.1.1: Trends in numbers in treatment

In [1]:
from gssutils import *

# When the notebook is run on its own, fetch the source spreadsheet into ./in.
# NOTE(review): `inputFile` is only assigned inside this branch; when
# is_interactive() is false it is presumably supplied by a calling
# notebook -- confirm.
if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # Wrap the requests session with CacheControl, using a file cache in
    # ./.cache, so repeated runs can reuse the previously downloaded workbook.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail fast on an HTTP error instead of saving an error page as the
    # .xlsx, which would only surface later as a confusing parse failure.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Ask for the 'Table 7.1.1' sheet only and keep the first tab returned.
matching_tabs = loadxlstabs(inputFile, sheetids='Table 7.1.1')
tab = matching_tabs[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 7.1.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell reached from B5 by expanding
# downwards and then to the right.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<K11 1.0>, <E9 0.0787924290990063>, <K9 1.0>, <D12 23975.0>, <C5 0.64831966494774>, <G10 0.091336569579288>, <D8 27186.0>, <K7 1.0>, <J9 311667.0>, <F14 28128.0>, <E11 0.0767179076327341>, <J7 265832.0>, <K5 1.0>, <B16 146536.0>, <D6 28777.0>, <B8 170005.0>, <D9 24557.0>, <C13 0.516161937312879>, <J13 301944.0>, <B9 170032.0>, <D16 24561.0>, <J5 216802.0>, <E6 0.11908988954689>, <I15 0.294398687176078>, <F17 27684.0>, <F16 28242.0>, <C11 0.5422362425517>, <J12 297105.0>, <H5 35221.0>, <I11 0.288471617178242>, <G7 0.0855465105781095>, <H8 78658.0>, <G15 0.0975858857580069>, <I7 0.205754010051461>, <F10 28223.0>, <D14 25025.0>, <J14 295224.0>, <I5 0.162456988404166>, <J11 299565.0>, <F15 28187.0>, <E12 0.0806953770552498>, <H13 91651.0>, <F11 27732.0>, <J15 288843.0>, <F12 27627.0>, <D15 25814.0>, <E8 0.0893074777683971>, <J10 309000.0>, <E15 0.0893703499825165>, <C17 0.526059093110772>, <D17 23730.0>, <B11 162435.0>, <H16 80454.0>, <I12 0.294656771175174>, <H10 88020.0>, <J16 279793.0>

In [4]:
# Period labels: the non-blank cells in column A from row 5 downwards.
period = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
period

{<A7 '2007-08'>, <A5 '2005-06'>, <A16 '2016-17'>, <A14 '2014-15'>, <A15 '2015-16'>, <A8 '2008-09'>, <A9 '2009-10'>, <A13 '2013-14'>, <A6 '2006-07'>, <A12 '2012-13'>, <A11 '2011-12'>, <A17 '2017-18'>, <A10 '2010-11'>}

In [5]:
# Measure-type markers: the non-blank cells in row 4 from column B
# rightwards (the 'n' / '%' sub-headings shown in the output).
measuretype = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
measuretype

{<D4 'n'>, <G4 '%'>, <I4 '%'>, <B4 'n'>, <H4 'n'>, <K4 '%'>, <J4 'n'>, <F4 'n'>, <C4 '%'>, <E4 '%'>}

In [6]:
# Client-group headings: the non-blank cells in row 3 from column B
# rightwards.
clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
clients

{<D3 'Non-opiate only'>, <F3 'Non-opiate and Alcohol'>, <B3 'Opiate'>, <J3 'Total'>, <H3 'Alcohol only'>}

In [7]:
# Header lookups attached to each observation cell.
# The client-group headings only appear in every other column (B, D, F, H, J
# in the output above), presumably why they are matched with CLOSEST/LEFT
# rather than DIRECTLY -- confirm against the sheet layout.
Dimensions = [
    HDim(clients, 'Clients in treatment', CLOSEST, LEFT),
    HDim(period, 'Period', DIRECTLY, LEFT),
    HDim(measuretype, 'Measure Type', DIRECTLY, ABOVE),
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with their dimension lookups.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Turn the conversion segment into a pandas DataFrame; per the preview below
# it has an OBS column plus one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Clients in treatment,Period,Measure Type,Unit
0,140557.000000,Opiate,2005-06,n,People
1,0.648320,Opiate,2005-06,%,People
2,26287.000000,Non-opiate only,2005-06,n,People
3,0.121249,Non-opiate only,2005-06,%,People
4,14737.000000,Non-opiate and Alcohol,2005-06,n,People
5,0.067974,Non-opiate and Alcohol,2005-06,%,People
6,35221.000000,Alcohol only,2005-06,n,People
7,0.162457,Alcohol only,2005-06,%,People
8,216802.000000,Total,2005-06,n,People
9,1.000000,Total,2005-06,%,People


In [10]:
# Keep only rows whose observation is not exactly zero (NaN values, if any,
# compare unequal to 0 and are therefore kept).
# .copy() makes the filtered frame independent of the original, so the
# column assignments in later cells do not raise SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Relabel the 'OBS' column as 'Value'; every other column name is kept as is.
new_table.columns = [
    {'OBS': 'Value'}.get(column_name, column_name)
    for column_name in new_table.columns
]

In [12]:
# Check the first rows now that OBS has been renamed to Value.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
0,140557.0,Opiate,2005-06,n,People
1,0.64832,Opiate,2005-06,%,People
2,26287.0,Non-opiate only,2005-06,n,People
3,0.121249,Non-opiate only,2005-06,%,People
4,14737.0,Non-opiate and Alcohol,2005-06,n,People


In [13]:
# Spell out the sheet's measure-type markers; any other value passes through
# unchanged.
measure_type_labels = {'%': 'Percentage', 'n': 'Count'}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda code: measure_type_labels.get(code, code)
)

In [14]:
# Check the last rows after the Measure Type relabelling.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
125,0.103148,Non-opiate and Alcohol,2017-18,Percentage,People
126,75787.0,Alcohol only,2017-18,Count,People
127,0.282376,Alcohol only,2017-18,Percentage,People
128,268390.0,Total,2017-18,Count,People
129,1.0,Total,2017-18,Percentage,People


In [15]:
# Relabel the 'Total' client group as 'All Clients'; other groups are left
# untouched.
new_table['Clients in treatment'] = new_table['Clients in treatment'].map(
    lambda group: 'All Clients' if group == 'Total' else group
)

In [16]:
# Inspect column dtypes before Value is converted to text.
new_table.dtypes

Value                   float64
Clients in treatment     object
Period                   object
Measure Type             object
Unit                     object
dtype: object

In [17]:
# Store Value as text.
# NOTE(review): counts come out with a trailing '.0' (e.g. '140557.0' in the
# preview below) because the column is float64 -- confirm that is the
# intended output format.
new_table['Value'] = new_table['Value'].astype(str)

In [18]:
# Check how the first few values look once converted to strings.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
0,140557.0,Opiate,2005-06,Count,People
1,0.64831966494774,Opiate,2005-06,Percentage,People
2,26287.0,Non-opiate only,2005-06,Count,People


In [19]:
# Check the last rows, including the relabelled 'All Clients' group.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Period,Measure Type,Unit
125,0.103148403442751,Non-opiate and Alcohol,2017-18,Percentage,People
126,75787.0,Alcohol only,2017-18,Count,People
127,0.282376392563061,Alcohol only,2017-18,Percentage,People
128,268390.0,All Clients,2017-18,Count,People
129,1.0,All Clients,2017-18,Percentage,People


In [20]:
# Put the columns in the order used for the output file.
output_columns = [
    'Period',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [21]:
# When run on its own, write the tidy table to ./out/table7.1.1.csv.
if is_interactive():
    # NOTE(review): this name looks like a find-and-replace artefact of
    # 'destinationFolder' -- confirm nothing else relies on it before renaming.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    outputFile = SubstancetinationFolder / 'table7.1.1.csv'
    new_table.to_csv(outputFile, index=False)