Table 5.3.1: Clients retained in treatment for at least 12 weeks or completing treatment earlier

In [1]:
from gssutils import *

if is_interactive():
    # Download the source workbook into in/ so that the following cells can
    # read it from `inputFile`. The imports are kept local to this branch
    # because they are only needed when the download actually happens.
    # NOTE(review): presumably is_interactive() is False when a driver notebook
    # runs this one and supplies `inputFile` itself -- confirm.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session backed by an on-disk cache in ./.cache.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Stop on an HTTP error instead of saving the error page as if it were the
    # spreadsheet, which would only fail later with a confusing parse error.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the 'Table 5.3.1' worksheet; the loader returns a sequence of tabs,
# so take its first (and only requested) entry.
tab = loadxlstabs(
    inputFile,
    sheetids='Table 5.3.1',
)[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 5.3.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observations: every non-blank cell in the region extending down and to the
# right of B5.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<B6 23730.0>, <B9 268390.0>, <B7 27684.0>, <C6 20048.0>, <B8 75787.0>, <D7 0.861580696431152>, <D9 0.915015462573121>, <C8 67897.0>, <C5 133784.0>, <D8 0.895892435378099>, <D5 0.947552571375957>, <B5 141189.0>, <C9 245581.0>, <C7 23852.0>, <D6 0.844837758112094>}

In [4]:
# Substance group labels: the non-blank cells of column A from A5 downwards.
substance = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
substance

{<A8 'Alcohol only'>, <A9 'Total'>, <A7 'Non-opiate and Alcohol'>, <A5 'Opiate'>, <A6 'Non-opiate only'>}

In [5]:
# Client-group headers: the non-blank cells of row 3 from B3 rightwards.
clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
clients

{<C3 'Number retained in treatment for at least 12 weeks or completing treatment earlier'>, <B3 'Number in contact with treatment services'>}

In [6]:
# Measure markers ('n' / '%'): the non-blank cells of row 4 from B4 rightwards.
MeasureType = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
MeasureType

{<D4 '%'>, <C4 'n'>, <B4 'n'>}

In [7]:
# How each observation finds its header values.
Dimensions = [
    # Row 3 only has headers in columns B and C, so the '%' column takes the
    # closest header to its left.
    HDim(clients, 'Clients in treatment', CLOSEST, LEFT),
    # NOTE(review): 'Substane' is misspelt, but later cells look the column up
    # by this exact label, so it must stay in step with them.
    HDim(substance, 'Substane', DIRECTLY, LEFT),
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Constant column applied to every observation.
    HDimConst('Unit', 'People'),
]

In [8]:
# Tie the observations to the header dimensions declared above.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Uncomment to render an HTML preview of the segment when run standalone:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a DataFrame: one row per observation,
# with the value in 'OBS' and one column per dimension.
new_table = c1.topandas()
# Display the frame to check the header lookups visually.
new_table




Unnamed: 0,OBS,Clients in treatment,Substane,Measure Type,Unit
0,141189.0,Number in contact with treatment services,Opiate,n,People
1,133784.0,Number retained in treatment for at least 12 w...,Opiate,n,People
2,0.947553,Number retained in treatment for at least 12 w...,Opiate,%,People
3,23730.0,Number in contact with treatment services,Non-opiate only,n,People
4,20048.0,Number retained in treatment for at least 12 w...,Non-opiate only,n,People
5,0.844838,Number retained in treatment for at least 12 w...,Non-opiate only,%,People
6,27684.0,Number in contact with treatment services,Non-opiate and Alcohol,n,People
7,23852.0,Number retained in treatment for at least 12 w...,Non-opiate and Alcohol,n,People
8,0.861581,Number retained in treatment for at least 12 w...,Non-opiate and Alcohol,%,People
9,75787.0,Number in contact with treatment services,Alcohol only,n,People


In [10]:
# Drop zero-valued observations. The explicit .copy() makes the result an
# independent frame, so the column assignments in later cells do not trigger
# SettingWithCopyWarning when rows were actually removed by the filter.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Call the observation column 'Value'; every other column keeps its name.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [12]:
# Preview the first rows to confirm the observation column is now 'Value'.
new_table.head()

Unnamed: 0,Value,Clients in treatment,Substane,Measure Type,Unit
0,141189.0,Number in contact with treatment services,Opiate,n,People
1,133784.0,Number retained in treatment for at least 12 w...,Opiate,n,People
2,0.947553,Number retained in treatment for at least 12 w...,Opiate,%,People
3,23730.0,Number in contact with treatment services,Non-opiate only,n,People
4,20048.0,Number retained in treatment for at least 12 w...,Non-opiate only,n,People


In [13]:
# List the distinct raw measure codes before they are relabelled.
new_table['Measure Type'].unique()

array(['n', '%'], dtype=object)

In [14]:
# Expand the spreadsheet's terse measure codes into readable labels; any value
# that is not listed here is left exactly as it is.
new_table['Measure Type'] = new_table['Measure Type'].replace(
    {'n': 'Count', '%': 'Percentage'}
)

In [15]:
# Check the last rows after relabelling the measure types.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Substane,Measure Type,Unit
10,67897.0,Number retained in treatment for at least 12 w...,Alcohol only,Count,People
11,0.895892,Number retained in treatment for at least 12 w...,Alcohol only,Percentage,People
12,268390.0,Number in contact with treatment services,Total,Count,People
13,245581.0,Number retained in treatment for at least 12 w...,Total,Count,People
14,0.915015,Number retained in treatment for at least 12 w...,Total,Percentage,People


In [16]:
# Inspect the column dtypes before 'Value' is converted to text.
new_table.dtypes

Value                   float64
Clients in treatment     object
Substane                 object
Measure Type             object
Unit                     object
dtype: object

In [17]:
# Hold the observations as text.
# NOTE(review): whole-number counts keep a trailing '.0' (e.g. '141189.0')
# because the column is float before the cast -- confirm that is acceptable.
new_table = new_table.astype({'Value': str})

In [18]:
# Spot-check the first three rows after converting 'Value' to text.
new_table.head(3)

Unnamed: 0,Value,Clients in treatment,Substane,Measure Type,Unit
0,141189.0,Number in contact with treatment services,Opiate,Count,People
1,133784.0,Number retained in treatment for at least 12 w...,Opiate,Count,People
2,0.947552571375957,Number retained in treatment for at least 12 w...,Opiate,Percentage,People


In [19]:
# Label the aggregate row 'All' instead of 'Total'; other substance groups are
# left unchanged.
new_table['Substane'] = new_table['Substane'].replace({'Total': 'All'})

In [20]:
# Confirm the 'Total' rows are now labelled 'All'.
new_table.tail()

Unnamed: 0,Value,Clients in treatment,Substane,Measure Type,Unit
10,67897.0,Number retained in treatment for at least 12 w...,Alcohol only,Count,People
11,0.895892435378099,Number retained in treatment for at least 12 w...,Alcohol only,Percentage,People
12,268390.0,Number in contact with treatment services,All,Count,People
13,245581.0,Number retained in treatment for at least 12 w...,All,Count,People
14,0.915015462573121,Number retained in treatment for at least 12 w...,All,Percentage,People


In [21]:
# Correct the misspelt 'Substane' header before the table is exported, then put
# the columns in their final order. rename() is a no-op when the column is
# already called 'Substance', so this cell can safely be run more than once.
new_table = new_table.rename(columns={'Substane': 'Substance'})
new_table = new_table[['Substance', 'Clients in treatment', 'Measure Type', 'Value', 'Unit']]

In [22]:
if is_interactive():
    # Write the tidy table to out/table5.3.1.csv without the DataFrame index.
    # The folder variable was previously a garbled identifier
    # ("SubstancetinationFolder"); it now has a readable name.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table5.3.1.csv', index=False)