Table 3.3.1: Length of Latest Episode 2016-17

In [1]:
from gssutils import *

if is_interactive():
    # When run interactively, download the source spreadsheet into ./in
    # and leave its path in `inputFile` for the loading cell that follows.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped with a file-backed HTTP cache stored in ./.cache
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = ('https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/664944/'
                'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls')
    # Use the last URL segment as the local file name so the two cannot drift apart.
    inputFile = sourceFolder / inputURL.rsplit('/', 1)[-1]
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of saving an error page as the .xls file.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load only the '3.3.1 Length of Latest Episode' worksheet; loadxlstabs
# returns a list of tabs, so keep the first (and only) entry.
tab = loadxlstabs(
    inputFile,
    sheetids='3.3.1 Length of Latest Episode',
)[0]

Loading in\Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls which has size 281600 bytes
Table names: ['3.3.1 Length of Latest Episode']


In [3]:
# Observation cells: every non-blank cell reached by expanding down and
# then right from B4 (B4:C8 in this sheet, per the output below).
observations = (
    tab.excel_ref('B4')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<B6 3122.0>, <B4 6928.0>, <B8 16200.0>, <C5 0.31>, <B5 5000.0>, <C4 0.43>, <C7 0.07>, <C6 0.19>, <B7 1150.0>, <C8 1.0>}

In [4]:
# Row labels: the non-blank cells of column A from A4 downwards
# (the episode-length bands plus the 'Total' row).
setting = (
    tab.excel_ref('A4')
    .expand(DOWN)
    .is_not_blank()
)
setting

{<A4 '0 (zero) to 12 weeks'>, <A6 '27 to 52 weeks'>, <A7 'Longer than 52 weeks'>, <A8 'Total'>, <A5 '13 to 26 weeks'>}

In [5]:
# Column headers: the non-blank cells of row 3 from B3 rightwards
# ('n' and '%'), used below as the measure type of each observation.
mt = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
mt

{<C3 '%'>, <B3 'n'>}

In [6]:
# Dimensions attached to every observation:
#   - 'Measure Type' comes from the header cell directly above,
#   - 'Clients in treatment' comes from the row label directly to the left,
#   - 'Unit' is the constant 'People'.
Dimensions = [
    HDim(mt, 'Measure Type', DIRECTLY, ABOVE),
    HDim(setting, 'Clients in treatment', DIRECTLY, LEFT),
    HDimConst('Unit', 'People'),
]

In [7]:
# Combine the observation cells with their dimension lookups.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)
# Optional visual check of the segment; uncomment to use:
# if is_interactive():
#     savepreviewhtml(c1)

In [8]:
# Flatten the conversion segment into a pandas DataFrame: one row per
# observation, with the value in 'OBS' and one column per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Measure Type,Clients in treatment,Unit
0,6928.0,n,0 (zero) to 12 weeks,People
1,0.43,%,0 (zero) to 12 weeks,People
2,5000.0,n,13 to 26 weeks,People
3,0.31,%,13 to 26 weeks,People
4,3122.0,n,27 to 52 weeks,People
5,0.19,%,27 to 52 weeks,People
6,1150.0,n,Longer than 52 weeks,People
7,0.07,%,Longer than 52 weeks,People
8,16200.0,n,Total,People
9,1.0,%,Total,People


In [9]:
# Rename the 'OBS' column to 'Value'; every other column keeps its name.
new_table.columns = [{'OBS': 'Value'}.get(name, name) for name in new_table.columns]

In [10]:
# Check that the 'OBS' column is now called 'Value'.
new_table.head()

Unnamed: 0,Value,Measure Type,Clients in treatment,Unit
0,6928.0,n,0 (zero) to 12 weeks,People
1,0.43,%,0 (zero) to 12 weeks,People
2,5000.0,n,13 to 26 weeks,People
3,0.31,%,13 to 26 weeks,People
4,3122.0,n,27 to 52 weeks,People


In [11]:
# Spell out the spreadsheet's terse column headers; any value that is
# not listed here is left unchanged.
new_table['Measure Type'] = new_table['Measure Type'].replace({
    'n': 'Count',
    '%': 'Percentage',
})

In [12]:
# Check the relabelled 'Measure Type' values on the last rows.
new_table.tail()

Unnamed: 0,Value,Measure Type,Clients in treatment,Unit
5,0.19,Percentage,27 to 52 weeks,People
6,1150.0,Count,Longer than 52 weeks,People
7,0.07,Percentage,Longer than 52 weeks,People
8,16200.0,Count,Total,People
9,1.0,Percentage,Total,People


In [13]:
# Inspect the column dtypes before 'Value' is converted to text.
new_table.dtypes

Value                   float64
Measure Type             object
Clients in treatment     object
Unit                     object
dtype: object

In [14]:
# Store 'Value' as text instead of float64.
# NOTE(review): counts come out as e.g. '6928.0' (see the preview in the
# next cell) — confirm the trailing '.0' is acceptable in the output CSV.
new_table['Value'] = new_table['Value'].astype(str)

In [15]:
# Look at the first three rows after the string conversion.
new_table.head(3)

Unnamed: 0,Value,Measure Type,Clients in treatment,Unit
0,6928.0,Count,0 (zero) to 12 weeks,People
1,0.43,Percentage,0 (zero) to 12 weeks,People
2,5000.0,Count,13 to 26 weeks,People


In [16]:
# Constant column naming the table these rows come from.
new_table = new_table.assign(**{'Basis of treatment': 'Length of Latest Episode'})

In [17]:
# Relabel the 'Total' row; all other band labels are kept as they are.
new_table['Clients in treatment'] = new_table['Clients in treatment'].replace(
    {'Total': 'All episode'}
)

In [18]:
# Add the constant 'Period' and 'Substance' columns, then keep only the
# output columns, in their final order.
new_table = new_table.assign(Period='2016-17', Substance='All')[
    [
        'Period',
        'Basis of treatment',
        'Substance',
        'Clients in treatment',
        'Measure Type',
        'Value',
        'Unit',
    ]
]

In [19]:
if is_interactive():
    # Write the finished table to out/table3.3.1.csv, creating the output
    # folder (and any missing parents) first. The row index is not written.
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table3.3.1.csv', index=False)

In [20]:
# Final look at the last rows of the reshaped table.
new_table.tail()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
5,2016-17,Length of Latest Episode,All,27 to 52 weeks,Percentage,0.19,People
6,2016-17,Length of Latest Episode,All,Longer than 52 weeks,Count,1150.0,People
7,2016-17,Length of Latest Episode,All,Longer than 52 weeks,Percentage,0.07,People
8,2016-17,Length of Latest Episode,All,All episode,Count,16200.0,People
9,2016-17,Length of Latest Episode,All,All episode,Percentage,1.0,People
