Table 2.8.1: Age and gender breakdown of young people starting treatment in 2016-17 and reported sexual exploitation

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/664944/'\
                    'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls'
    inputFile = sourceFolder / 'Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='2.8.1 Sexual exploitation')[0]

Loading in\Young-people-statistics-data-tables-from-the-national-drug-treatment-monitoring-system-2016-2017.xls which has size 281600 bytes
Table names: ['2.8.1 Sexual exploitation']


In [3]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<F6 413.0>, <C6 0.12>, <I9 0.24>, <G10 0.21>, <I8 0.27>, <C8 0.28>, <H9 1853.0>, <D11 123.0>, <G9 0.21>, <E8 0.21>, <F9 826.0>, <B10 107.0>, <B6 66.0>, <B7 116.0>, <C11 1.0>, <I11 1.0>, <D8 26.0>, <H6 683.0>, <F7 782.0>, <E10 0.19>, <H8 2081.0>, <D6 16.0>, <E9 0.3>, <E7 0.17>, <G8 0.28>, <B9 120.0>, <E6 0.13>, <C9 0.21>, <G7 0.2>, <H11 7804.0>, <H10 1986.0>, <I7 0.15>, <C7 0.21>, <B11 565.0>, <E11 1.0>, <G6 0.1>, <B8 156.0>, <F10 834.0>, <D7 21.0>, <D10 23.0>, <G11 1.0>, <H7 1201.0>, <I6 0.09>, <F8 1094.0>, <F11 3949.0>, <C10 0.19>, <I10 0.25>, <D9 37.0>}

In [4]:
age = tab.excel_ref('A6').expand(DOWN).is_not_blank() - tab.excel_ref('A11')
age

{<A7 '14-15'>, <A9 '16-17'>, <A8 '15-16'>, <A13 '∆ Due to low numbers when breaking down by age and gender, age groups under 14 are combined in this table.'>, <A10 '17-18'>, <A6 'Under 14∆'>}

In [5]:
measuretype = tab.excel_ref('B5').expand(RIGHT).is_not_blank() 
measuretype

{<B5 'n'>, <C5 '%'>, <E5 '%'>, <D5 'n'>, <F5 'n'>, <G5 '%'>, <I5 '%'>, <H5 'n'>}

In [6]:
sex = tab.excel_ref('B4').expand(RIGHT).is_not_blank() 
sex

{<H4 'Male'>, <F4 'Female'>, <D4 'Male'>, <B4 'Female'>}

In [7]:
basis = tab.excel_ref('B3').expand(RIGHT).is_not_blank() 
basis

{<F3 'Total new presentations'>, <B3 'Sexual exploitation'>}

In [8]:
Dimensions = [
            HDim(basis,'Basis of treatment',CLOSEST,LEFT),
            HDim(age,'Clients in treatment',DIRECTLY, LEFT),
            HDim(measuretype,'Measure Type',DIRECTLY,ABOVE),
            HDim(sex,'sex',CLOSEST,LEFT),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Basis of treatment,Clients in treatment,Measure Type,sex,Unit
0,66.0,Sexual exploitation,Under 14∆,n,Female,People
1,0.12,Sexual exploitation,Under 14∆,%,Female,People
2,16.0,Sexual exploitation,Under 14∆,n,Male,People
3,0.13,Sexual exploitation,Under 14∆,%,Male,People
4,413.0,Total new presentations,Under 14∆,n,Female,People
5,0.1,Total new presentations,Under 14∆,%,Female,People
6,683.0,Total new presentations,Under 14∆,n,Male,People
7,0.09,Total new presentations,Under 14∆,%,Male,People
8,116.0,Sexual exploitation,14-15,n,Female,People
9,0.21,Sexual exploitation,14-15,%,Female,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table.head()

Unnamed: 0,Value,Basis of treatment,Clients in treatment,Measure Type,sex,Unit
0,66.0,Sexual exploitation,Under 14∆,n,Female,People
1,0.12,Sexual exploitation,Under 14∆,%,Female,People
2,16.0,Sexual exploitation,Under 14∆,n,Male,People
3,0.13,Sexual exploitation,Under 14∆,%,Male,People
4,413.0,Total new presentations,Under 14∆,n,Female,People


In [14]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

In [15]:
new_table.tail()

Unnamed: 0,Value,Basis of treatment,Clients in treatment,Measure Type,sex,Unit
43,1.0,Sexual exploitation,,Percentage,Male,People
44,3949.0,Total new presentations,,Count,Female,People
45,1.0,Total new presentations,,Percentage,Female,People
46,7804.0,Total new presentations,,Count,Male,People
47,1.0,Total new presentations,,Percentage,Male,People


In [16]:
new_table['Clients in treatment'].fillna('All years', inplace = True)

In [17]:
new_table.dtypes

Value                   float64
Basis of treatment       object
Clients in treatment     object
Measure Type             object
sex                      object
Unit                     object
dtype: object

In [18]:
new_table['Value'] = new_table['Value'].astype(str)

In [19]:
new_table.head(3)

Unnamed: 0,Value,Basis of treatment,Clients in treatment,Measure Type,sex,Unit
0,66.0,Sexual exploitation,Under 14∆,Count,Female,People
1,0.12,Sexual exploitation,Under 14∆,Percentage,Female,People
2,16.0,Sexual exploitation,Under 14∆,Count,Male,People


In [20]:
new_table['sex'] = new_table['sex'].map(
    lambda x: {
        'Female' : 'F', 
        'Male' : 'M',
        'Persons' : 'T'
        }.get(x, x))

In [21]:
new_table['Clients in treatment'] = new_table['Clients in treatment'].str.rstrip('∆')

In [22]:
new_table['Clients in treatment'] = new_table['Clients in treatment'] + '/' + new_table['sex']

In [23]:
new_table['Clients in treatment'] =  'Ag/' + new_table['Clients in treatment']

In [24]:
new_table['Period'] = '2016-17'
new_table['Substance'] = 'All'
new_table = new_table[['Period','Basis of treatment','Substance','Clients in treatment','Measure Type','Value','Unit']]

In [25]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table2.8.1.csv'), index = False)

In [26]:
new_table.tail()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
43,2016-17,Sexual exploitation,All,Ag/All years/M,Percentage,1.0,People
44,2016-17,Total new presentations,All,Ag/All years/F,Count,3949.0,People
45,2016-17,Total new presentations,All,Ag/All years/F,Percentage,1.0,People
46,2016-17,Total new presentations,All,Ag/All years/M,Count,7804.0,People
47,2016-17,Total new presentations,All,Ag/All years/M,Percentage,1.0,People
