Table 4.12.2: Number of children living with individuals starting treatment 2017-18

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.12.2')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.12.2']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('B6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<E7 11967.0>, <F7 25593.0>, <C7 4020.0>, <D7 4009.0>, <F6 46109.0>, <B7 5597.0>, <E8 1.74680371020306>, <B6 10328.0>, <B8 1.84527425406468>, <C6 7424.0>, <F8 1.80162544445747>, <D6 7453.0>, <D8 1.85906709902719>, <C8 1.84676616915423>, <E6 20904.0>}

In [4]:
child = tab.excel_ref('A6').expand(DOWN).is_not_blank() - tab.excel_ref('A19').expand(DOWN)
child

{<A6 'Number of children'>, <A10 '*Percentages may equal 0% or not sum to 100% due to rounding'>, <A7 'Number of individuals living with children'>, <A8 'Average number of children living with people in treatment'>}

In [5]:
Clients = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
Clients

{<F4 'Total'>, <D4 'Non-opiate and Alcohol'>, <B4 'Opiate'>, <C4 'Non-opiate only'>, <E4 'Alcohol only'>}

In [6]:
Dimensions = [
            HDim(child,'Category',DIRECTLY,LEFT),
            HDim(Clients,'Clients in treatment',CLOSEST,LEFT),
            HDimConst('Measure Type', 'Count'),
            HDimConst('Unit','People')            
            ]

In [7]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [8]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Category,Clients in treatment,Measure Type,Unit
0,10328.0,Number of children,Opiate,Count,People
1,7424.0,Number of children,Non-opiate only,Count,People
2,7453.0,Number of children,Non-opiate and Alcohol,Count,People
3,20904.0,Number of children,Alcohol only,Count,People
4,46109.0,Number of children,Total,Count,People
5,5597.0,Number of individuals living with children,Opiate,Count,People
6,4020.0,Number of individuals living with children,Non-opiate only,Count,People
7,4009.0,Number of individuals living with children,Non-opiate and Alcohol,Count,People
8,11967.0,Number of individuals living with children,Alcohol only,Count,People
9,25593.0,Number of individuals living with children,Total,Count,People


In [9]:
new_table = new_table[new_table['OBS'] != 0 ]

In [10]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [11]:
new_table.head()

Unnamed: 0,Value,Category,Clients in treatment,Measure Type,Unit
0,10328.0,Number of children,Opiate,Count,People
1,7424.0,Number of children,Non-opiate only,Count,People
2,7453.0,Number of children,Non-opiate and Alcohol,Count,People
3,20904.0,Number of children,Alcohol only,Count,People
4,46109.0,Number of children,Total,Count,People


In [12]:
new_table.tail()

Unnamed: 0,Value,Category,Clients in treatment,Measure Type,Unit
10,1.845274,Average number of children living with people ...,Opiate,Count,People
11,1.846766,Average number of children living with people ...,Non-opiate only,Count,People
12,1.859067,Average number of children living with people ...,Non-opiate and Alcohol,Count,People
13,1.746804,Average number of children living with people ...,Alcohol only,Count,People
14,1.801625,Average number of children living with people ...,Total,Count,People


In [13]:
new_table.dtypes

Value                   float64
Category                 object
Clients in treatment     object
Measure Type             object
Unit                     object
dtype: object

In [14]:
new_table['Value'] = new_table['Value'].astype(str)

In [15]:
new_table.head(3)

Unnamed: 0,Value,Category,Clients in treatment,Measure Type,Unit
0,10328.0,Number of children,Opiate,Count,People
1,7424.0,Number of children,Non-opiate only,Count,People
2,7453.0,Number of children,Non-opiate and Alcohol,Count,People


In [16]:
new_table['Category'] = new_table['Category'].map(
    lambda x: {
        'Number of children' : 'Number of children living with people in treatment' 
        }.get(x, x))

In [17]:
new_table.head(3)

Unnamed: 0,Value,Category,Clients in treatment,Measure Type,Unit
0,10328.0,Number of children living with people in treat...,Opiate,Count,People
1,7424.0,Number of children living with people in treat...,Non-opiate only,Count,People
2,7453.0,Number of children living with people in treat...,Non-opiate and Alcohol,Count,People


In [18]:
new_table = new_table[['Category','Clients in treatment','Measure Type','Value','Unit']]

In [19]:
if is_interactive():
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(SubstancetinationFolder / ('table4.12.2.csv'), index = False)