Table 4.5.1: Disability, new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    # Only fetch the workbook when is_interactive() is true; otherwise this
    # cell defines nothing, so `inputFile` must already exist in the namespace.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # requests session wrapped by CacheControl, backed by a file cache in
    # '.cache' and using the LastModified heuristic.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Stop on a 4xx/5xx reply instead of saving an error page as the workbook,
    # which would only surface later as a confusing spreadsheet-parsing error.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the first tab returned for sheet 'Table 4.5.1'.
# NOTE(review): `inputFile` is only assigned by the download cell when
# is_interactive() is true; otherwise it has to come from whatever runs this
# notebook -- confirm.
tab = loadxlstabs(inputFile, sheetids='Table 4.5.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.5.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell in the block that starts at B5 and
# extends down and to the right.
observations = (
    tab.excel_ref('B5')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<G9 0.031774804073146>, <D21 1184.0>, <I12 0.0352771636133923>, <J7 579.0>, <J10 242.0>, <D13 77.0>, <I6 0.00919930511686671>, <K12 0.0317814417117676>, <J20 8063.0>, <I11 0.00436276058117498>, <F16 1214.0>, <I18 0.692316803537587>, <K9 0.0457555358307084>, <H8 970.0>, <I15 0.0253079595704359>, <F13 104.0>, <E15 0.0199210337401292>, <D7 51.0>, <H14 85.0>, <G6 0.00735725329210428>, <C14 0.000995677303414445>, <D9 430.0>, <G22 1.0>, <B16 3149.0>, <J11 491.0>, <K18 0.686623673482212>, <C21 0.0209577929962601>, <G18 0.677560377459082>, <B7 206.0>, <H13 444.0>, <I14 0.00167798483891346>, <J19 27041.0>, <F15 439.0>, <H16 3494.0>, <G7 0.00362531321639921>, <E22 1.0>, <K8 0.0237928786319684>, <E14 0.00161521895190237>, <C20 0.0698188353004031>, <H20 3105.0>, <C18 0.689810092767983>, <C11 0.00407984846277138>, <G20 0.057525190595511>, <K19 0.212407801613423>, <J21 4791.0>, <B13 269.0>, <D15 333.0>, <I7 0.0050142135186355>, <C22 1.0>, <J22 127307.0>, <G16 0.0647225035986565>, <G10 0.00223916404

In [4]:
# Row labels: the non-blank cells of column A from A5 downwards.
# NOTE(review): the label 'Not stated' occurs twice (A16 and A20), so two
# different sheet rows end up with the same Disability value -- confirm
# whether they need to be told apart.
Disability = (
    tab.excel_ref('A5')
    .expand(DOWN)
    .is_not_blank()
)
Disability

{<A9 'Mobility and gross motor'>, <A17 'Total number of citations'>, <A21 'Inconsistent/missing'>, <A6 'Hearing'>, <A13 'Sight'>, <A10 'Perception of physical danger'>, <A5 'Behaviour and emotional'>, <A14 'Speech'>, <A11 'Personal, self-care and continence'>, <A7 'Manual dexterity'>, <A8 'Learning disability'>, <A16 'Not stated'>, <A18 'No disability'>, <A19 'Any disability'>, <A12 'Progressive conditions and physical health'>, <A22 'Total'>, <A20 'Not stated'>, <A15 'Other'>}

In [5]:
# Client-group headers: the non-blank cells of row 3 from B3 rightwards
# (they sit over every other column: B, D, F, H and J).
Clients = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
Clients

{<H3 'Alcohol only'>, <D3 'Non-opiate only'>, <B3 'Opiate'>, <F3 'Non-opiate and Alcohol'>, <J3 'Total'>}

In [6]:
# Measure markers: the non-blank cells of row 4 from B4 rightwards, holding
# 'n' or '%' for each data column.
MeasureType = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
MeasureType

{<F4 'n'>, <B4 'n'>, <H4 'n'>, <G4 '%'>, <D4 'n'>, <K4 '%'>, <E4 '%'>, <I4 '%'>, <C4 '%'>, <J4 'n'>}

In [7]:
# How each observation cell finds its dimension values.
Dimensions = [
    # Row label from column A.
    HDim(Disability, 'Disability', DIRECTLY, LEFT),
    # Client group from row 3; the headers only sit over every other column,
    # so a CLOSEST lookup is used rather than a DIRECTLY one.
    HDim(Clients, 'Clients', CLOSEST, LEFT),
    # 'n' / '%' marker from row 4 above the observation.
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Same unit for every observation.
    HDimConst('Unit', 'People'),
]

In [8]:
# Combine the observation cells with the dimension lookups.
# NOTE(review): Dimensions declares no time dimension, so processTIMEUNIT=True
# presumably has nothing to act on here -- confirm.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the selections (left disabled):
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the conversion segment into a DataFrame: an OBS column holding the
# cell value plus one column per dimension, then display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Disability,Clients,Measure Type,Unit
0,4352.000000,Behaviour and emotional,Opiate,n,People
1,0.105688,Behaviour and emotional,Opiate,%,People
2,1834.000000,Behaviour and emotional,Non-opiate only,n,People
3,0.109715,Behaviour and emotional,Non-opiate only,%,People
4,2181.000000,Behaviour and emotional,Non-opiate and Alcohol,n,People
5,0.116277,Behaviour and emotional,Non-opiate and Alcohol,%,People
6,4725.000000,Behaviour and emotional,Alcohol only,n,People
7,0.093276,Behaviour and emotional,Alcohol only,%,People
8,13092.000000,Behaviour and emotional,Total,n,People
9,0.102838,Behaviour and emotional,Total,%,People


In [10]:
# Keep only rows whose observation is non-zero.
# .copy() gives the later cells an independent frame to modify; assigning
# columns on a boolean-mask slice is the chained-assignment pattern that pandas
# flags with SettingWithCopyWarning.
# NOTE(review): this also discards genuine zero counts/percentages -- confirm
# that is intended.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# Relabel the observation column as 'Value'; every other column keeps its name.
new_table.columns = [
    {'OBS': 'Value'}.get(column, column)
    for column in new_table.columns
]

In [12]:
# Spell out the sheet's measure markers; any other value passes through as-is.
measure_labels = {
    'n': 'Count',
    '%': 'Percentage',
}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_labels.get(marker, marker)
)

In [13]:
# Preview the first rows: OBS is now 'Value' and the measure markers are spelled out.
new_table.head()

Unnamed: 0,Value,Disability,Clients,Measure Type,Unit
0,4352.0,Behaviour and emotional,Opiate,Count,People
1,0.105688,Behaviour and emotional,Opiate,Percentage,People
2,1834.0,Behaviour and emotional,Non-opiate only,Count,People
3,0.109715,Behaviour and emotional,Non-opiate only,Percentage,People
4,2181.0,Behaviour and emotional,Non-opiate and Alcohol,Count,People


In [14]:
# Inspect the column dtypes; Value is still float64 at this point.
new_table.dtypes

Value           float64
Disability       object
Clients          object
Measure Type     object
Unit             object
dtype: object

In [15]:
# Cast Value from float64 to str. Whole-number counts keep their trailing
# '.0' (e.g. '4352.0').
new_table['Value'] = new_table['Value'].astype(str)

In [16]:
# Preview the first three rows after the string cast of Value.
new_table.head(3)

Unnamed: 0,Value,Disability,Clients,Measure Type,Unit
0,4352.0,Behaviour and emotional,Opiate,Count,People
1,0.105687503035602,Behaviour and emotional,Opiate,Percentage,People
2,1834.0,Behaviour and emotional,Non-opiate only,Count,People


In [17]:
# Relabel the 'Total' row label as 'All'; every other label is left untouched.
new_table['Disability'] = new_table['Disability'].map(
    lambda label: 'All' if label == 'Total' else label
)

In [18]:
# Relabel the 'Total' client group as 'All Clients'; other groups are left untouched.
new_table['Clients'] = new_table['Clients'].map(
    lambda group: 'All Clients' if group == 'Total' else group
)

In [19]:
# Preview the first three rows after relabelling the 'Total' entries.
new_table.head(3)

Unnamed: 0,Value,Disability,Clients,Measure Type,Unit
0,4352.0,Behaviour and emotional,Opiate,Count,People
1,0.105687503035602,Behaviour and emotional,Opiate,Percentage,People
2,1834.0,Behaviour and emotional,Non-opiate only,Count,People


In [20]:
# Prefix every label with 'Disability/'.
# Not idempotent: running this cell again prepends the prefix a second time.
new_table['Disability'] = 'Disability/' + new_table['Disability']

In [21]:
# Prefix every client group with 'New clients/'.
# Not idempotent: running this cell again prepends the prefix a second time.
new_table['Clients'] = 'New clients/' + new_table['Clients']

In [22]:
# Give the two dimension columns their output names.
output_names = {
    'Clients': 'Clients in treatment',
    'Disability': 'Basis of treatment',
}
new_table.columns = [output_names.get(name, name) for name in new_table.columns]

# Constant columns shared by every row of this table.
new_table['Period'] = '2017-18'
new_table['Substance'] = 'All'

# Keep only the output columns, in output order.
new_table = new_table[[
    'Period',
    'Basis of treatment',
    'Substance',
    'Clients in treatment',
    'Measure Type',
    'Value',
    'Unit',
]]

In [23]:
# Write the tidy table to out/table4.5.1.csv, only when is_interactive() is true.
if is_interactive():
    # NOTE(review): the variable name looks like a search-and-replace casualty
    # of "destinationFolder"; left unchanged in case something else refers to it.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
    # index = False keeps the DataFrame index out of the file.
    new_table.to_csv(SubstancetinationFolder / ('table4.5.1.csv'), index = False)

In [24]:
# Preview the first rows of the final output layout.
new_table.head()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
0,2017-18,Disability/Behaviour and emotional,All,New clients/Opiate,Count,4352.0,People
1,2017-18,Disability/Behaviour and emotional,All,New clients/Opiate,Percentage,0.105687503035602,People
2,2017-18,Disability/Behaviour and emotional,All,New clients/Non-opiate only,Count,1834.0,People
3,2017-18,Disability/Behaviour and emotional,All,New clients/Non-opiate only,Percentage,0.109715242881072,People
4,2017-18,Disability/Behaviour and emotional,All,New clients/Non-opiate and Alcohol,Count,2181.0,People
