Table 4.11.1: Housing situation of new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    # Download the source workbook only when this notebook is run on its own.
    # NOTE(review): `inputFile` is assigned only inside this guard, yet the next
    # cell reads it unconditionally - presumably a calling notebook defines it
    # in the non-interactive case; confirm.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session whose responses are cached on disk under '.cache'.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error instead of saving the error page to disk
    # as though it were the spreadsheet.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the workbook and keep the single requested sheet, 'Table 4.11.1'.
tab = loadxlstabs(inputFile, sheetids='Table 4.11.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.11.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observations: every non-blank cell from B5 downwards and to the right.
# In the saved output this includes cells in row 11 (e.g. B11, D11).
observations = tab.excel_ref('B5').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<J11 127307.0>, <J10 1287.0>, <I7 0.030384592379927>, <B8 116.0>, <H11 50656.0>, <F9 18592.0>, <E8 0.0504578810115835>, <G7 0.0544858003442341>, <C9 1.0>, <F7 1013.0>, <K6 0.111387081415648>, <E9 1.0>, <H9 50157.0>, <C7 0.158476778971115>, <H8 219.0>, <D8 832.0>, <H5 44733.0>, <J7 9767.0>, <B6 6272.0>, <F8 774.0>, <B9 40782.0>, <J6 14037.0>, <G6 0.12322504302926>, <I9 1.0>, <I8 0.0043662898498714>, <D5 13097.0>, <H7 1524.0>, <I6 0.0733895567916741>, <E5 0.794287100491237>, <F10 165.0>, <K7 0.077503570861768>, <C8 0.00284439213378451>, <K9 1.0>, <G8 0.0416308089500861>, <J8 1941.0>, <B5 27931.0>, <F11 18757.0>, <E7 0.0465158590575535>, <I5 0.891859560978527>, <G5 0.78065834767642>, <B11 41178.0>, <E6 0.108739159439626>, <C6 0.153793340199107>, <H10 499.0>, <D11 16716.0>, <B7 6463.0>, <C5 0.684885488695993>, <B10 396.0>, <F5 14514.0>, <K8 0.015402317092525>, <G9 1.0>, <D10 227.0>, <F6 2291.0>, <D6 1793.0>, <D7 767.0>, <K5 0.795707030630059>, <H6 3681.0>, <J5 100275.0>, <J9 126020.0>, <D

In [4]:
# Housing-situation row labels: the non-blank cells in column A from A5 down,
# with everything from A11 downwards removed again.
house = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A11').expand(DOWN)
house

{<A6 'Housing problem'>, <A8 'Other'>, <A10 'Inconsistent/missing'>, <A5 'No problem'>, <A9 'Total'>, <A7 'Urgent housing problem (NFA)'>}

In [5]:
# Client-group headers: the non-blank cells in row 3 from column B rightwards.
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<F3 'Non-opiate and Alcohol'>, <B3 'Opiate'>, <D3 'Non-opiate only'>, <H3 'Alcohol only'>, <J3 'Total'>}

In [6]:
# Measure markers ('n' / '%'): the non-blank cells in row 4 from column B rightwards.
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<D4 'n'>, <J4 'n'>, <H4 'n'>, <G4 '%'>, <E4 '%'>, <K4 '%'>, <I4 '%'>, <B4 'n'>, <C4 '%'>, <F4 'n'>}

In [7]:
# How each observation cell picks up its dimension values.
Dimensions = [
    # Row label taken from the `house` cells in column A.
    HDim(house, 'Housing Status', DIRECTLY, LEFT),
    # Client-group header; the '%' columns have no header of their own and
    # take the closest one to their left.
    HDim(Clients, 'Clients in new treatment', CLOSEST, LEFT),
    # 'n' / '%' marker from the `MeasureType` cells in row 4.
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    # Constant unit applied to every observation.
    HDimConst('Unit', 'People'),
]

In [8]:
# Bind the observation cells to the dimensions defined above.
# NOTE(review): processTIMEUNIT=True is passed although `Dimensions` defines no
# time dimension - confirm whether the flag is needed.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional HTML preview of the segment, currently disabled:
# if is_interactive():
#     savepreviewhtml(c1)

In [9]:
# Flatten the segment into a DataFrame: an 'OBS' value column plus one column
# per dimension.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Housing Status,Clients in new treatment,Measure Type,Unit
0,27931.0,No problem,Opiate,n,People
1,0.684885,No problem,Opiate,%,People
2,13097.0,No problem,Non-opiate only,n,People
3,0.794287,No problem,Non-opiate only,%,People
4,14514.0,No problem,Non-opiate and Alcohol,n,People
5,0.780658,No problem,Non-opiate and Alcohol,%,People
6,44733.0,No problem,Alcohol only,n,People
7,0.89186,No problem,Alcohol only,%,People
8,100275.0,No problem,Total,n,People
9,0.795707,No problem,Total,%,People


In [10]:
# Drop rows whose observation equals 0. `.copy()` makes the result an
# independent frame, so the column assignments in later cells modify it
# directly instead of a slice of the unfiltered frame (which triggers pandas'
# SettingWithCopyWarning).
# NOTE(review): this also discards genuine zero counts/percentages - confirm
# that is intended.
new_table = new_table[new_table['OBS'] != 0].copy()

In [11]:
# The observation column arrives as 'OBS'; call it 'Value' from here on.
# Every other heading is kept as it is.
obs_rename = {'OBS': 'Value'}
new_table.columns = [obs_rename.get(heading, heading) for heading in new_table.columns]

In [12]:
# Spot-check the first rows: the observation column should now read 'Value'.
new_table.head()

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Unit
0,27931.0,No problem,Opiate,n,People
1,0.684885,No problem,Opiate,%,People
2,13097.0,No problem,Non-opiate only,n,People
3,0.794287,No problem,Non-opiate only,%,People
4,14514.0,No problem,Non-opiate and Alcohol,n,People


In [13]:
# Expand the spreadsheet's terse column markers into full measure-type names;
# any value without an entry in the lookup passes through unchanged.
measure_type_names = {'n': 'Count', '%': 'Percentage'}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_type_names.get(marker, marker))

In [14]:
# Inspect the last rows; in the saved output their Housing Status is blank.
new_table.tail()

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Unit
55,41178.0,,Opiate,Count,People
56,16716.0,,Non-opiate only,Count,People
57,18757.0,,Non-opiate and Alcohol,Count,People
58,50656.0,,Alcohol only,Count,People
59,127307.0,,Total,Count,People


In [15]:
# Check the column dtypes ('Value' is still float64 in the saved output).
new_table.dtypes

Value                       float64
Housing Status               object
Clients in new treatment     object
Measure Type                 object
Unit                         object
dtype: object

In [16]:
# Store the values as text.
# NOTE(review): whole-number counts keep a trailing '.0' (e.g. '27931.0' in the
# saved output) - confirm that is the desired output format.
new_table['Value'] = new_table['Value'].astype(str)

In [17]:
# Spot-check the first rows after converting 'Value' to text.
new_table.head(3)

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Unit
0,27931.0,No problem,Opiate,Count,People
1,0.684885488695993,No problem,Opiate,Percentage,People
2,13097.0,No problem,Non-opiate only,Count,People


In [18]:
# Relabel the 'Total' housing row as 'All'; every other value (including a
# missing one) is left exactly as it is.
housing_status_names = {'Total': 'All'}
new_table['Housing Status'] = new_table['Housing Status'].map(
    lambda status: housing_status_names.get(status, status))

In [19]:
def user_perc(x):
    """Return the Housing Status label for ``x``, filling in a missing one.

    Parameters
    ----------
    x : object
        A single 'Housing Status' value; may be None or NaN when the
        observation had no row label.

    Returns
    -------
    object
        'All inclusive Inconsistent/missing' when ``x`` is missing, otherwise
        ``x`` unchanged.
    """
    # `x != x` holds only for NaN, so missing values are recognised whether
    # they arrive as None or as a float NaN.
    if x is None or (isinstance(x, float) and x != x):
        return 'All inclusive Inconsistent/missing'
    return x
    
# Fill in the missing Housing Status labels. Mapping over the one column avoids
# building a row object per record just to read a single field, and it still
# yields a column (not a frame) when the table is empty.
new_table['Housing Status'] = new_table['Housing Status'].map(user_perc)

In [20]:
# Relabel the 'Total' client group as 'All Clients'; other groups are unchanged.
client_group_names = {'Total': 'All Clients'}
new_table['Clients in new treatment'] = new_table['Clients in new treatment'].map(
    lambda group: client_group_names.get(group, group))

In [21]:
# Spot-check the first rows after relabelling.
new_table.head(3)

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Unit
0,27931.0,No problem,Opiate,Count,People
1,0.684885488695993,No problem,Opiate,Percentage,People
2,13097.0,No problem,Non-opiate only,Count,People


In [22]:
# Prefix every label with its category path.
# NOTE: this cell is not idempotent - each re-run prepends the prefix again.
new_table['Housing Status'] =  'Housing situation/' + new_table['Housing Status']
new_table['Clients in new treatment'] = 'New clients/' + new_table['Clients in new treatment']

In [23]:
# Arrange the output column layout: rename two dimension columns, add the
# constant Period and Substance columns, then fix the column order.
layout_names = {
    'Clients in new treatment': 'Clients in treatment',
    'Housing Status': 'Basis of treatment',
}
new_table.columns = [layout_names.get(heading, heading) for heading in new_table.columns]
new_table['Period'] = '2017-18'
new_table['Substance'] = 'All'
output_columns = ['Period', 'Basis of treatment', 'Substance',
                  'Clients in treatment', 'Measure Type', 'Value', 'Unit']
new_table = new_table[output_columns]

In [25]:
if is_interactive():
    # Write the tidy table to out/table4.11.1.csv when run interactively.
    # The folder variable was previously named 'SubstancetinationFolder', an
    # apparent search-and-replace casualty of "Destination".
    destinationFolder = Path('out')
    destinationFolder.mkdir(exist_ok=True, parents=True)
    new_table.to_csv(destinationFolder / 'table4.11.1.csv', index=False)

In [26]:
# Final spot-check of the table in its output column layout.
new_table.head()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
0,2017-18,Housing situation/No problem,All,New clients/Opiate,Count,27931.0,People
1,2017-18,Housing situation/No problem,All,New clients/Opiate,Percentage,0.684885488695993,People
2,2017-18,Housing situation/No problem,All,New clients/Non-opiate only,Count,13097.0,People
3,2017-18,Housing situation/No problem,All,New clients/Non-opiate only,Percentage,0.794287100491237,People
4,2017-18,Housing situation/No problem,All,New clients/Non-opiate and Alcohol,Count,14514.0,People
