Table 7.3.2: Trends in the proportion of new presentations with housing problems, by NPS and all clients

In [1]:
from gssutils import *

if is_interactive():
    # These imports are only needed for the download performed in this branch.
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    # HTTP session backed by an on-disk cache in '.cache'.
    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    # Local folder that holds the downloaded workbook.
    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    # Fail loudly on an HTTP error rather than saving an error page as the workbook.
    response.raise_for_status()
    with open(inputFile, 'wb') as f:
        f.write(response.content)

In [2]:
# Load the sheet named 'Table 7.3.2' from the workbook and keep the first tab returned.
# NOTE(review): inputFile is only assigned inside the is_interactive() branch above;
# when that branch is skipped it has to be provided by whatever runs this notebook.
tab = loadxlstabs(inputFile, sheetids='Table 7.3.2')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 7.3.2']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
# Observation cells: every non-blank cell reached by expanding B6 down and then right.
observations = (
    tab.excel_ref('B6')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
)
observations

{<F11 10.0>, <Q9 0.015330380128487604>, <J10 2016.0>, <J12 4058.0>, <I8 0.806111857148042>, <I10 1.0>, <R11 12.0>, <S10 1.0>, <E6 0.06582228685029469>, <D10 142353.0>, <L7 15569.0>, <P6 9442.0>, <T11 1287.0>, <M8 0.8047303675733082>, <E10 1.0>, <O10 1.0>, <H7 16009.0>, <D11 5105.0>, <D6 9370.0>, <T9 1941.0>, <F6 115.0>, <D12 289811.0>, <U7 0.1113870814156483>, <S8 0.5194054500412882>, <O7 0.17593244194229415>, <Q6 0.07299518364759452>, <P10 129351.0>, <L9 1848.0>, <N6 262.0>, <L10 135592.0>, <T8 100275.0>, <T6 9767.0>, <B7 40.0>, <B10 318.0>, <I6 0.06572786992342032>, <F12 2298.0>, <N10 1421.0>, <O8 0.5932441942294159>, <G6 0.10052447552447552>, <K9 0.06001984126984127>, <T12 253327.0>, <L12 273673.0>, <J8 1253.0>, <D9 1732.0>, <M7 0.11482240840167561>, <J6 286.0>, <M6 0.06681810136291226>, <B8 225.0>, <S6 0.24690338563170933>, <H11 3881.0>, <C7 0.12578616352201258>, <B12 638.0>, <H6 9055.0>, <U6 0.07750357086176797>, <U9 0.015402317092524997>, <J9 121.0>, <L11 2489.0>, <K10 1.0>, <S9 

In [4]:
# Housing-status row labels: non-blank cells from A6 downward, minus everything
# from A12 downward (so only A6..A11 remain, as the displayed selection shows).
house = (
    tab.excel_ref('A6').expand(DOWN).is_not_blank()
    - tab.excel_ref('A12').expand(DOWN)
)
house

{<A7 'Housing problem'>, <A10 'Total'>, <A11 'Not stated/missing'>, <A6 'Urgent problem'>, <A9 'Other'>, <A8 'No problem'>}

In [5]:
# Client-group headers ('NPS' / 'All'): non-blank cells in row 4 from column B rightwards.
Clients = (
    tab.excel_ref('B4')
    .expand(RIGHT)
    .is_not_blank()
)
Clients

{<R4 'NPS'>, <H4 'All'>, <L4 'All'>, <D4 'All'>, <B4 'NPS'>, <F4 'NPS'>, <J4 'NPS'>, <P4 'All'>, <N4 'NPS'>, <T4 'All'>}

In [6]:
# Reporting-period headers (e.g. '2013-14'): non-blank cells in row 3 from column B rightwards.
period = (
    tab.excel_ref('B3')
    .expand(RIGHT)
    .is_not_blank()
)
period

{<N3 '2016-17'>, <R3 '2017-18'>, <B3 '2013-14'>, <F3 '2014-15'>, <J3 '2015-16'>}

In [7]:
# Measure markers ('n' / '%'): non-blank cells in row 5 from column B rightwards.
MeasureType = (
    tab.excel_ref('B5')
    .expand(RIGHT)
    .is_not_blank()
)
MeasureType

{<F5 'n'>, <P5 'n'>, <S5 '%'>, <B5 'n'>, <L5 'n'>, <N5 'n'>, <E5 '%'>, <J5 'n'>, <T5 'n'>, <G5 '%'>, <D5 'n'>, <M5 '%'>, <R5 'n'>, <C5 '%'>, <I5 '%'>, <O5 '%'>, <H5 'n'>, <U5 '%'>, <K5 '%'>, <Q5 '%'>}

In [8]:
# Dimension lookups used to label each observation cell.
# The order of the entries is kept as-is because it determines the column order
# seen in the resulting table.
Dimensions = [
    HDim(house, 'Housing Status', DIRECTLY, LEFT),
    HDim(Clients, 'Clients in new treatment', CLOSEST, LEFT),
    HDim(MeasureType, 'Measure Type', DIRECTLY, ABOVE),
    HDim(period, 'Period', CLOSEST, LEFT),
    HDimConst('Unit', 'People'),
]

In [9]:
# Combine the observation cells with the dimension lookups defined above.
# NOTE(review): processTIMEUNIT=True is passed although no TIMEUNIT dimension is
# defined in Dimensions — confirm the flag is still wanted.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check, left disabled; presumably renders an HTML preview of the segment.
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
# Convert the segment into a pandas DataFrame (one row per observation, value in 'OBS').
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Housing Status,Clients in new treatment,Measure Type,Period,Unit
0,20.000000,Urgent problem,NPS,n,2013-14,People
1,0.062893,Urgent problem,NPS,%,2013-14,People
2,9370.000000,Urgent problem,All,n,2013-14,People
3,0.065822,Urgent problem,All,%,2013-14,People
4,115.000000,Urgent problem,NPS,n,2014-15,People
5,0.100524,Urgent problem,NPS,%,2014-15,People
6,9055.000000,Urgent problem,All,n,2014-15,People
7,0.065728,Urgent problem,All,%,2014-15,People
8,286.000000,Urgent problem,NPS,n,2015-16,People
9,0.141865,Urgent problem,NPS,%,2015-16,People


In [11]:
# Keep only observations whose value is not exactly 0.
# .copy() makes the filtered result an independent DataFrame, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
new_table = new_table[new_table['OBS'] != 0].copy()

In [12]:
# Rename the observation column from 'OBS' to 'Value'; all other columns keep their names.
new_table = new_table.rename(columns={'OBS': 'Value'})

In [13]:
# Preview the first rows to confirm the observation column is now called 'Value'.
new_table.head()

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Period,Unit
0,20.0,Urgent problem,NPS,n,2013-14,People
1,0.062893,Urgent problem,NPS,%,2013-14,People
2,9370.0,Urgent problem,All,n,2013-14,People
3,0.065822,Urgent problem,All,%,2013-14,People
4,115.0,Urgent problem,NPS,n,2014-15,People


In [14]:
# Spell out the sheet's measure markers; any other value is passed through unchanged.
measure_labels = {
    'n': 'Count',
    '%': 'Percentage',
}
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda marker: measure_labels.get(marker, marker))

In [15]:
# Inspect the last rows; these are the rows whose Housing Status came through empty.
new_table.tail()

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Period,Unit
115,273673.0,,All,Count,2015-16,People
116,2871.0,,NPS,Count,2016-17,People
117,260567.0,,All,Count,2016-17,People
118,2434.0,,NPS,Count,2017-18,People
119,253327.0,,All,Count,2017-18,People


In [16]:
# Check the column dtypes ('Value' is numeric at this point).
new_table.dtypes

Value                       float64
Housing Status               object
Clients in new treatment     object
Measure Type                 object
Period                       object
Unit                         object
dtype: object

In [17]:
# Store 'Value' as text; counts therefore appear as strings such as '20.0'.
new_table['Value'] = new_table['Value'].astype(str)

In [18]:
# Preview the first three rows after the 'Value' column was converted to text.
new_table.head(3)

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Period,Unit
0,20.0,Urgent problem,NPS,Count,2013-14,People
1,0.0628930817610062,Urgent problem,NPS,Percentage,2013-14,People
2,9370.0,Urgent problem,All,Count,2013-14,People


In [19]:
# Relabel the 'Total' housing row as 'All'; any other value is passed through unchanged.
housing_labels = {'Total': 'All'}
new_table['Housing Status'] = new_table['Housing Status'].map(
    lambda status: housing_labels.get(status, status))

In [20]:
def user_perc(x):
    """Return a Housing Status label, filling in a label when the value is missing.

    Parameters
    ----------
    x : object
        Housing Status value of one row; None (or a float NaN) when no label
        was found for the observation.

    Returns
    -------
    object
        'All inclusive Not stated/missing' for a missing value, otherwise ``x``
        unchanged.
    """
    # Identity check for None; the float test also catches NaN (the only value
    # that is not equal to itself), which pandas may use for missing entries.
    if x is None or (isinstance(x, float) and x != x):
        return 'All inclusive Not stated/missing'
    return x
    
# Fill in missing Housing Status labels element-wise. Series.map works on the one
# column involved (no per-row object is built) and is well-defined on an empty table.
new_table['Housing Status'] = new_table['Housing Status'].map(user_perc)

In [21]:
# Preview the first three rows after the Housing Status clean-up.
new_table.head(3)

Unnamed: 0,Value,Housing Status,Clients in new treatment,Measure Type,Period,Unit
0,20.0,Urgent problem,NPS,Count,2013-14,People
1,0.0628930817610062,Urgent problem,NPS,Percentage,2013-14,People
2,9370.0,Urgent problem,All,Count,2013-14,People


In [22]:
# Select the output columns in their final order.
output_columns = [
    'Period',
    'Housing Status',
    'Clients in new treatment',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [23]:
if is_interactive():
    # Write the tidy table to out/table7.3.2.csv, creating the folder if needed.
    SubstancetinationFolder = Path('out')
    SubstancetinationFolder.mkdir(parents=True, exist_ok=True)
    csv_path = SubstancetinationFolder / 'table7.3.2.csv'
    new_table.to_csv(csv_path, index=False)