Table 4.8.1: Source of referral into treatment, new presentations to treatment

In [1]:
from gssutils import *

if is_interactive():
    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches.file_cache import FileCache
    from cachecontrol.heuristics import LastModified
    from pathlib import Path

    session = CacheControl(requests.Session(),
                           cache=FileCache('.cache'),
                           heuristic=LastModified())

    sourceFolder = Path('in')
    sourceFolder.mkdir(exist_ok=True)

    inputURL = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    inputFile = sourceFolder / 'AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx'
    response = session.get(inputURL)
    with open(inputFile, 'wb') as f:
      f.write(response.content)    

In [2]:
tab = loadxlstabs(inputFile, sheetids='Table 4.8.1')[0]

Loading in\AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx which has size 272149 bytes
Table names: ['Table 4.8.1']


https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/752515/AdultSubstanceMisuseNDTMSDataTables2017-18.xlsx

In [3]:
observations = tab.excel_ref('C6').expand(DOWN).expand(RIGHT).is_not_blank()
observations

{<F17 0.000180093648697323>, <I42 81.0>, <L47 0.00026044749615248>, <I49 0.0>, <J48 0.000475784548896775>, <I40 92.0>, <E53 16716.0>, <G38 1219.0>, <D11 0.0175391455162811>, <G42 29.0>, <L33 0.0179393078410481>, <K44 69.0>, <G17 5.0>, <E15 280.0>, <L51 1.0>, <D10 0.052934997679361>, <F40 0.020290551086565>, <F29 0.000360187297394645>, <I50 2164.0>, <E46 10.0>, <L50 0.044410244268182>, <C27 502.0>, <G23 242.0>, <L7 0.0145298133459611>, <L11 0.0259658261315654>, <C44 6.0>, <I34 1363.0>, <F35 0.00240124864929763>, <D50 0.0283850795124215>, <K20 22187.0>, <H22 0.0210531954786522>, <H12 0.0208924840627846>, <H25 0.0477848609846253>, <I26 721.0>, <L45 0.000236770451047709>, <L28 0.00386725070044592>, <G44 37.0>, <D48 0.000146566675623519>, <D19 0.000830544495199941>, <F38 0.064953775963501>, <F23 0.0180093648697323>, <J12 0.0192098011617073>, <G18 23.0>, <I47 6.0>, <H27 0.00744629560186425>, <J15 0.0108637471998097>, <H38 0.0653024053141908>, <H6 0.583436010071249>, <G31 2016.0>, <J39 0.0354

In [4]:
ReferralSource1 = tab.excel_ref('A5').expand(DOWN).is_not_blank() - tab.excel_ref('A53')
ReferralSource1

{<A34 'Drug service non-statutory'>, <A52 'Inconsistent/missing'>, <A22 'Arrest referral/DIP'>, <A5 'Self, family and friends'>, <A51 'Total'>, <A35 'Community alcohol team'>, <A23 'Prison'>, <A13 'Health – other'>, <A20 'Health services and social care subtotal'>, <A10 'GP'>, <A24 'Probation'>, <A9 'Health services and social care'>, <A33 'Drug service statutory'>, <A12 'Social services'>, <A38 'Other'>, <A7 'Other family and friends'>, <A8 'Self, family and friends subtotal'>, <A11 'Hospital'>, <A32 'Substance misuse service'>, <A37 'Other'>, <A55 '*Percentages may equal 0% or not sum to 100% due to rounding'>, <A6 'Self'>, <A31 'Criminal justice subtotal'>, <A50 'Other subtotal'>, <A21 'Criminal justice'>, <A36 'Substance misuse service subtotal'>, <A25 'Criminal justice – other'>}

In [5]:
ReferralSource2 = tab.excel_ref('B5').expand(DOWN).is_not_blank() - tab.excel_ref('A53')
ReferralSource2

{<B19 'Children social services'>, <B16 'A&E'>, <B40 'Other YP'>, <B41 'Job centre plus'>, <B46 'Education service'>, <B44 'Other treatment provider'>, <B14 'Other community health'>, <B30 'Other criminal justice'>, <B39 'Other'>, <B27 'DRR'>, <B42 'Employment service'>, <B49 'Other helplines & websites'>, <B15 'Psychiatry'>, <B26 'ATR'>, <B17 'Syringe Exchange'>, <B29 'Liaison and diversion'>, <B47 'LAC'>, <B28 'Community rehabilitation company'>, <B48 'Employer'>, <B18 'Community care assessment'>, <B45 'Connexions'>, <B43 'Other sex worker project'>}

In [6]:
Clients = tab.excel_ref('B3').expand(RIGHT).is_not_blank()
Clients

{<G3 'Non-opiate and Alcohol'>, <I3 'Alcohol only'>, <K3 'Total'>, <C3 'Opiate'>, <E3 'Non-opiate only'>}

In [7]:
MeasureType = tab.excel_ref('B4').expand(RIGHT).is_not_blank()
MeasureType

{<K4 'n'>, <E4 'n'>, <C4 'n'>, <H4 '%'>, <G4 'n'>, <D4 '%'>, <F4 '%'>, <L4 '%'>, <I4 'n'>, <J4 '%'>}

In [8]:
Dimensions = [
            HDim(ReferralSource1,'Referral Source1',DIRECTLY,LEFT),
            HDim(ReferralSource2,'Referral Source2',DIRECTLY,LEFT),
            HDim(Clients,'Clients',CLOSEST,LEFT),
            HDim(MeasureType,'Measure Type',DIRECTLY,ABOVE),
            HDimConst('Unit','People')            
            ]

In [9]:
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# if is_interactive():
#     savepreviewhtml(c1)

In [10]:
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,Referral Source1,Referral Source2,Clients,Measure Type,Unit
0,21542.000000,Self,,Opiate,n,People
1,0.526223,Self,,Opiate,%,People
2,9747.000000,Self,,Non-opiate only,n,People
3,0.585124,Self,,Non-opiate only,%,People
4,10891.000000,Self,,Non-opiate and Alcohol,n,People
5,0.583436,Self,,Non-opiate and Alcohol,%,People
6,29842.000000,Self,,Alcohol only,n,People
7,0.591598,Self,,Alcohol only,%,People
8,72022.000000,Self,,Total,n,People
9,0.568423,Self,,Total,%,People


In [11]:
new_table = new_table[new_table['OBS'] != 0 ]

In [12]:
new_table.columns = ['Value' if x=='OBS' else x for x in new_table.columns]

In [13]:
new_table['Referral Source'] = new_table['Referral Source1'].fillna('') + new_table['Referral Source2'].fillna('')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [14]:
new_table.head()

Unnamed: 0,Value,Referral Source1,Referral Source2,Clients,Measure Type,Unit,Referral Source
0,21542.0,Self,,Opiate,n,People,Self
1,0.526223,Self,,Opiate,%,People,Self
2,9747.0,Self,,Non-opiate only,n,People,Self
3,0.585124,Self,,Non-opiate only,%,People,Self
4,10891.0,Self,,Non-opiate and Alcohol,n,People,Self


In [15]:
new_table['Measure Type'] = new_table['Measure Type'].map(
    lambda x: {
        'n' : 'Count', 
        '%' : 'Percentage',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [16]:
new_table.tail()

Unnamed: 0,Value,Referral Source1,Referral Source2,Clients,Measure Type,Unit,Referral Source
425,41178.0,,,Opiate,Count,People,
426,16716.0,,,Non-opiate only,Count,People,
427,18757.0,,,Non-opiate and Alcohol,Count,People,
428,50656.0,,,Alcohol only,Count,People,
429,127307.0,,,Total,Count,People,


In [17]:
new_table.dtypes

Value               float64
Referral Source1     object
Referral Source2     object
Clients              object
Measure Type         object
Unit                 object
Referral Source      object
dtype: object

In [18]:
new_table['Value'] = new_table['Value'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [19]:
new_table.head(3)

Unnamed: 0,Value,Referral Source1,Referral Source2,Clients,Measure Type,Unit,Referral Source
0,21542.0,Self,,Opiate,Count,People,Self
1,0.526223221046975,Self,,Opiate,Percentage,People,Self
2,9747.0,Self,,Non-opiate only,Count,People,Self


In [20]:
new_table['Referral Source'] = new_table['Referral Source'].map(
    lambda x: {
        'Total' : 'All' 
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
def user_perc(x):
    
    if x == '':
        return 'All inclusice Inconsistent/missing'
    else:
        return x
    
new_table['Referral Source'] = new_table.apply(lambda row: user_perc(row['Referral Source']), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [22]:
new_table['Clients'] = new_table['Clients'].map(
    lambda x: {
        'Total' : 'All Clients',
        }.get(x, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [23]:
new_table.head(3)

Unnamed: 0,Value,Referral Source1,Referral Source2,Clients,Measure Type,Unit,Referral Source
0,21542.0,Self,,Opiate,Count,People,Self
1,0.526223221046975,Self,,Opiate,Percentage,People,Self
2,9747.0,Self,,Non-opiate only,Count,People,Self


In [24]:
new_table['Referral Source'] = 'Referral source/' + new_table['Referral Source']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [25]:
new_table['Clients'] = 'New clients/' + new_table['Clients']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [26]:
new_table.columns = ['Clients in treatment' if x=='Clients' else x for x in new_table.columns]
new_table['Period'] = '2017-18'
new_table.columns = ['Basis of treatment' if x=='Referral Source' else x for x in new_table.columns]
new_table['Substance'] = 'All'
new_table = new_table[['Period','Basis of treatment','Substance','Clients in treatment','Measure Type','Value','Unit']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [27]:
# if is_interactive():
#     SubstancetinationFolder = Path('out')
#     SubstancetinationFolder.mkdir(exist_ok=True, parents=True)
#     new_table.to_csv(SubstancetinationFolder / ('table4.8.1.csv'), index = False)

In [28]:
new_table.head()

Unnamed: 0,Period,Basis of treatment,Substance,Clients in treatment,Measure Type,Value,Unit
0,2017-18,Referral source/Self,All,New clients/Opiate,Count,21542.0,People
1,2017-18,Referral source/Self,All,New clients/Opiate,Percentage,0.526223221046975,People
2,2017-18,Referral source/Self,All,New clients/Non-opiate only,Count,9747.0,People
3,2017-18,Referral source/Self,All,New clients/Non-opiate only,Percentage,0.585124264617601,People
4,2017-18,Referral source/Self,All,New clients/Non-opiate and Alcohol,Count,10891.0,People
