# Referral journeys
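This notebook matches each referral in the flat CIN file with any S17 assessment and any S47 event that started on the referral date or within a set number of days after it. Each referral is then labelled with its outcome (NFA, S17, S47, or both S17 & S47) and the result is saved as a CSV.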

In [None]:
import os
import pandas as pd
import numpy as np

%run "00-config.ipynb"
%load_ext autoreload
%autoreload 2

### Config

#### Filepaths

In [None]:
# Flat CIN file read by this notebook.
# NOTE(review): flatfile_folder and output_folder are not defined in this notebook — presumably set by 00-config.ipynb; confirm.
input_file = os.path.join(flatfile_folder, 'main_flatcin.csv')
# CSV of referrals with their outcome, written by the last cell of this notebook.
output_file = os.path.join(output_folder, 'referral_outcomes.csv')

#### Key assumptions

In [None]:
# Max days referral -> assessment for both to be linked
ref_assessment = 30

# Date from which a referral is too recent to determine its next journey.
# An assessment can still be linked up to `ref_assessment` days after the referral,
# so the cut-off is derived from that same window rather than repeating "30 days":
# changing the window above now moves this date with it.
# NOTE(review): cin_census_close is not defined in this notebook — presumably set by 00-config.ipynb; confirm.
# NOTE(review): ref_max_date is not applied in any of the cells visible here — confirm where it is meant to be used.
ref_max_date = cin_census_close - pd.Timedelta(days=ref_assessment)

### Data wrangling

In [None]:
# Load flatfile
df = pd.read_csv(input_file)

# Only keep 3 subsets: Referral, Assessment and S47 events, selected on the Type column.
# For each subset, also drop the columns that are entirely empty within that subset.
# dropna() is used in its returning form: calling it with inplace=True on a filtered
# slice of df triggers pandas' SettingWithCopyWarning.
ref = df[df.Type == 'CINreferralDate'].dropna(axis=1, how='all')
s17 = df[df.Type == 'AssessmentActualStartDate'].dropna(axis=1, how='all')
s47 = df[df.Type == 'S47ActualStartDate'].dropna(axis=1, how='all')

#### Match Referrals with S17 assessments

In [None]:
# Pair every referral with the S17 assessment start dates recorded for the same
# child (LAchildID) in the same LA.
# Left join: a referral with no assessment is kept, with an empty AssessmentActualStartDate.
data_s17 = pd.merge(
    ref,
    s17.loc[:, ['LAchildID', 'LA', 'AssessmentActualStartDate']],
    how='left',
    on=['LAchildID', 'LA'],
)

In [None]:
# The merge pairs a referral with every S17 of the same child, including ones that
# started before the referral or long after it. Those pairs are unrelated.
# Rule: an assessment is linked to a referral only if it started on or after the referral
# and no more than X days later (X is defined at the top of the notebook).

# In one pass: parse both date columns, then compute days_to_s17, the whole number of
# days from the referral to the assessment (negative when the assessment came first).
data_s17 = data_s17.assign(
    AssessmentActualStartDate=lambda d: pd.to_datetime(d['AssessmentActualStartDate']),
    CINreferralDate=lambda d: pd.to_datetime(d['CINreferralDate']),
    days_to_s17=lambda d: (d['AssessmentActualStartDate'] - d['CINreferralDate']).dt.days,
)

data_s17.head()

In [None]:
# Let's look at the distribution of days between Referral and S17
# (days_to_s17 = assessment start date minus referral date, so negative values
# are assessments that started before the referral)
data_s17.hist(column='days_to_s17')

# We can see that some pairs do not make sense: the S17 happened earlier or much later

In [None]:
# Keep the Referral/S17 pairs where the assessment started between 0 and
# `ref_assessment` days (both inclusive) after the referral. Referrals with no
# assessment have an empty days_to_s17 and therefore drop out here.
# Only the columns needed to join the match back onto the referrals are kept.
# NOTE(review): LA is not kept, although the pairing was made per LAchildID and LA —
# confirm LAchildID is unique across LAs.
ref_s17_match = data_s17.loc[
    data_s17['days_to_s17'].between(0, ref_assessment),
    ['Date', 'LAchildID', 'AssessmentActualStartDate', 'days_to_s17'],
]
ref_s17_match.head()

#### Match Referrals with S47 assessments

In [None]:
# Pair every referral with the S47 start dates recorded for the same
# child (LAchildID) in the same LA.
# Left join: a referral with no S47 is kept, with an empty S47ActualStartDate.
data_s47 = pd.merge(
    ref,
    s47.loc[:, ['LAchildID', 'LA', 'S47ActualStartDate']],
    how='left',
    on=['LAchildID', 'LA'],
)

In [None]:
# The merge pairs a referral with every S47 of the same child, including ones that
# started before the referral or long after it. Those pairs are unrelated.
# Rule: an S47 is linked to a referral only if it started on or after the referral
# and no more than X days later (X is defined at the top of the notebook).

# In one pass: parse both date columns, then compute days_to_s47, the whole number of
# days from the referral to the S47 (negative when the S47 came first).
data_s47 = data_s47.assign(
    S47ActualStartDate=lambda d: pd.to_datetime(d['S47ActualStartDate']),
    CINreferralDate=lambda d: pd.to_datetime(d['CINreferralDate']),
    days_to_s47=lambda d: (d['S47ActualStartDate'] - d['CINreferralDate']).dt.days,
)

data_s47.head()

In [None]:
# Let's look at the distribution of days between Referral and S47
# (days_to_s47 = S47 start date minus referral date, so negative values
# are S47 events that started before the referral)
data_s47.hist(column='days_to_s47')

# We can see that some pairs do not make sense: the S47 happened earlier or much later

In [None]:
# Keep the Referral/S47 pairs where the S47 started between 0 and
# `ref_assessment` days (both inclusive) after the referral. Referrals with no
# S47 have an empty days_to_s47 and therefore drop out here.
# Only the columns needed to join the match back onto the referrals are kept.
# NOTE(review): LA is not kept, although the pairing was made per LAchildID and LA —
# confirm LAchildID is unique across LAs.
ref_s47_match = data_s47.loc[
    data_s47['days_to_s47'].between(0, ref_assessment),
    ['Date', 'LAchildID', 'S47ActualStartDate', 'days_to_s47'],
]
ref_s47_match.head()

#### Create final list of Referrals with outcomes

In [None]:
# Start from every referral and attach its matched S17 and S47 (if any).
#
# A referral can have more than one S17 (or S47) inside the linking window. Merging all
# of them would repeat that referral once per match, and once per S17 x S47 combination
# after the second merge. To keep one output row per referral row, only the earliest
# match for each (Date, LAchildID) key is merged in.
#
# NOTE(review): the keys do not include LA because the match tables do not carry it —
# confirm LAchildID is unique across LAs, otherwise matches can cross LAs.

# Merge with S17 outcomes (earliest assessment per referral)
referral_outcomes = ref.merge(
    ref_s17_match.sort_values('days_to_s17').drop_duplicates(subset=['Date', 'LAchildID'], keep='first'),
    on=['Date', 'LAchildID'],
    how='left',
)

# Merge with S47 outcomes (earliest S47 per referral)
referral_outcomes = referral_outcomes.merge(
    ref_s47_match.sort_values('days_to_s47').drop_duplicates(subset=['Date', 'LAchildID'], keep='first'),
    on=['Date', 'LAchildID'],
    how='left',
)

referral_outcomes.head()

In [None]:
# Label each referral with a single, readable outcome.

# A referral "has" an S17 / S47 when the corresponding start date was merged in.
has_s17 = referral_outcomes['AssessmentActualStartDate'].notna()
has_s47 = referral_outcomes['S47ActualStartDate'].notna()

# Referrals with neither stay at the default: NFA
referral_outcomes['referral_outcome'] = 'NFA'

# The three remaining cases are mutually exclusive, so their order does not matter
referral_outcomes.loc[has_s17 & has_s47, 'referral_outcome'] = 'Both S17 & S47'
referral_outcomes.loc[has_s17 & ~has_s47, 'referral_outcome'] = 'S17'
referral_outcomes.loc[has_s47 & ~has_s17, 'referral_outcome'] = 'S47'

### Save

In [None]:
# Write the labelled referrals to output_file; index=False leaves the DataFrame index out of the CSV.
referral_outcomes.to_csv(output_file, index=False)