In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from tqdm.notebook import tqdm
import pickle 

%matplotlib inline

In [2]:
# Read the pickled object stored at ../data/teis_final_data.pickle and bind it to
# `final_data`; later cells use it as a pandas DataFrame (.columns, .dtypes, .groupby).
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so this
# path should only ever point at a file produced by a trusted pipeline.
with open('../data/teis_final_data.pickle', 'rb') as file:
    final_data = pickle.load(file)

In [6]:
# Normalise every column label: trim surrounding whitespace, lower-case it, and
# turn each space or period into an underscore so the labels are snake_case.
final_data.columns = list(
    map(
        lambda label: label.strip().lower().translate(str.maketrans(' .', '__')),
        final_data.columns,
    )
)
# Last expression of the cell: show the dtype of each (renamed) column.
final_data.dtypes

child_id                              int64
notification_date            datetime64[ns]
fiscal_year                          object
notification_month                   object
tenn_region                          object
fiscal_year_1                       float64
third_dob                    datetime64[ns]
late_referral                       float64
qtr                                 float64
poe                                  object
dob                          datetime64[ns]
county_name                          object
county_ses                           object
child_phase                          object
active                               object
service_coordinator                  object
parent_consent               datetime64[ns]
initial_eligibility                  object
initial_eligibility_date             object
initial_ifsp_date            datetime64[ns]
latest_ifsp                  datetime64[ns]
exit_reason                          object
exit_date                    dat

In [7]:
# Display the full list of column labels currently set on final_data.
final_data.columns

Index(['child_id', 'notification_date', 'fiscal_year', 'notification_month',
       'tenn_region', 'fiscal_year_1', 'third_dob', 'late_referral', 'qtr',
       'poe', 'dob', 'county_name', 'county_ses', 'child_phase', 'active',
       'service_coordinator', 'parent_consent', 'initial_eligibility',
       'initial_eligibility_date', 'initial_ifsp_date', 'latest_ifsp',
       'exit_reason', 'exit_date', 'referral_source_type_name', 'year'],
      dtype='object')

### What is the rate of re-referral? 

In [8]:
def reref_calc(df, proportion = False, as_percentage = False):
    """Count, or compute the share of, children that appear more than once in ``df``.

    A child is treated as re-referred when its ``child_id`` occurs on more than
    one row of ``df``. Only the rows passed in are considered, so when this is
    applied to a subset (for example one group of a ``groupby``) repeats are
    detected within that subset only.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``child_id`` column.
    proportion : bool, default False
        When True, return the fraction of distinct ``child_id`` values that
        occur more than once instead of the number of such children.
    as_percentage : bool, default False
        When True, return that fraction formatted as a percentage string with
        two decimals (e.g. ``"16.17%"``). A percentage only makes sense for a
        fraction, so this implies ``proportion=True``; previously, passing
        ``as_percentage=True`` alone formatted the raw count (2 -> "200.00%").

    Returns
    -------
    int, float or str
        The count (default), the fraction (``proportion=True``), or the
        formatted percentage string (``as_percentage=True``). For a frame with
        no rows the count is 0 and the fraction is NaN.
    """
    # One boolean per distinct child_id: True when that id occurs on 2+ rows.
    has_multiple_rows = df['child_id'].value_counts() > 1

    if as_percentage:
        # Always format the fraction, never the raw count.
        return "{:.2%}".format(has_multiple_rows.mean())
    if proportion:
        return has_multiple_rows.mean()
    return has_multiple_rows.sum()

In [9]:
# Report the overall share of children whose child_id appears on more than one row.
print(f"{reref_calc(final_data, proportion = True, as_percentage = True)} of all children get re-referrals")

16.17% of all children get re-referrals


### How many children have multiple referrals by year and by POE?

by year

In [10]:
# Run reref_calc (default arguments, i.e. a count) on each fiscal_year group separately.
# Because every group is evaluated on its own rows, a child is counted for a year only
# when its child_id appears more than once within that same fiscal year.
ref_by_year = final_data.groupby('fiscal_year').apply(reref_calc)
# Display the per-year counts as the cell output.
ref_by_year

fiscal_year
2016-2017    1347
2017-2018    1412
2018-2019    1398
2019-2020    1468
2020-2021    1681
2021-2022    1474
dtype: int64

by POE & year

In [11]:
# Count, for every (poe, fiscal_year) combination, the children whose child_id appears
# more than once within that combination's rows; sort_index orders the result by
# poe and then fiscal_year.
final_data.groupby(['poe','fiscal_year']).apply(reref_calc).sort_index()

poe  fiscal_year
ET   2016-2017      245
     2017-2018      252
     2018-2019      247
     2019-2020      249
     2020-2021      281
     2021-2022      234
FT   2016-2017      133
     2017-2018      166
     2018-2019      156
     2019-2020      121
     2020-2021      113
     2021-2022      117
GN   2016-2017      247
     2017-2018      235
     2018-2019      194
     2019-2020      259
     2020-2021      362
     2021-2022      306
MD   2016-2017      264
     2017-2018      265
     2018-2019      307
     2019-2020      205
     2020-2021      238
     2021-2022      221
NW   2016-2017       65
     2017-2018       78
     2018-2019       65
     2019-2020       86
     2020-2021       84
     2021-2022       75
SC   2016-2017      155
     2017-2018      156
     2018-2019      136
     2019-2020      218
     2020-2021      227
     2021-2022      186
SE   2016-2017       49
     2017-2018       60
     2018-2019       76
     2019-2020      102
     2020-2021       89