In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
teis_p = pd.read_csv('../data/teis_p.csv')
teis_p.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96580 entries, 0 to 96579
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   POE                        96580 non-null  object
 1   Child ID                   96580 non-null  int64 
 2   DOB                        96580 non-null  object
 3   County Name                96576 non-null  object
 4   County SES                 96576 non-null  object
 5   Child Phase                96580 non-null  object
 6   Active                     96580 non-null  object
 7   Service Coordinator        96579 non-null  object
 8   Notification Date          96580 non-null  object
 9   Parent Consent Date        70026 non-null  object
 10  Referral Source Type Name  96580 non-null  object
 11  Initial Eligibility        68221 non-null  object
 12  Initial Eligibility Date   68221 non-null  object
 13  Initial IFSP Date          45493 non-null  object
 14  Latest

In [3]:
teis_c = pd.read_csv('../data/teis_c.csv')
teis_c.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113256 entries, 0 to 113255
Data columns (total 34 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   POE                          113256 non-null  object 
 1   Child ID                     113256 non-null  int64  
 2   DOB                          113256 non-null  object 
 3   County Name                  113251 non-null  object 
 4   County SES                   113251 non-null  object 
 5   Child Phase                  113256 non-null  object 
 6   Active                       113256 non-null  object 
 7   Service Coordinator          113255 non-null  object 
 8   Notification/ Referral Date  113256 non-null  object 
 9   Parent Consent Date          58101 non-null   object 
 10  Referral Source Type Name    113256 non-null  object 
 11  Initial Eligibility          40762 non-null   object 
 12  Initial Eligibility Date     40762 non-null   object 
 13 

In [4]:
teis_c = teis_c.rename(columns = {'Notification/ Referral Date' : 'Notification Date'})
teis_p = teis_p.rename(columns = {'County SES ' : 'County SES'})

In [5]:
full_teis= pd.concat([teis_c, teis_p])

In [6]:
full_teis.to_csv('../data/full_teis.csv')

In [7]:
teis_confused = full_teis[full_teis.duplicated(['Child ID', 'Notification Date'], keep=False)]
teis_confused.info()
teis_confused.to_csv('../data/teis_confused.csv')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 188624 entries, 0 to 96579
Data columns (total 34 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   POE                          188624 non-null  object 
 1   Child ID                     188624 non-null  int64  
 2   DOB                          188624 non-null  object 
 3   County Name                  188616 non-null  object 
 4   County SES                   188616 non-null  object 
 5   Child Phase                  188624 non-null  object 
 6   Active                       188624 non-null  object 
 7   Service Coordinator          188622 non-null  object 
 8   Notification Date            188624 non-null  object 
 9   Parent Consent Date          121910 non-null  object 
 10  Referral Source Type Name    188624 non-null  object 
 11  Initial Eligibility          104631 non-null  object 
 12  Initial Eligibility Date     104631 non-null  object 
 13  

In [8]:
teis_clean = full_teis[~full_teis.duplicated(['POE', 'Child ID', 'DOB', 'County Name', 'County SES', 'Child Phase',
       'Active', 'Service Coordinator', 'Notification Date',
       'Parent Consent Date', 'Referral Source Type Name',
       'Initial Eligibility', 'Initial Eligibility Date', 'Initial IFSP Date',
       'Latest IFSP Date', 'Exit Reason', 'Exit Date'])]
teis_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 191578 entries, 0 to 96579
Data columns (total 34 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   POE                          191578 non-null  object 
 1   Child ID                     191578 non-null  int64  
 2   DOB                          191578 non-null  object 
 3   County Name                  191570 non-null  object 
 4   County SES                   191570 non-null  object 
 5   Child Phase                  191578 non-null  object 
 6   Active                       191578 non-null  object 
 7   Service Coordinator          191577 non-null  object 
 8   Notification Date            191578 non-null  object 
 9   Parent Consent Date          127024 non-null  object 
 10  Referral Source Type Name    191578 non-null  object 
 11  Initial Eligibility          107265 non-null  object 
 12  Initial Eligibility Date     107265 non-null  object 
 13  

In [9]:
teis_merge = pd.merge(teis_c, teis_p, how='outer')
teis_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 191578 entries, 0 to 191577
Data columns (total 34 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   POE                          191578 non-null  object 
 1   Child ID                     191578 non-null  int64  
 2   DOB                          191578 non-null  object 
 3   County Name                  191570 non-null  object 
 4   County SES                   191570 non-null  object 
 5   Child Phase                  191578 non-null  object 
 6   Active                       191578 non-null  object 
 7   Service Coordinator          191577 non-null  object 
 8   Notification Date            191578 non-null  object 
 9   Parent Consent Date          127024 non-null  object 
 10  Referral Source Type Name    191578 non-null  object 
 11  Initial Eligibility          107265 non-null  object 
 12  Initial Eligibility Date     107265 non-null  object 
 13 

In [10]:
teis_clean = teis_clean.drop(columns = ['2012 Child Count', '2013 Child Count', 'Service Coordinator Counter', 'Referral Count',
                                        'Evaluation Count', 'Eligibility Count', 'IFSP Count', 'Child Count', ' QTR ', 
                                        'Referral Source Category'])
teis_clean.head(2)

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
0,ET,403339,8/9/2014,Blount,Transitional,Eligibility,A,Kristi Borer,7/1/2016,7/1/16,...,,,,2016-2017,Jul,Eastern,2016.0,8/9/2017,,1.0
1,FT,404085,7/31/2013,Unicoi,At-Risk,Notification,I,Jennifer Terranera - 45 days,7/1/2016,,...,,Referral less than 45 days,7/1/2016,2016-2017,Jul,Eastern,2016.0,7/31/2016,1.0,1.0


In [11]:
teis_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 191578 entries, 0 to 96579
Data columns (total 24 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   POE                        191578 non-null  object 
 1   Child ID                   191578 non-null  int64  
 2   DOB                        191578 non-null  object 
 3   County Name                191570 non-null  object 
 4   County SES                 191570 non-null  object 
 5   Child Phase                191578 non-null  object 
 6   Active                     191578 non-null  object 
 7   Service Coordinator        191577 non-null  object 
 8   Notification Date          191578 non-null  object 
 9   Parent Consent Date        127024 non-null  object 
 10  Referral Source Type Name  191578 non-null  object 
 11  Initial Eligibility        107265 non-null  object 
 12  Initial Eligibility Date   107265 non-null  object 
 13  Initial IFSP Date          531

In [12]:
import re
from tqdm.notebook import tqdm

In [13]:
for ind, row in tqdm(teis_clean.iterrows()):
    if re.search('\d{5}', str(row['Exit Date'])):
        teis_clean.loc[ind, 'Exit Date'] = np.NaN

0it [00:00, ?it/s]

In [14]:
all_teis_dups = teis_clean[teis_clean.duplicated(['Child ID', 'Notification Date'], keep=False)].sort_values('Child ID')
all_teis_dups

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
437,NW,351755,11/13/2013,Madison,Transitional,Evaluation,A,Rene Bard,7/12/2016,7/12/16,...,,,,2016-2017,Jul,Western,2016.0,11/13/2016,,1.0
8367,NW,351755,11/13/2013,Madison,Transitional,Eligibility,I,Rene Bard,7/12/2016,7/12/2016,...,,Ineligible for Part C,8/25/2016,,,,,,,
13240,UC,353805,12/4/2013,Putnam,Transitional,IFSP,I,Anna Bolin,8/5/2016,8/15/2016,...,,618 - Part B eligibility not determined,12/3/2016,,,,,,,
1552,UC,353805,12/4/2013,Putnam,Transitional,Evaluation,A,Anna Bolin,8/5/2016,8/15/16,...,,,,2016-2017,Aug,Middle,2016.0,12/4/2016,,1.0
4142,MD,353900,10/9/2013,Shelby,Transitional,Notification,I,JoAnn Hinkle-DA,9/29/2016,,...,,Referral less than 45 days,9/29/2016,2016-2017,Sep,Western,2016.0,10/9/2016,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95952,UC,529587,3/15/2022,Sumner,Competitive,Eligibility,I,Sherry Roberts-DS,5/31/2022,6/8/2022,...,,Ineligible for Part C,6/24/2022,,,,,,,
93343,SE,529597,4/10/2020,McMinn,Transitional,Eligibility,I,Jennifer Rose - ES,5/31/2022,6/4/2022,...,,Ineligible for Part C,6/27/2022,,,,,,,
112947,SE,529597,4/10/2020,McMinn,Transitional,Notification,A,Signal Centers-EEA,5/31/2022,,...,,,,2021-2022,May,Eastern,2021.0,4/10/2023,,4.0
81090,FT,529609,5/13/2020,Washington,Transitional,Notification,I,Alicia Taylor - Admin,5/31/2022,,...,,Unable to contact,7/1/2022,,,,,,,


In [15]:
POE_dups = all_teis_dups[~all_teis_dups.duplicated(['POE', 'Child ID', 'Notification Date'], keep=False)]
POE_dups

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
9221,SC,360673,2/19/2014,Rutherford,Competitive,IFSP,I,LaKika King,9/8/2016,9/16/2016,...,2/6/2017,618 - Part B eligible,2/18/2017,,,,,,,
3088,GN,360673,2/19/2014,Davidson,Competitive,Eligibility,A,Dalmys Sanchez,9/8/2016,9/16/16,...,,,,2016-2017,Sep,Middle,2016.0,2/19/2017,,1.0
769,ET,382571,11/5/2014,Sevier,Transitional,IFSP,I,Tonia Hodsden,9/22/2016,9/22/2016,...,4/11/2017,618 - Parent withdraw,9/21/2017,,,,,,,
3810,FT,382571,11/5/2014,Cocke,Distressed,Eligibility,A,Barbara Bowman,9/22/2016,9/22/16,...,,,,2016-2017,Sep,Eastern,2016.0,11/5/2017,,1.0
2435,SW,385467,1/31/2015,Fayette,Transitional,Eligibility,A,Yalunda Whiteside,8/24/2016,9/1/16,...,,,,2016-2017,Aug,Western,2016.0,1/31/2018,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81455,FT,526514,1/7/2021,Hamblen,Transitional,IFSP,A,Averi Parker,4/11/2022,4/27/2022,...,5/18/2022,,,,,,,,,
95224,UC,527542,10/7/2020,Sumner,Competitive,IFSP,A,Taylor Lee,4/26/2022,5/3/2022,...,5/11/2022,,,,,,,,,
110110,GN,527542,10/7/2020,Robertson,Transitional,Eligibility,A,Taylor Watson,4/26/2022,5/3/2022,...,,,,2021-2022,Apr,Middle,2021.0,10/7/2023,,4.0
111939,GN,529308,6/1/2020,Marshall,Transitional,Evaluation,A,Kim Correll,5/25/2022,6/7/2022,...,,,,2021-2022,May,Middle,2021.0,6/1/2023,,4.0


In [16]:
num_all_dup = len(np.unique(np.array(all_teis_dups['Child ID'])))
num_all_dup

76054

In [17]:
num_POE_dup = len(np.unique(np.array(POE_dups['Child ID'])))
num_POE_dup

1240

In [18]:
county_dups = all_teis_dups[~all_teis_dups.duplicated(['County Name', 'Child ID', 'Notification Date'], keep=False)]
county_dups

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
9221,SC,360673,2/19/2014,Rutherford,Competitive,IFSP,I,LaKika King,9/8/2016,9/16/2016,...,2/6/2017,618 - Part B eligible,2/18/2017,,,,,,,
3088,GN,360673,2/19/2014,Davidson,Competitive,Eligibility,A,Dalmys Sanchez,9/8/2016,9/16/16,...,,,,2016-2017,Sep,Middle,2016.0,2/19/2017,,1.0
3729,ET,365886,2/28/2014,Blount,Transitional,IFSP,A,Karen Stock,9/21/2016,,...,9/26/2016,,,2016-2017,Sep,Eastern,2016.0,2/28/2017,,1.0
285,ET,365886,2/28/2014,Knox,Transitional,IFSP,I,Karen Stock,9/21/2016,9/22/2016,...,2/3/2017,618 - Part B eligibility not determined,2/27/2017,,,,,,,
66,FT,381125,9/26/2014,Washington,Transitional,Eligibility,A,Melinda Sutton,7/5/2016,7/25/16,...,,,,2016-2017,Jul,Eastern,2016.0,9/26/2017,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81300,FT,528738,9/22/2021,Unicoi,At-Risk,IFSP,A,Sarah McKinney,5/16/2022,5/25/2022,...,6/27/2022,,,,,,,,,
111795,FT,528739,9/22/2021,Washington,Transitional,Eligibility,A,Melinda Burrell,5/16/2022,5/25/2022,...,,,,2021-2022,May,Eastern,2021.0,9/22/2024,,4.0
81287,FT,528739,9/22/2021,Unicoi,At-Risk,IFSP,A,Sarah McKinney,5/16/2022,5/25/2022,...,6/27/2022,,,,,,,,,
92411,SE,529465,11/11/2020,Bradley,Transitional,Eligibility,A,Jennifer Steward,5/27/2022,5/31/2022,...,,,,,,,,,,


In [19]:
num_county_dup = len(np.unique(np.array(county_dups['Child ID'])))
num_county_dup

3168

In [20]:
all_teis_dups['Child Phase'].value_counts()

IFSP            52683
Eligibility     48512
Notification    28925
Evaluation      15208
Screening        3720
Referral         3060
Name: Child Phase, dtype: int64

In [21]:
county_status_dups = county_dups[county_dups.duplicated(['Child ID', 'Child Phase', 'Notification Date'], keep=False)]
county_status_dups

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
3729,ET,365886,2/28/2014,Blount,Transitional,IFSP,A,Karen Stock,9/21/2016,,...,9/26/2016,,,2016-2017,Sep,Eastern,2016.0,2/28/2017,,1.0
285,ET,365886,2/28/2014,Knox,Transitional,IFSP,I,Karen Stock,9/21/2016,9/22/2016,...,2/3/2017,618 - Part B eligibility not determined,2/27/2017,,,,,,,
12365,UC,385283,1/31/2015,Putnam,Transitional,IFSP,I,Kim Pierce,4/25/2017,5/2/2017,...,10/30/2017,618 - Part B eligibility not determined,1/30/2018,,,,,,,
13302,UC,385283,1/31/2015,Overton,Transitional,IFSP,A,Kim Pierce,4/25/2017,5/2/17,...,5/2/2017,,,2016-2017,Apr,Middle,2016.0,1/31/2018,,4.0
12971,UC,386507,10/24/2014,Sumner,Competitive,IFSP,I,Miriam Espinosa,11/1/2016,11/30/2016,...,6/15/2017,618 - Parent withdraw,9/13/2017,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107545,ET,523720,1/5/2021,Knox,Transitional,IFSP,A,Kristi Hopwood,3/7/2022,3/8/2022,...,4/6/2022,,,2021-2022,Mar,Eastern,2021.0,1/5/2024,,3.0
80374,ET,524519,12/28/2021,Monroe,At-Risk,Eligibility,I,Leslie Milligan,3/15/2022,3/21/2022,...,,Unable to contact,5/31/2022,,,,,,,
109200,SE,524519,12/28/2021,Bradley,Transitional,Eligibility,A,Jaci Robertson,3/15/2022,3/21/2022,...,,,,2021-2022,Mar,Eastern,2021.0,12/28/2024,,3.0
109557,ET,526317,3/21/2021,Sevier,Transitional,Eligibility,A,Cynthia Miller,4/7/2022,4/13/2022,...,,,,2021-2022,Apr,Eastern,2021.0,3/21/2024,,4.0


In [22]:
teis_c[teis_c.duplicated(['Child ID', 'Notification Date'], keep=False)]

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Service Coordinator Counter,Fiscal Year.1,Child Count,2012 Child Count,2013 Child Count,third DOB,Late Referral,Qtr,QTR,Referral Source Category


In [23]:
all_teis_dups[['Child ID', 'Initial Eligibility', 'Initial Eligibility Date', 'Initial IFSP Date']].sort_values('Child ID').head(40)

Unnamed: 0,Child ID,Initial Eligibility,Initial Eligibility Date,Initial IFSP Date
437,351755,,,
8367,351755,Ineligible,8/25/2016,
13240,353805,Eligible,10/5/2016,10/10/2016
1552,353805,,,
4142,353900,,,
6352,353900,,,
4093,354011,,,
1258,354011,,,
6373,354135,,,
92,354135,,,


In [24]:
teis_c.columns

Index(['POE', 'Child ID', 'DOB', 'County Name', 'County SES', 'Child Phase',
       'Active', 'Service Coordinator', 'Notification Date',
       'Parent Consent Date', 'Referral Source Type Name',
       'Initial Eligibility', 'Initial Eligibility Date', 'Initial IFSP Date',
       'Latest IFSP Date', 'Exit Reason', 'Exit Date', 'Fiscal Year',
       'Notification Month', 'Tenn Region', 'Referral Count',
       'Evaluation Count', 'Eligibility Count', 'IFSP Count',
       'Service Coordinator Counter', 'Fiscal Year.1', 'Child Count',
       '2012 Child Count', '2013 Child Count', 'third DOB', 'Late Referral',
       'Qtr', ' QTR ', 'Referral Source Category'],
      dtype='object')

In [53]:
teis_c2 = teis_c.drop(columns = ['POE', 'DOB', 'County Name', 'County SES', 'Child Phase', 'Active', 'Service Coordinator', 
                                 'Parent Consent Date', 'Referral Source Type Name', 'Initial Eligibility', 'Initial Eligibility Date', 'Initial IFSP Date', 'Latest IFSP Date', 'Exit Reason', 'Exit Date',
                                 '2012 Child Count', '2013 Child Count', 'Service Coordinator Counter', 'Referral Count', 
                                 'Evaluation Count', 'Eligibility Count', 'IFSP Count', 'Child Count', ' QTR ', 
                                 'Referral Source Category'])
teis_c2

Unnamed: 0,Child ID,Notification Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
0,403339,7/1/2016,2016-2017,Jul,Eastern,2016,8/9/2017,,1
1,404085,7/1/2016,2016-2017,Jul,Eastern,2016,7/31/2016,1.0,1
2,403623,7/1/2016,2016-2017,Jul,Eastern,2016,2/5/2019,,1
3,404157,7/1/2016,2016-2017,Jul,Eastern,2016,12/6/2018,,1
4,404154,7/1/2016,2016-2017,Jul,Eastern,2016,4/18/2018,,1
...,...,...,...,...,...,...,...,...,...
113251,528759,5/16/2022,2021-2022,May,Middle,2021,4/7/2024,,4
113252,528534,5/11/2022,2021-2022,May,Middle,2021,3/13/2025,,4
113253,528430,5/10/2022,2021-2022,May,Middle,2021,1/5/2025,,4
113254,528253,5/6/2022,2021-2022,May,Middle,2021,9/12/2023,,4


In [50]:
import re
from tqdm.notebook import tqdm

In [82]:
teis_merge3 = pd.merge(teis_c2, teis_p, how='outer').sort_values(['Child ID'])
teis_merge3

Unnamed: 0,Child ID,Notification Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr,POE,...,Active,Service Coordinator,Parent Consent Date,Referral Source Type Name,Initial Eligibility,Initial Eligibility Date,Initial IFSP Date,Latest IFSP Date,Exit Reason,Exit Date
25415,104085,1/20/2018,2017-2018,Jan,Middle,2017.0,7/7/2010,1.0,3.0,GN,...,I,,,DCS,,,,,Unable to contact,1/25/2010
2092,350065,8/17/2016,2016-2017,Aug,Middle,2016.0,10/15/2016,1.0,1.0,UC,...,I,Steven Scarlett,,PCP,,,,,Unable to contact,8/31/2016
437,351755,7/12/2016,2016-2017,Jul,Western,2016.0,11/13/2016,,1.0,NW,...,I,Rene Bard,7/12/2016,Parent,Ineligible,8/25/2016,,,Ineligible for Part C,8/25/2016
3369,353800,9/14/2016,2016-2017,Sep,Eastern,2016.0,10/1/2016,1.0,1.0,FT,...,I,Jennifer Terranera - 45 days,,Parent,,,,,Referral less than 45 days,9/15/2016
1552,353805,8/5/2016,2016-2017,Aug,Middle,2016.0,12/4/2016,,1.0,UC,...,I,Anna Bolin,8/15/2016,Dept. of Health,Eligible,10/5/2016,10/10/2016,,618 - Part B eligibility not determined,12/3/2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114109,531592,7/1/2022,,,,,,,,GN,...,A,Kim Correll,,PCP,,,,,,
114352,531593,7/1/2022,,,,,,,,GN,...,A,Kim Correll,,PCP,,,,,,
114075,531594,7/1/2022,,,,,,,,GN,...,A,Kim Correll,,Other Health Care Provider,,,,,,
114046,531595,7/1/2022,,,,,,,,GN,...,A,Kim Correll,,Hospital,,,,,,


In [98]:
teis_merge3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 115524 entries, 25415 to 114959
Data columns (total 24 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Child ID                   115524 non-null  int64         
 1   Notification Date          115524 non-null  datetime64[ns]
 2   Fiscal Year                113256 non-null  object        
 3   Notification Month         113256 non-null  object        
 4   Tenn Region                113256 non-null  object        
 5   Fiscal Year.1              113256 non-null  float64       
 6   third DOB                  113256 non-null  datetime64[ns]
 7   Late Referral              2829 non-null    float64       
 8   Qtr                        113256 non-null  float64       
 9   POE                        96580 non-null   object        
 10  DOB                        96580 non-null   datetime64[ns]
 11  County Name                96576 non-null   obje

In [83]:
teis_c3 = teis_c.drop(columns = [
                                 '2012 Child Count', '2013 Child Count', 'Service Coordinator Counter', 'Referral Count', 
                                 'Evaluation Count', 'Eligibility Count', 'IFSP Count', 'Child Count', ' QTR ', 
                                 'Referral Source Category'])
teis_c3

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
0,ET,403339,8/9/2014,Blount,Transitional,Eligibility,A,Kristi Borer,7/1/2016,7/1/16,...,,,,2016-2017,Jul,Eastern,2016,8/9/2017,,1
1,FT,404085,7/31/2013,Unicoi,At-Risk,Notification,I,Jennifer Terranera - 45 days,7/1/2016,,...,,Referral less than 45 days,7/1/2016,2016-2017,Jul,Eastern,2016,7/31/2016,1.0,1
2,FT,403623,2/5/2016,Sullivan,Transitional,Eligibility,A,Amy Talbert,7/1/2016,7/20/16,...,,,,2016-2017,Jul,Eastern,2016,2/5/2019,,1
3,FT,404157,12/6/2015,Sullivan,Transitional,IFSP,A,Candice Cradic,7/1/2016,7/22/16,...,8/10/2016,,,2016-2017,Jul,Eastern,2016,12/6/2018,,1
4,FT,404154,4/18/2015,Hawkins,At-Risk,IFSP,A,Kathy Jeffries,7/1/2016,7/18/16,...,7/28/2016,,,2016-2017,Jul,Eastern,2016,4/18/2018,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113251,UC,528759,4/7/2021,Sumner,Competitive,Evaluation,A,Sherry Roberts-DS,5/16/2022,5/26/2022,...,,,,2021-2022,May,Middle,2021,4/7/2024,,4
113252,UC,528534,3/13/2022,Sumner,Competitive,IFSP,A,Robyn Hogan,5/11/2022,5/23/2022,...,6/14/2022,,,2021-2022,May,Middle,2021,3/13/2025,,4
113253,UC,528430,1/5/2022,Cumberland,Transitional,Eligibility,A,Lisa Davis,5/10/2022,5/18/2022,...,,,,2021-2022,May,Middle,2021,1/5/2025,,4
113254,UC,528253,9/12/2020,Sumner,Competitive,IFSP,A,Taylor Lee,5/6/2022,5/11/2022,...,5/31/2022,,,2021-2022,May,Middle,2021,9/12/2023,,4


In [84]:
for ind, row in tqdm(teis_c3.iterrows()):
    if re.search('\d{5}', str(row['Exit Date'])):
        teis_c3.loc[ind, 'Exit Date'] = np.NaN

0it [00:00, ?it/s]

In [85]:
teis_merge3['DOB'] = pd.to_datetime(teis_merge3['DOB'])
teis_merge3['Notification Date'] = pd.to_datetime(teis_merge3['Notification Date'])
teis_merge3['Parent Consent Date'] = pd.to_datetime(teis_merge3['Parent Consent Date'])
teis_merge3['third DOB'] = pd.to_datetime(teis_merge3['third DOB'])
teis_merge3['Exit Date'] = pd.to_datetime(teis_merge3['Exit Date'])
teis_merge3

Unnamed: 0,Child ID,Notification Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr,POE,...,Active,Service Coordinator,Parent Consent Date,Referral Source Type Name,Initial Eligibility,Initial Eligibility Date,Initial IFSP Date,Latest IFSP Date,Exit Reason,Exit Date
25415,104085,2018-01-20,2017-2018,Jan,Middle,2017.0,2010-07-07,1.0,3.0,GN,...,I,,NaT,DCS,,,,,Unable to contact,2010-01-25
2092,350065,2016-08-17,2016-2017,Aug,Middle,2016.0,2016-10-15,1.0,1.0,UC,...,I,Steven Scarlett,NaT,PCP,,,,,Unable to contact,2016-08-31
437,351755,2016-07-12,2016-2017,Jul,Western,2016.0,2016-11-13,,1.0,NW,...,I,Rene Bard,2016-07-12,Parent,Ineligible,8/25/2016,,,Ineligible for Part C,2016-08-25
3369,353800,2016-09-14,2016-2017,Sep,Eastern,2016.0,2016-10-01,1.0,1.0,FT,...,I,Jennifer Terranera - 45 days,NaT,Parent,,,,,Referral less than 45 days,2016-09-15
1552,353805,2016-08-05,2016-2017,Aug,Middle,2016.0,2016-12-04,,1.0,UC,...,I,Anna Bolin,2016-08-15,Dept. of Health,Eligible,10/5/2016,10/10/2016,,618 - Part B eligibility not determined,2016-12-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114109,531592,2022-07-01,,,,,NaT,,,GN,...,A,Kim Correll,NaT,PCP,,,,,,NaT
114352,531593,2022-07-01,,,,,NaT,,,GN,...,A,Kim Correll,NaT,PCP,,,,,,NaT
114075,531594,2022-07-01,,,,,NaT,,,GN,...,A,Kim Correll,NaT,Other Health Care Provider,,,,,,NaT
114046,531595,2022-07-01,,,,,NaT,,,GN,...,A,Kim Correll,NaT,Hospital,,,,,,NaT


In [86]:
teis_c3['DOB'] = pd.to_datetime(teis_c3['DOB'])
teis_c3['Notification Date'] = pd.to_datetime(teis_c3['Notification Date'])
teis_c3['Parent Consent Date'] = pd.to_datetime(teis_c3['Parent Consent Date'])
teis_c3['third DOB'] = pd.to_datetime(teis_c3['third DOB'])
teis_c3['Exit Date'] = pd.to_datetime(teis_c3['Exit Date'])
teis_c3

Unnamed: 0,POE,Child ID,DOB,County Name,County SES,Child Phase,Active,Service Coordinator,Notification Date,Parent Consent Date,...,Latest IFSP Date,Exit Reason,Exit Date,Fiscal Year,Notification Month,Tenn Region,Fiscal Year.1,third DOB,Late Referral,Qtr
0,ET,403339,2014-08-09,Blount,Transitional,Eligibility,A,Kristi Borer,2016-07-01,2016-07-01,...,,,NaT,2016-2017,Jul,Eastern,2016,2017-08-09,,1
1,FT,404085,2013-07-31,Unicoi,At-Risk,Notification,I,Jennifer Terranera - 45 days,2016-07-01,NaT,...,,Referral less than 45 days,2016-07-01,2016-2017,Jul,Eastern,2016,2016-07-31,1.0,1
2,FT,403623,2016-02-05,Sullivan,Transitional,Eligibility,A,Amy Talbert,2016-07-01,2016-07-20,...,,,NaT,2016-2017,Jul,Eastern,2016,2019-02-05,,1
3,FT,404157,2015-12-06,Sullivan,Transitional,IFSP,A,Candice Cradic,2016-07-01,2016-07-22,...,8/10/2016,,NaT,2016-2017,Jul,Eastern,2016,2018-12-06,,1
4,FT,404154,2015-04-18,Hawkins,At-Risk,IFSP,A,Kathy Jeffries,2016-07-01,2016-07-18,...,7/28/2016,,NaT,2016-2017,Jul,Eastern,2016,2018-04-18,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113251,UC,528759,2021-04-07,Sumner,Competitive,Evaluation,A,Sherry Roberts-DS,2022-05-16,2022-05-26,...,,,NaT,2021-2022,May,Middle,2021,2024-04-07,,4
113252,UC,528534,2022-03-13,Sumner,Competitive,IFSP,A,Robyn Hogan,2022-05-11,2022-05-23,...,6/14/2022,,NaT,2021-2022,May,Middle,2021,2025-03-13,,4
113253,UC,528430,2022-01-05,Cumberland,Transitional,Eligibility,A,Lisa Davis,2022-05-10,2022-05-18,...,,,NaT,2021-2022,May,Middle,2021,2025-01-05,,4
113254,UC,528253,2020-09-12,Sumner,Competitive,IFSP,A,Taylor Lee,2022-05-06,2022-05-11,...,5/31/2022,,NaT,2021-2022,May,Middle,2021,2023-09-12,,4


In [128]:
teis_merge2 = pd.merge(teis_merge3, teis_c3, on=['Child ID', 'Notification Date'], how='left')
teis_merge2

Unnamed: 0,Child ID,Notification Date,Fiscal Year_x,Notification Month_x,Tenn Region_x,Fiscal Year.1_x,third DOB_x,Late Referral_x,Qtr_x,POE_x,...,Latest IFSP Date_y,Exit Reason_y,Exit Date_y,Fiscal Year_y,Notification Month_y,Tenn Region_y,Fiscal Year.1_y,third DOB_y,Late Referral_y,Qtr_y
0,104085,2018-01-20,2017-2018,Jan,Middle,2017.0,2010-07-07,1.0,3.0,GN,...,,Unable to contact,2010-01-25,2017-2018,Jan,Middle,2017.0,2010-07-07,1.0,3.0
1,350065,2016-08-17,2016-2017,Aug,Middle,2016.0,2016-10-15,1.0,1.0,UC,...,,Unable to contact,2016-08-31,2016-2017,Aug,Middle,2016.0,2016-10-15,1.0,1.0
2,351755,2016-07-12,2016-2017,Jul,Western,2016.0,2016-11-13,,1.0,NW,...,,,NaT,2016-2017,Jul,Western,2016.0,2016-11-13,,1.0
3,353800,2016-09-14,2016-2017,Sep,Eastern,2016.0,2016-10-01,1.0,1.0,FT,...,,Referral less than 45 days,2016-09-15,2016-2017,Sep,Eastern,2016.0,2016-10-01,1.0,1.0
4,353805,2016-08-05,2016-2017,Aug,Middle,2016.0,2016-12-04,,1.0,UC,...,,,NaT,2016-2017,Aug,Middle,2016.0,2016-12-04,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115519,531592,2022-07-01,,,,,NaT,,,GN,...,,,NaT,,,,,NaT,,
115520,531593,2022-07-01,,,,,NaT,,,GN,...,,,NaT,,,,,NaT,,
115521,531594,2022-07-01,,,,,NaT,,,GN,...,,,NaT,,,,,NaT,,
115522,531595,2022-07-01,,,,,NaT,,,GN,...,,,NaT,,,,,NaT,,


In [129]:
teis_merge2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 115524 entries, 0 to 115523
Data columns (total 46 columns):
 #   Column                       Non-Null Count   Dtype         
---  ------                       --------------   -----         
 0   Child ID                     115524 non-null  int64         
 1   Notification Date            115524 non-null  datetime64[ns]
 2   Fiscal Year_x                113256 non-null  object        
 3   Notification Month_x         113256 non-null  object        
 4   Tenn Region_x                113256 non-null  object        
 5   Fiscal Year.1_x              113256 non-null  float64       
 6   third DOB_x                  113256 non-null  datetime64[ns]
 7   Late Referral_x              2829 non-null    float64       
 8   Qtr_x                        113256 non-null  float64       
 9   POE_x                        96580 non-null   object        
 10  DOB_x                        96580 non-null   datetime64[ns]
 11  County Name_x             

In [130]:
teis_merge2['Referral Source Type Name_y'] = teis_merge2['Referral Source Type Name_y'].fillna(teis_merge2['Referral Source Type Name_x'])



In [131]:

teis_merge2 = teis_merge2.drop(columns = ['Referral Source Type Name_x'])
teis_merge2 = teis_merge2.rename(columns= {'Referral Source Type Name_y' : 'Referral Source Type Name'})
teis_merge2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 115524 entries, 0 to 115523
Data columns (total 45 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   Child ID                    115524 non-null  int64         
 1   Notification Date           115524 non-null  datetime64[ns]
 2   Fiscal Year_x               113256 non-null  object        
 3   Notification Month_x        113256 non-null  object        
 4   Tenn Region_x               113256 non-null  object        
 5   Fiscal Year.1_x             113256 non-null  float64       
 6   third DOB_x                 113256 non-null  datetime64[ns]
 7   Late Referral_x             2829 non-null    float64       
 8   Qtr_x                       113256 non-null  float64       
 9   POE_x                       96580 non-null   object        
 10  DOB_x                       96580 non-null   datetime64[ns]
 11  County Name_x               96576 non-n

In [132]:
teis_merge2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 115524 entries, 0 to 115523
Data columns (total 45 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   Child ID                    115524 non-null  int64         
 1   Notification Date           115524 non-null  datetime64[ns]
 2   Fiscal Year_x               113256 non-null  object        
 3   Notification Month_x        113256 non-null  object        
 4   Tenn Region_x               113256 non-null  object        
 5   Fiscal Year.1_x             113256 non-null  float64       
 6   third DOB_x                 113256 non-null  datetime64[ns]
 7   Late Referral_x             2829 non-null    float64       
 8   Qtr_x                       113256 non-null  float64       
 9   POE_x                       96580 non-null   object        
 10  DOB_x                       96580 non-null   datetime64[ns]
 11  County Name_x               96576 non-n

In [133]:
all_dups2 = teis_merge2[teis_merge2.duplicated(['Child ID', 'Notification Date'], keep=False)].sort_values('Child ID')

In [134]:
reref = teis_merge2[teis_merge2.duplicated('Child ID')].sort_values('Child ID') 
reref.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18781 entries, 25 to 113753
Data columns (total 45 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Child ID                    18781 non-null  int64         
 1   Notification Date           18781 non-null  datetime64[ns]
 2   Fiscal Year_x               18535 non-null  object        
 3   Notification Month_x        18535 non-null  object        
 4   Tenn Region_x               18535 non-null  object        
 5   Fiscal Year.1_x             18535 non-null  float64       
 6   third DOB_x                 18535 non-null  datetime64[ns]
 7   Late Referral_x             384 non-null    float64       
 8   Qtr_x                       18535 non-null  float64       
 9   POE_x                       8223 non-null   object        
 10  DOB_x                       8223 non-null   datetime64[ns]
 11  County Name_x               8222 non-null   object  

In [135]:
num_reref = len(np.unique(np.array(reref['Child ID'])))
num_reref

15641

In [136]:
n_reref = reref.groupby(['Referral Source Type Name']).count()
n_reref

Unnamed: 0_level_0,Child ID,Notification Date,Fiscal Year_x,Notification Month_x,Tenn Region_x,Fiscal Year.1_x,third DOB_x,Late Referral_x,Qtr_x,POE_x,...,Latest IFSP Date_y,Exit Reason_y,Exit Date_y,Fiscal Year_y,Notification Month_y,Tenn Region_y,Fiscal Year.1_y,third DOB_y,Late Referral_y,Qtr_y
Referral Source Type Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Child Care provider,52,52,52,52,52,52,52,3,52,25,...,2,20,20,52,52,52,52,52,3,52
Community/Child Find Activity,43,43,43,43,43,43,43,2,43,19,...,3,15,15,43,43,43,43,43,2,43
DCS,4211,4211,4176,4176,4176,4176,4176,94,4176,1484,...,132,2104,2094,4176,4176,4176,4176,4176,94,4176
DHS,2,2,2,2,2,2,2,0,2,1,...,0,0,0,2,2,2,2,2,0,2
Dept. of Health,672,672,667,667,667,667,667,8,667,262,...,38,315,315,667,667,667,667,667,8,667
EIRA,61,61,59,59,59,59,59,2,59,22,...,1,27,27,59,59,59,59,59,2,59
Early Head Start/Head Start,167,167,166,166,166,166,166,6,166,93,...,10,52,52,166,166,166,166,166,6,166
Family and Friends,98,98,95,95,95,95,95,11,95,61,...,8,32,31,95,95,95,95,95,11,95
Foster Parent,115,115,114,114,114,114,114,0,114,86,...,15,21,21,114,114,114,114,114,0,114
Hospital,1169,1169,1163,1163,1163,1163,1163,7,1163,283,...,45,585,585,1163,1163,1163,1163,1163,7,1163
