In [1]:
import boto3
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
#import awswrangler as wr

plt.style.use('fivethirtyeight')  #fivethirtyeight  #seaborn-colorblind #ggplot  #seaborn-paper

import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#### December Prediction Data

In [2]:
# Create a boto3 session (no explicit arguments) and the S3 client used for every S3 read in this notebook.
session = boto3.Session()
s3 = session.client('s3')

In [3]:
# Bucket and key prefix of the daily prediction parquet files.
bucket_name = 'ds-collections-dev'
file_dir = 'predictions/parquet/'

# List the objects whose key starts with the 2021-12 run-date prefix.
# NOTE(review): list_objects returns a single page of results (boto3 documents a 1000-key cap per call) — confirm the prefix never matches more, or use a paginator.
response = s3.list_objects(Bucket = bucket_name, Prefix = file_dir + 'df_run_dt_2021_12')

In [4]:
%%time

df = []
for i in range(len(response['Contents'])):
    key = response['Contents'][i]['Key']
    obj = s3.get_object(Bucket = bucket_name, Key = key)
    df_temp = pd.read_parquet(io.BytesIO(obj['Body'].read()))
    df.append(df_temp)
    print(i,' = ',response['Contents'][i]['Key'])
    
df = pd.concat(df, ignore_index = True)

0  =  predictions/parquet/df_run_dt_2021_12_01_00_00.parquet
1  =  predictions/parquet/df_run_dt_2021_12_02_00_00.parquet
2  =  predictions/parquet/df_run_dt_2021_12_03_00_00.parquet
3  =  predictions/parquet/df_run_dt_2021_12_04_00_00.parquet
4  =  predictions/parquet/df_run_dt_2021_12_06_00_00.parquet
5  =  predictions/parquet/df_run_dt_2021_12_07_00_00.parquet
6  =  predictions/parquet/df_run_dt_2021_12_08_00_00.parquet
7  =  predictions/parquet/df_run_dt_2021_12_09_00_00.parquet
8  =  predictions/parquet/df_run_dt_2021_12_10_00_00.parquet
9  =  predictions/parquet/df_run_dt_2021_12_11_00_00.parquet
10  =  predictions/parquet/df_run_dt_2021_12_13_00_00.parquet
11  =  predictions/parquet/df_run_dt_2021_12_14_00_00.parquet
12  =  predictions/parquet/df_run_dt_2021_12_15_00_00.parquet
13  =  predictions/parquet/df_run_dt_2021_12_16_00_00.parquet
CPU times: user 1.24 s, sys: 668 ms, total: 1.91 s
Wall time: 5.55 s


In [5]:
def avg_payment_day(col, frame=None):
    """Add a ``<col>_bucket`` column that bins an average payment day.

    Buckets: ``'1_5'`` for values <= 5, ``'6_10'`` for values in (5, 10],
    ``'11_30'`` for values > 10, and ``'missing'`` for anything matching
    none of these (e.g. NaN).

    Parameters
    ----------
    col : str
        Name of the numeric column to bucket.
    frame : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the notebook-level ``df``,
        which is what the one-argument call operates on.

    Returns
    -------
    None
        The frame is modified in place.
    """
    if frame is None:
        frame = df
    values = frame[col]
    # The top bucket starts right after 10 (not at 11) so fractional
    # averages such as 10.5 are not labelled 'missing'.
    conditions  = [values <= 5, (values > 5) & (values <= 10), values > 10]
    choices     = ['1_5', '6_10', '11_30']
    frame[col + '_bucket'] = np.select(conditions, choices, default='missing')

In [6]:
# Bucket the 6-month average payment day; this adds 'avgpmtday_6m_bucket' to df in place.
avg_payment_day('avgpmtday_6m')

In [7]:
# Keep only the loan id, run date, payment-day bucket and model-output columns used below.
df_sub= df[['loanid', 'rundt', 'avgpmtday_6m_bucket', 'down2_probability', 'decile_all', 'decile_ex_policy']]

In [8]:
#df_sub['avgpmtday_6m_bucket'].unique()

#### Raw Data and Merge (to get RiskScore and Campaign Profile)

In [9]:
%%time
bucket_name = 'ds-collections-dev'
file_name = 'data/drop_box/collections_data.parquet'


obj = s3.get_object(Bucket = bucket_name, Key = file_name)
df_campaign = pd.read_parquet(io.BytesIO(obj['Body'].read())) 

df_campaign['loanid'] = df_campaign['LOANID']
df_campaign['rundt'] = df_campaign['RUNDT']
df_campaign['rundt'] = pd.to_datetime(df_campaign['rundt'])

CPU times: user 3min 33s, sys: 47.9 s, total: 4min 21s
Wall time: 3min 6s


In [10]:
# Attach campaign profile and risk score to each prediction row; the left join keeps every prediction row.
campaign_cols = ['loanid', 'rundt', 'CAMPAIGNPROFILE', 'RISKSCORE']
df_daily = df_sub.merge(df_campaign[campaign_cols], how='left', on=['loanid', 'rundt'])

# New campaign profiles: 'Never Delinquent' is split by payment-day bucket, every other profile is kept as is.
profile = df_daily['CAMPAIGNPROFILE']
is_never_dq = profile == 'Never Delinquent'
df_daily['Campaign'] = np.where(is_never_dq, profile + '_' + df_daily['avgpmtday_6m_bucket'], profile)

#### Rollback, Policy, and PastDue flags

In [11]:
# Every row gets the same fixed next-payment due date.
# NOTE(review): the constant is 2021-10-01 while the run dates loaded above are from December 2021 — confirm this is the intended due date.
df_daily['nextpaymentduedt'] = pd.Timestamp('20211001')
# Make sure the run date is a datetime so it can be compared and subtracted later.
df_daily['rundt'] = pd.to_datetime(df_daily['rundt'])

In [12]:
# Keep only rows with a run date strictly after 2021-12-01.
df_daily = df_daily[df_daily['rundt']>'2021-12-01']

In [13]:
#df_daily = df_daily.dropna(how='all')

In [14]:
# Flags derived from the policy-exclusion decile label.
# The label is read from df_daily itself so the flags never depend on df and df_daily sharing row labels.
df_daily['rollback_flag'] = df_daily['decile_ex_policy'].isin(['Policy_Rollback', 'Rollback'])
df_daily['policy_flag'] = df_daily['decile_ex_policy'].isin(['Policy'])
# Whole days between the run date and the next payment due date.
df_daily['PastDue'] = np.round((df_daily['rundt'] - df_daily['nextpaymentduedt'])/np.timedelta64(1, 'D'), 0).astype(int)

#### Create a Challenger flag

In [15]:
# df_month is a second name for the same DataFrame object as df_daily (no copy is made),
# so the column added on the next line also appears on df_daily.
df_month = df_daily
# Last digit of the loan id.
df_month['lastdigit'] = df_month['loanid']%10

In [16]:
# Loans whose id ends in one of these digits are flagged challenger (1); all other loans get 0.
digits = [0,1,2,3,9]
df_month['challenger'] = np.where(df_month['lastdigit'].isin(digits),1,0)
# Earlier rule kept for reference (last digit 8 or 9):
#np.where( ((df_month['lastdigit']==8.0) | (df_month['lastdigit']==9.0)), 1, 0)

In [17]:
# Restrict the analysis to run dates on or after 2021-12-02. From here on df_month is a new object, no longer the same one as df_daily.
df_month =df_month[df_month['rundt']>= '2021-12-02']

#### Function to show champion and challenger

In [18]:
def rollrate_fn (segment):
    """Daily champion/challenger loan counts with percent-remaining and lift.

    Counts ``loanid`` per ``rundt`` for each ``challenger`` value, chains the
    day-over-day ratios of each count into a "remaining %" relative to the
    first run date, and reports ``lift`` as champion remaining% minus
    challenger remaining%.

    Parameters
    ----------
    segment : pandas.DataFrame
        Must contain ``loanid``, ``rundt`` and ``challenger`` (1/0) columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by run date, with columns ``challenger``, ``champion``,
        ``challenger remaining%``, ``champion remaining%`` and ``lift``.
    """
    def _remaining_pct(counts):
        # Cumulative product of (1 + daily change), expressed in percent and rounded to 2 decimals.
        growth = counts.pct_change().fillna(0).add(1).cumprod().sub(1)
        return ((1 + growth) * 100).round(2)

    counts_by_arm = pd.pivot_table(segment, values='loanid', aggfunc="count",
                                   index=['rundt'], columns=['challenger'], margins=False)
    # Put the challenger column (1) ahead of the champion column (0) and drop the index name.
    group = counts_by_arm.reindex([1, 0], axis=1).rename_axis(None, axis=0)
    group.columns = ['challenger', 'champion']

    for arm in ('challenger', 'champion'):
        group[arm + ' remaining%'] = _remaining_pct(group[arm])
    group['lift'] = group['champion remaining%'] - group['challenger remaining%']
    return group

#### Function to get ADCO model based Deciles

In [19]:
def recalc_deciles(score):
    """Re-rank loans into risk-score deciles separately for each PastDue value.

    For every distinct ``PastDue`` value (processed in ascending order) the
    ``RISKSCORE`` values of that day's rows are split into up to ten
    quantile bins. Two columns are added:

    * ``rank`` - zero-based bin number, or 10 when the score falls in no bin
      (e.g. it is missing);
    * ``Decile_ADCO`` - ``'D1'``..``'D10'`` for ranks 0..9, ``'NoScore'``
      otherwise.

    Parameters
    ----------
    score : pandas.DataFrame
        Must contain ``PastDue`` and ``RISKSCORE`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The per-day frames stacked in ascending ``PastDue`` order with a
        fresh integer index; an empty DataFrame when there is nothing to stack.
    """
    score_method = 'RISKSCORE'
    rank_method = 'rank'
    decile_method = 'Decile_ADCO'
    choices = ['D%d' % n for n in range(1, 11)] + ['NoScore']

    daily_frames = []

    # Deciles are re-sorted every day.
    for past_due in score['PastDue'].sort_values().unique():
        # Copy so the new columns go to an independent frame rather than a slice of the input.
        score_day = score[score['PastDue'] == past_due].copy()
        # Bin edges are that day's score quantiles; duplicate edges are dropped, so there may be fewer than ten bins.
        _, bins = pd.qcut(score_day[score_method], 10, retbins=True, labels=False, duplicates='drop')
        rank = pd.cut(score_day[score_method], bins=bins, labels=False, include_lowest=True).fillna(10)
        score_day[rank_method] = rank

        # Rank k in [k, k+1) maps to D(k+1); rank 10 (no bin) maps to 'NoScore'.
        conditions = [(rank >= k) & (rank < k + 1) for k in range(10)] + [rank >= 10]
        score_day[decile_method] = np.select(conditions, choices, default='NoScore')
        daily_frames.append(score_day)

    if not daily_frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead of growing a frame inside the loop.
    return pd.concat(daily_frames, ignore_index=True)

In [20]:
# #first
# segment = df_month
# segment_group = segment.sort_values('rundt').groupby(['loanid'])
# first = segment_group.head(1)
# first['first_decile']= first['decile_ex_policy']
# first = first[['loanid','first_decile']]
# segment = segment.merge(first, how='left', on =['loanid'])

# rollrate_fn(segment)

#### All loans 

In [21]:
# Roll-rate table for every loan in the analysis window.
rollrate_fn(df_month)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,506947,507762,100.0,100.0,0.0
2021-12-03,450945,451962,88.95,89.01,0.06
2021-12-04,390693,391356,77.07,77.07,0.0
2021-12-06,341624,342170,67.39,67.39,0.0
2021-12-07,305995,306797,60.36,60.42,0.06
2021-12-08,279302,280680,55.09,55.28,0.19
2021-12-09,257592,258733,50.81,50.96,0.15
2021-12-10,237485,238529,46.85,46.98,0.13
2021-12-11,191131,192040,37.7,37.82,0.12
2021-12-13,176046,176859,34.73,34.83,0.1


#### Exclude Rollback

In [22]:
# Drop rows flagged as rollback, then show the roll-rate table for what is left.
no_rollback = df_month[df_month['rollback_flag']==False]
rollrate_fn(no_rollback)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,506943,507754,100.0,100.0,0.0
2021-12-03,449637,450705,88.7,88.76,0.06
2021-12-04,387839,388483,76.51,76.51,0.0
2021-12-06,338475,338838,66.77,66.73,-0.04
2021-12-07,302012,302797,59.58,59.63,0.05
2021-12-08,275040,276244,54.25,54.41,0.16
2021-12-09,252804,253854,49.87,50.0,0.13
2021-12-10,232178,233210,45.8,45.93,0.13
2021-12-11,185003,185748,36.49,36.58,0.09
2021-12-13,169725,170369,33.48,33.55,0.07


#### Exclude Rollback and Policy 

In [23]:
# From the rollback-free rows, also drop rows flagged as policy, then show the roll-rate table.
no_policy = no_rollback[no_rollback['policy_flag']==False]
rollrate_fn(no_policy)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,467780,468465,100.0,100.0,0.0
2021-12-03,413297,414076,88.35,88.39,0.04
2021-12-04,354690,355718,75.82,75.93,0.11
2021-12-06,307518,308154,65.74,65.78,0.04
2021-12-07,274202,275022,58.62,58.71,0.09
2021-12-08,248178,249079,53.05,53.17,0.12
2021-12-09,227634,228501,48.66,48.78,0.12
2021-12-10,208965,209743,44.67,44.77,0.1
2021-12-11,164761,165676,35.22,35.37,0.15
2021-12-13,151476,152016,32.38,32.45,0.07


####  By Decile

In [24]:
def calc_remaining_percent(pop):
    """Append a ``<column>%`` column for every existing column of ``pop``.

    Each new column is the cumulative product of (1 + row-over-row change)
    of its source column, with missing changes treated as no change.
    ``pop`` is modified in place and nothing is returned.
    """
    # Snapshot the labels first: columns are added while looping.
    source_cols = list(pop.columns)
    for name in source_cols:
        ratio_chain = pop[name].pct_change().fillna(0).add(1).cumprod()
        pop[name + '%'] = 1 + ratio_chain.sub(1)
    

def rollrate_decile_fn(segment):
    """Champion vs. challenger percent-remaining by each loan's first-seen decile.

    Each loan is assigned the ``decile_ex_policy`` value of its earliest
    ``rundt`` row. Loans are then counted per run date and first decile,
    separately for challenger (1) and champion (0), and
    ``calc_remaining_percent`` adds the ``D1%``..``D10%`` columns.

    Parameters
    ----------
    segment : pandas.DataFrame
        Must contain ``loanid``, ``rundt``, ``decile_ex_policy`` and
        ``challenger`` columns.

    Returns
    -------
    tuple
        ``(challenger_pop, champion_pop, lift)`` where ``lift`` is the
        champion minus challenger ``D1%``..``D10%`` columns, times 100,
        rounded to 2 decimals.
    """
    decile_labels = ['D1','D2','D3','D4','D5','D6','D7','D8','D9','D10']
    pct_labels = [label + '%' for label in decile_labels]

    # Fix the decile as of each loan's first run date.
    earliest = segment.sort_values('rundt').groupby(['loanid']).head(1)
    first = earliest[['loanid', 'decile_ex_policy']].rename(columns={'decile_ex_policy': 'first_decile'})
    segment = segment.merge(first, how='left', on=['loanid'])

    def _daily_counts(arm_flag):
        # Loan counts per run date and first decile for one arm, with the % columns appended.
        arm = segment[segment['challenger'] == arm_flag]
        counts = pd.pivot_table(arm, values='loanid', aggfunc="count",
                                index=['rundt'], columns=['first_decile'])
        counts = counts.rename_axis(None, axis=0).reindex(decile_labels, axis=1)
        calc_remaining_percent(counts)
        return counts

    challenger_pop = _daily_counts(1)
    champion_pop = _daily_counts(0)

    lift = ((champion_pop[pct_labels] - challenger_pop[pct_labels]) * 100).round(2)
    return challenger_pop, champion_pop, lift

In [25]:
#no_policy = recalc_deciles(df_month)

In [26]:
# Overall roll-rate table for the population without rollback and policy loans.
display(rollrate_fn(no_policy))

# Exclude rollback and policy loans; only the per-decile lift table is displayed.
challenger_pop, champion_pop, lift = rollrate_decile_fn(no_policy)
#display(challenger_pop,champion_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,467780,468465,100.0,100.0,0.0
2021-12-03,413297,414076,88.35,88.39,0.04
2021-12-04,354690,355718,75.82,75.93,0.11
2021-12-06,307518,308154,65.74,65.78,0.04
2021-12-07,274202,275022,58.62,58.71,0.09
2021-12-08,248178,249079,53.05,53.17,0.12
2021-12-09,227634,228501,48.66,48.78,0.12
2021-12-10,208965,209743,44.67,44.77,0.1
2021-12-11,164761,165676,35.22,35.37,0.15
2021-12-13,151476,152016,32.38,32.45,0.07


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,-0.46,0.16,-0.02,0.12,0.24,-0.12,-0.08,-0.44,0.04,0.42
2021-12-04,0.06,0.14,-0.32,0.65,0.3,-0.18,-0.46,0.19,0.03,0.28
2021-12-06,-0.03,0.27,-0.9,0.94,-0.13,-0.38,-0.22,0.43,0.16,0.48
2021-12-07,-0.23,0.6,-0.27,0.75,0.0,-0.45,-0.37,-0.23,0.75,0.42
2021-12-08,-0.13,0.79,-0.46,0.48,0.59,-0.34,-0.63,0.04,0.94,0.3
2021-12-09,0.06,0.58,-0.02,0.51,0.39,-0.69,-0.86,0.29,0.87,0.74
2021-12-10,0.15,0.65,0.06,0.19,0.47,-0.54,-0.71,0.21,0.86,0.13
2021-12-11,0.09,0.49,-0.11,0.58,-0.04,-0.17,-0.01,-0.02,0.83,0.06
2021-12-13,-0.22,0.15,-0.37,0.33,-0.01,-0.17,-0.29,0.39,0.57,0.85


####  By Campaign Profile

In [27]:
# Overall: loan counts per run date, broken out by challenger flag and campaign profile.
group1 = pd.pivot_table(no_policy, values='loanid', aggfunc = "count", index=['rundt'], columns=['challenger', 'Campaign'], 
                        margins=False)
group1   

challenger,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1
Campaign,Always Delinquent,Delinquent Loss Mitigation,Delinquent MOD,Early Payment Default,First Payment Default,First Time Default,Infrequent Delinquent,Never Delinquent_11_30,Never Delinquent_1_5,Never Delinquent_6_10,Newly Boarded,Recurring Delinquent,Regularly Delinquent,Always Delinquent,Delinquent Loss Mitigation,Delinquent MOD,Early Payment Default,First Payment Default,First Time Default,Infrequent Delinquent,Never Delinquent_11_30,Never Delinquent_1_5,Never Delinquent_6_10,Newly Boarded,Recurring Delinquent,Regularly Delinquent
rundt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2
2021-12-02,3584,2652,435,403,21,2191,22189,143360,153308,110715,10718,11290,7290,3539,2640,429,401,26,2231,22044,143118,152802,110156,11127,11418,7567
2021-12-03,3435,2536,452,409,29,2377,21388,138031,114371,103890,8944,10937,6995,3374,2569,453,379,33,2328,21172,137644,114036,103770,9170,10881,7218
2021-12-04,3197,2407,507,392,36,2444,19923,131925,75259,95193,7301,10328,6537,3173,2418,500,381,29,2476,19736,131353,75225,94646,7479,10196,6829
2021-12-06,3114,2332,507,380,38,2424,18906,126990,41594,89063,6378,9921,6271,3021,2399,514,351,31,2486,18705,126478,42063,88393,6607,9794,6463
2021-12-07,2870,2180,517,348,38,2373,17910,122409,27971,77288,5461,9520,5914,2872,2236,517,337,36,2478,17704,121767,28250,76809,5527,9292,6175
2021-12-08,2719,2089,526,333,42,2288,17121,118210,21301,64617,4910,9015,5698,2708,2145,527,323,45,2381,16734,118012,21370,64015,4952,8929,5846
2021-12-09,2622,2019,526,315,38,2301,16320,114107,17168,54357,4375,8710,5449,2662,2081,522,317,47,2349,15881,113740,17174,54057,4442,8590,5604
2021-12-10,2509,1942,525,320,42,2204,15553,110277,13722,45075,3910,8304,5154,2589,2025,512,319,42,2304,15202,109771,13698,44895,3918,8190,5334
2021-12-11,2289,1746,550,284,39,2134,14031,98805,9195,21396,2980,7481,4587,2305,1805,537,290,40,2222,13620,98326,9155,21246,3014,7378,4675
2021-12-13,2230,1702,546,276,41,2148,13450,91031,8146,17853,2732,7290,4411,2290,1762,532,274,45,2220,13071,90776,8148,17828,2771,7048,4569


In [28]:
# High Risk
cp =  ['Always Delinquent', 'Delinquent Loss Mitigation', 'First Time Default','Delinquent MOD']
# Rows of no_policy whose campaign is one of the profiles above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,8839,8862,100.0,100.0,0.0
2021-12-03,8724,8800,98.7,99.3,0.6
2021-12-04,8567,8555,96.92,96.54,-0.38
2021-12-06,8420,8377,95.26,94.53,-0.73
2021-12-07,8103,7940,91.67,89.6,-2.07
2021-12-08,7761,7622,87.8,86.01,-1.79
2021-12-09,7614,7468,86.14,84.27,-1.87
2021-12-10,7430,7180,84.06,81.02,-3.04
2021-12-11,6869,6719,77.71,75.82,-1.89
2021-12-13,6804,6626,76.98,74.77,-2.21


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.0,0.0,-14.29,10.71,13.33,-16.41,-12.96,5.58,-6.4,0.52
2021-12-04,0.0,0.0,11.43,21.43,23.33,-3.08,-7.41,7.51,-2.32,0.43
2021-12-06,-50.0,-25.0,-2.86,28.57,-0.0,-5.13,-10.65,8.22,-2.42,0.09
2021-12-07,-50.0,0.0,17.14,-21.43,10.0,1.54,-6.02,8.42,-3.6,-3.19
2021-12-08,-50.0,-50.0,-31.43,25.0,-10.0,8.21,-36.11,1.27,-0.24,-3.06
2021-12-09,-50.0,-25.0,2.86,25.0,-3.33,-21.54,-19.91,12.78,-4.08,-0.6
2021-12-10,-50.0,-25.0,-11.43,32.14,-0.0,-0.51,-24.07,4.92,-5.41,-2.8
2021-12-11,-50.0,-25.0,-25.71,-3.57,-20.0,6.15,-38.43,1.42,1.63,-2.55
2021-12-13,-50.0,-25.0,-25.71,-10.71,-16.67,-0.51,-26.85,-0.3,-0.67,-0.56


In [29]:
# Early Payment Default
cp =  ['Early Payment Default']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,401,403,100.0,100.0,0.0
2021-12-03,379,409,94.51,101.49,6.98
2021-12-04,381,392,95.01,97.27,2.26
2021-12-06,351,380,87.53,94.29,6.76
2021-12-07,337,348,84.04,86.35,2.31
2021-12-08,323,333,80.55,82.63,2.08
2021-12-09,317,315,79.05,78.16,-0.89
2021-12-10,319,320,79.55,79.4,-0.15
2021-12-11,290,284,72.32,70.47,-1.85
2021-12-13,274,276,68.33,68.49,0.16


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.0,0.0,0.0,0.0,0.0,0.0,39.29,33.57,-13.75,5.74
2021-12-04,0.0,0.0,0.0,0.0,0.0,66.67,-16.07,53.57,-9.58,-0.93
2021-12-06,0.0,0.0,0.0,-50.0,66.67,66.67,23.21,35.0,-9.17,-3.81
2021-12-07,0.0,0.0,0.0,-50.0,33.33,66.67,-19.64,11.43,-8.33,3.73
2021-12-08,0.0,0.0,0.0,-50.0,33.33,66.67,-33.93,11.43,-2.5,2.69
2021-12-09,0.0,0.0,0.0,-50.0,-16.67,66.67,-8.93,7.86,-20.42,1.21
2021-12-10,0.0,0.0,0.0,-50.0,16.67,66.67,-8.93,-2.14,-7.5,5.26
2021-12-11,0.0,0.0,0.0,-50.0,16.67,66.67,-10.71,-6.43,-18.33,7.28
2021-12-13,0.0,0.0,0.0,-50.0,16.67,66.67,-10.71,7.86,-12.5,5.85


In [30]:
# Regularly Delinquent
cp =  ['Regularly Delinquent']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,7567,7290,100.0,100.0,0.0
2021-12-03,7218,6995,95.39,95.95,0.56
2021-12-04,6829,6537,90.25,89.67,-0.58
2021-12-06,6463,6271,85.41,86.02,0.61
2021-12-07,6175,5914,81.6,81.12,-0.48
2021-12-08,5846,5698,77.26,78.16,0.9
2021-12-09,5604,5449,74.06,74.75,0.69
2021-12-10,5334,5154,70.49,70.7,0.21
2021-12-11,4675,4587,61.78,62.92,1.14
2021-12-13,4569,4411,60.38,60.51,0.13


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.0,0.0,0.0,50.0,-33.33,-30.0,-20.83,-6.59,-3.47,1.42
2021-12-04,0.0,0.0,0.0,16.67,-33.33,-75.0,8.33,10.81,2.28,0.7
2021-12-06,0.0,0.0,0.0,50.0,-33.33,-55.0,33.33,15.75,8.33,3.73
2021-12-07,0.0,0.0,0.0,50.0,-33.33,-35.0,16.67,10.99,-0.61,2.01
2021-12-08,0.0,0.0,0.0,16.67,-33.33,-55.0,12.5,14.84,4.79,2.06
2021-12-09,0.0,0.0,0.0,-16.67,-33.33,-55.0,25.0,9.16,10.11,3.7
2021-12-10,0.0,0.0,0.0,-16.67,-33.33,-15.0,4.17,15.02,7.54,-0.34
2021-12-11,0.0,0.0,0.0,-16.67,16.67,-35.0,4.17,17.95,9.52,-0.16
2021-12-13,0.0,0.0,0.0,-16.67,16.67,5.0,20.83,12.27,-0.01,-0.33


In [31]:
# Recurring Delinquent
cp =  ['Recurring Delinquent']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,11418,11290,100.0,100.0,0.0
2021-12-03,10881,10937,95.3,96.87,1.57
2021-12-04,10196,10328,89.3,91.48,2.18
2021-12-06,9794,9921,85.78,87.87,2.09
2021-12-07,9292,9520,81.38,84.32,2.94
2021-12-08,8929,9015,78.2,79.85,1.65
2021-12-09,8590,8710,75.23,77.15,1.92
2021-12-10,8190,8304,71.73,73.55,1.82
2021-12-11,7378,7481,64.62,66.26,1.64
2021-12-13,7048,7290,61.73,64.57,2.84


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.0,20.0,25.0,15.38,0.0,-8.31,-1.19,2.59,4.07,1.42
2021-12-04,50.0,60.0,-25.0,57.69,-32.14,14.62,0.84,9.84,5.29,1.06
2021-12-06,0.0,-10.0,-25.0,10.26,-82.14,-5.08,16.25,9.99,3.93,1.26
2021-12-07,0.0,10.0,0.0,60.26,-28.57,-21.08,8.6,7.91,4.56,2.59
2021-12-08,0.0,10.0,0.0,11.54,-7.14,-17.54,15.23,0.94,5.88,0.78
2021-12-09,0.0,30.0,25.0,20.51,-14.29,-2.31,12.01,3.84,3.99,1.21
2021-12-10,50.0,10.0,25.0,20.51,-17.86,-18.0,3.88,5.26,4.95,0.41
2021-12-11,50.0,10.0,-25.0,6.41,-39.29,-6.62,13.56,2.25,3.41,0.67
2021-12-13,50.0,10.0,-25.0,14.1,-32.14,-6.77,9.86,4.99,4.93,1.61


In [32]:
# Infrequent DQ
cp =  ['Infrequent Delinquent']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
#display(challenger_pop,champion_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,22044,22189,100.0,100.0,0.0
2021-12-03,21172,21388,96.04,96.39,0.35
2021-12-04,19736,19923,89.53,89.79,0.26
2021-12-06,18705,18906,84.85,85.2,0.35
2021-12-07,17704,17910,80.31,80.72,0.41
2021-12-08,16734,17121,75.91,77.16,1.25
2021-12-09,15881,16320,72.04,73.55,1.51
2021-12-10,15202,15553,68.96,70.09,1.13
2021-12-11,13620,14031,61.79,63.23,1.44
2021-12-13,13071,13450,59.3,60.62,1.32


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,13.89,0.88,4.39,1.29,-1.97,-8.52,-2.74,-2.55,1.08,0.2
2021-12-04,30.56,2.64,9.65,-3.25,12.01,-6.19,-0.98,-3.88,0.55,0.56
2021-12-06,36.11,-6.15,8.33,4.33,6.59,-11.35,2.1,-1.64,1.68,0.24
2021-12-07,22.22,-13.26,12.28,2.69,0.16,-12.03,-1.96,-0.04,1.39,0.25
2021-12-08,22.22,1.03,9.21,0.73,3.02,-9.48,3.56,-1.27,2.99,1.63
2021-12-09,5.56,-0.66,12.72,0.66,0.73,-4.22,3.97,-0.42,2.36,1.86
2021-12-10,-2.78,-2.93,2.19,4.24,-3.89,-7.36,3.35,-1.5,3.36,1.57
2021-12-11,-2.78,5.93,-4.39,-1.81,3.9,-2.71,5.54,1.33,2.14,2.04
2021-12-13,-2.78,3.66,2.63,-1.33,4.74,0.15,2.86,0.79,2.04,1.85


In [33]:
# Never DQ 1-5 (excluding rollback and policy)
cp =  ['Never Delinquent_1_5']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

#deciles
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,152802,153308,100.0,100.0,0.0
2021-12-03,114036,114371,74.63,74.6,-0.03
2021-12-04,75225,75259,49.23,49.09,-0.14
2021-12-06,42063,41594,27.53,27.13,-0.4
2021-12-07,28250,27971,18.49,18.24,-0.25
2021-12-08,21370,21301,13.99,13.89,-0.1
2021-12-09,17174,17168,11.24,11.2,-0.04
2021-12-10,13698,13722,8.96,8.95,-0.01
2021-12-11,9155,9195,5.99,6.0,0.01
2021-12-13,8148,8146,5.33,5.31,-0.02


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,-0.71,-0.13,0.33,0.32,0.5,0.3,-0.19,-0.81,-0.01,1.09
2021-12-04,-0.46,-0.27,-0.41,0.32,-0.04,0.36,-0.69,-0.3,-0.45,0.9
2021-12-06,-0.26,-0.29,-0.8,0.3,-0.8,-0.91,-0.42,0.17,-0.61,1.45
2021-12-07,-0.36,-0.14,-0.47,0.4,-0.16,-0.67,-0.76,-0.14,0.23,1.19
2021-12-08,-0.18,0.14,-0.38,0.57,0.01,-0.69,-0.9,-0.08,0.54,2.22
2021-12-09,-0.04,0.25,-0.02,0.05,0.28,-0.49,-0.8,-0.09,0.25,1.48
2021-12-10,0.03,0.23,-0.0,-0.13,0.29,-0.64,-0.47,-0.21,0.76,0.73
2021-12-11,0.05,-0.04,-0.33,-0.24,-0.09,-0.02,0.03,-0.04,0.88,0.41
2021-12-13,-0.06,0.03,-0.23,-0.38,-0.17,-0.06,-0.01,0.05,0.64,0.62


In [34]:
# Never DQ 6-10 (including policy)
# This cell filters df_month, which has neither the rollback nor the policy exclusion applied.
cp =  ['Never Delinquent_6_10']
model_pop = df_month[df_month['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

# by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,119536,119987,100.0,100.0,0.0
2021-12-03,113145,113469,94.65,94.57,-0.08
2021-12-04,104138,104536,87.12,87.12,0.0
2021-12-06,98159,98684,82.12,82.25,0.13
2021-12-07,85431,86016,71.47,71.69,0.22
2021-12-08,72042,72791,60.27,60.67,0.4
2021-12-09,60797,61167,50.86,50.98,0.12
2021-12-10,50973,51301,42.64,42.76,0.12
2021-12-11,24692,24847,20.66,20.71,0.05
2021-12-13,20760,20768,17.37,17.31,-0.06


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.01,0.26,-0.37,-0.36,0.06,-0.03,0.18,-0.72,-0.27,0.03
2021-12-04,-0.02,0.26,-0.59,0.13,0.3,0.1,-0.01,-0.77,-0.19,0.23
2021-12-06,0.39,0.49,-0.68,0.27,0.55,0.17,0.29,-1.2,-0.23,0.79
2021-12-07,0.75,1.03,-0.44,-0.05,0.21,0.23,0.06,-0.94,0.39,0.54
2021-12-08,0.84,1.62,-0.49,-0.25,0.76,0.84,0.06,-1.12,1.11,0.11
2021-12-09,0.79,0.93,-0.53,-0.16,0.37,0.19,-0.9,-0.6,0.93,0.69
2021-12-10,0.79,0.94,-0.28,-0.3,-0.05,0.2,-0.78,-0.58,0.88,0.93
2021-12-11,0.12,-0.01,0.29,1.1,-0.69,0.15,-0.47,-0.34,1.07,1.9
2021-12-13,0.15,-0.37,0.13,0.96,-0.89,-0.0,-0.45,-0.23,0.59,1.89


In [35]:
# Never DQ 6-10 (excluding rollback and policy)
cp =  ['Never Delinquent_6_10']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

# by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,110156,110715,100.0,100.0,0.0
2021-12-03,103770,103890,94.2,93.84,-0.36
2021-12-04,94646,95193,85.92,85.98,0.06
2021-12-06,88393,89063,80.24,80.44,0.2
2021-12-07,76809,77288,69.73,69.81,0.08
2021-12-08,64015,64617,58.11,58.36,0.25
2021-12-09,54057,54357,49.07,49.1,0.03
2021-12-10,44895,45075,40.76,40.71,-0.05
2021-12-11,21246,21396,19.29,19.33,0.04
2021-12-13,17828,17853,16.18,16.13,-0.05


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,0.21,-0.22,-0.77,-0.22,-0.76,-0.78,0.4,-1.33,-0.42,-1.06
2021-12-04,0.55,-0.49,-0.35,0.55,0.4,-0.73,0.25,0.04,-0.42,-1.75
2021-12-06,0.75,0.13,-1.21,0.93,0.34,0.02,0.39,-0.63,0.55,0.2
2021-12-07,0.22,1.39,-0.39,-0.02,-0.44,-0.39,0.07,-1.23,1.02,-0.56
2021-12-08,0.83,1.73,-0.83,-0.27,1.13,0.05,-0.13,-1.12,1.16,-1.28
2021-12-09,0.54,0.4,-0.69,-0.36,0.34,-0.17,-0.52,0.04,1.26,-0.33
2021-12-10,0.87,0.59,-0.26,-0.49,0.08,-0.06,-1.3,-0.57,0.22,0.53
2021-12-11,0.17,-0.26,0.15,1.06,-0.88,-0.16,-0.21,-0.14,1.04,2.05
2021-12-13,0.04,-0.41,0.15,0.75,-0.58,0.23,-0.55,-0.02,0.53,0.56


In [36]:
# Never DQ 11+ (including policy)
# This cell filters df_month, which has neither the rollback nor the policy exclusion applied.
cp =  ['Never Delinquent_11_30']
model_pop = df_month[df_month['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

# by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,155244,155544,100.0,100.0,0.0
2021-12-03,150494,150900,96.94,97.01,0.07
2021-12-04,144998,145418,93.4,93.49,0.09
2021-12-06,140504,140982,90.51,90.64,0.13
2021-12-07,136238,136830,87.76,87.97,0.21
2021-12-08,132788,133385,85.54,85.75,0.21
2021-12-09,129121,129612,83.17,83.33,0.16
2021-12-10,124922,125341,80.47,80.58,0.11
2021-12-11,114067,114510,73.48,73.62,0.14
2021-12-13,105467,105926,67.94,68.1,0.16


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,-0.04,0.18,0.2,0.13,0.23,-0.23,-0.11,0.16,0.04,-0.03
2021-12-04,0.2,0.37,-0.17,0.36,0.28,-0.43,-0.09,0.37,0.22,-0.33
2021-12-06,0.15,0.85,0.15,0.34,0.28,-0.32,0.04,0.36,0.24,-0.43
2021-12-07,-0.1,1.09,0.52,0.22,0.35,-0.03,0.17,0.33,0.34,-0.33
2021-12-08,0.22,1.25,0.48,0.24,0.35,0.04,-0.02,0.36,0.26,-0.39
2021-12-09,0.07,1.14,0.49,0.58,0.41,-0.28,0.33,-0.0,0.07,-0.33
2021-12-10,0.18,1.19,0.22,0.22,0.57,0.08,0.52,-0.11,0.02,-0.83
2021-12-11,0.38,1.67,0.2,0.1,0.41,0.39,0.24,-0.62,0.23,-0.92
2021-12-13,0.24,1.62,0.18,0.04,0.59,0.18,0.69,-0.64,0.13,-0.57


In [37]:
# Never DQ 11+ (excluding rollback and policy)
cp =  ['Never Delinquent_11_30']
# Rows of no_policy whose campaign is the profile above.
model_pop = no_policy[no_policy['Campaign'].isin(cp)]

display(rollrate_fn(model_pop))

# by decile
challenger_pop, champion_pop, lift = rollrate_decile_fn(model_pop)
display(lift)

Unnamed: 0,challenger,champion,challenger remaining%,champion remaining%,lift
2021-12-02,143118,143360,100.0,100.0,0.0
2021-12-03,137644,138031,96.18,96.28,0.1
2021-12-04,131353,131925,91.78,92.02,0.24
2021-12-06,126478,126990,88.37,88.58,0.21
2021-12-07,121767,122409,85.08,85.39,0.31
2021-12-08,118012,118210,82.46,82.46,0.0
2021-12-09,113740,114107,79.47,79.59,0.12
2021-12-10,109771,110277,76.7,76.92,0.22
2021-12-11,98326,98805,68.7,68.92,0.22
2021-12-13,90776,91031,63.43,63.5,0.07


first_decile,D1%,D2%,D3%,D4%,D5%,D6%,D7%,D8%,D9%,D10%
2021-12-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-12-03,-0.61,0.93,0.01,-0.59,0.76,0.34,-0.33,0.28,0.01,-0.06
2021-12-04,0.59,1.63,-0.34,0.22,0.39,0.34,-0.7,0.72,-0.0,-0.22
2021-12-06,-0.26,1.72,-0.72,0.22,0.29,0.87,-0.38,1.05,-0.15,-0.23
2021-12-07,-0.22,1.39,0.2,0.5,0.67,0.76,-0.11,-0.15,0.5,-0.21
2021-12-08,-0.9,1.17,-0.17,-0.59,0.81,0.63,-0.54,0.48,0.26,-1.03
2021-12-09,0.0,1.48,0.75,0.48,0.56,-0.37,-0.78,0.32,0.55,-0.67
2021-12-10,-0.06,1.65,0.77,-0.23,1.17,0.29,-0.2,0.56,0.32,-1.14
2021-12-11,0.59,2.26,0.65,-0.1,0.76,0.79,0.3,-0.4,0.1,-2.18
2021-12-13,-0.38,1.02,-0.82,-0.2,0.63,0.39,-0.11,0.44,-0.09,-0.06


#### Comment Activity

In [38]:
# Location of the comment-activity extract.
bucket_name = 'ppsg-collections'
# NOTE(review): the key name carries the dates 20211102 and 21211115 (the second looks like a typo for 2021) while the
# predictions loaded earlier are from December 2021 — confirm this is the intended extract for this analysis.
commentactivity_file = 'CommentActivity/comment_activity_20211102_21211115.parquet'

#s3://ppsg-collections/CommentActivity/comment_activity_20211102_21211115.parquet

# Fetch the object and parse it straight from memory.
obj = s3.get_object(Bucket = bucket_name, Key = commentactivity_file)
comments = pd.read_parquet(io.BytesIO(obj['Body'].read())) 

In [39]:
# Use the same lower-case loan id column name as the prediction data so the frames can be merged on it.
comments= comments.rename(columns={'LOANID': 'loanid'})

In [40]:
#comments =comments.sort_values('COMMENTDT').groupby(['loanid'])

In [41]:
#challenger_flag
# Same rule as for the prediction data: 1 when the last digit of the loan id is in `digits`, else 0.
comments['challenger'] = np.where((comments['loanid']%10).isin(digits),1,0)

In [42]:
# Parse the comment date and derive its weekday name.
comments['SentDate'] = pd.to_datetime(comments['COMMENTDT'])
comments['weekday'] = comments['SentDate'].dt.day_name()

# Map each comment back to a run date: two days earlier when it was sent on a Monday
# (i.e. the preceding Saturday), one day earlier for every other weekday.
comments['rundt'] = np.where( (comments['weekday'] == 'Monday'), (comments['SentDate'] + pd.Timedelta(days= -2)),
                          (comments['SentDate'] + pd.Timedelta(days= -1)))

In [43]:
## Email flag
# 1 when the comment was written by one of the two e-mail agents, else 0.
comments['email_flag'] = np.where((comments['AGENTNAME'].isin(['Email Collections', 'Email CS Payment Reminder'])),1,0)

# Sum the 0/1 flag to count flagged rows; the result does not depend on column order or on nulls in other columns.
print(int(comments['email_flag'].sum()))

121576


In [44]:
## Call flag
# 1 when the work type is an outbound dialer call, else 0.
comments['dialer_flag'] = np.where(comments['WORKTRACKINGWORKTYPE']=='Outbound Dialer',1,0)
# Sum the 0/1 flag to count flagged rows; the result does not depend on column order or on nulls in other columns.
print(int(comments['dialer_flag'].sum()))

# 1 when the work type is an outbound manual call, else 0.
comments['manual_flag'] = np.where(comments['WORKTRACKINGWORKTYPE']=='Outbound Manual',1,0)
print(int(comments['manual_flag'].sum()))

212882
24551


In [45]:
# comments['email_flag'] = comments['email_flag'].astype('str')
# comments['dialer_flag'] = comments['dialer_flag'].astype('str')
# comments['manual_flag'] = comments['manual_flag'].astype('str')

In [46]:
# For each contact channel, keep the comment date on rows flagged for that channel and NaN elsewhere.
for channel in ('email', 'dialer', 'manual'):
    is_channel = comments[channel + '_flag'] == 1
    comments[channel + '_sentDate'] = np.where(is_channel, comments['COMMENTDT'], np.nan)

In [47]:
# Run date: two days before the sent date for Monday comments (the preceding Saturday), one day before otherwise.
# This repeats the assignment already made right after 'weekday' was derived.
comments['rundt'] = np.where( (comments['weekday'] == 'Monday'), (comments['SentDate'] + pd.Timedelta(days= -2)),
                          (comments['SentDate'] + pd.Timedelta(days= -1)))

# 'date' is the run date plus two days when 'weekday' is Saturday, plus one day otherwise.
# NOTE(review): 'weekday' is the weekday of SentDate, not of rundt, so a Monday comment (rundt = Saturday) gets
# rundt + 1 here — confirm the intent. The column is not part of the later column selection on `comments`.
comments['date'] = np.where( (comments['weekday'] == 'Saturday'), (comments['rundt'] + pd.Timedelta(days= 2)),
                          (comments['rundt'] + pd.Timedelta(days= 1)))

In [48]:
# Discard the agent id, comment time and source RUNDT columns, then collapse exact duplicate rows.
comments = (
    comments
    .drop(columns=['AGENTID', 'COMMENTTIME', 'RUNDT'])
    .drop_duplicates()
)
print(comments.shape)

(481746, 23)


In [49]:
# Keep only the merge keys, the send date/weekday, and the per-channel flags and dates.
comments = comments.loc[:, [
    'loanid', 'rundt', 'SentDate', 'weekday',
    'email_flag', 'dialer_flag', 'manual_flag',
    'email_sentDate', 'dialer_sentDate', 'manual_sentDate',
]]

In [50]:
%%time
# Right join: every comment row is kept; df_month columns are NaN where no (loanid, rundt) match exists.
# NOTE(review): the recorded outputs of the six count cells that follow are all empty, which is what
# unmatched keys would produce - confirm that loanid/rundt actually line up between the two frames.
df_comments = pd.merge(df_month, comments, how='right', on=['loanid', 'rundt'])
print(df_comments.shape)

(481746, 23)
CPU times: user 3.99 s, sys: 858 ms, total: 4.85 s
Wall time: 4.84 s


In [51]:
# E-mail volume per send date for the challenger arm, limited to rows carrying a D1-D10 decile
deciles = ['D' + str(rank) for rank in range(1, 11)]
df_comments_challenger = df_comments.loc[
    df_comments['email_flag'].eq(1)
    & df_comments['challenger'].eq(1)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_challenger.groupby('SentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

In [52]:
# E-mail volume per send date for the champion arm, limited to rows carrying a D1-D10 decile
deciles = ['D' + str(rank) for rank in range(1, 11)]
df_comments_champion = df_comments.loc[
    df_comments['email_flag'].eq(1)
    & df_comments['challenger'].eq(0)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_champion.groupby('SentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

In [53]:
# Dialer-call volume per dialer send date for the challenger arm (rows with a listed decile only)
df_comments_challenger = df_comments.loc[
    df_comments['dialer_flag'].eq(1)
    & df_comments['challenger'].eq(1)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_challenger.groupby('dialer_sentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

In [54]:
# Dialer-call volume per dialer send date for the champion arm (rows with a listed decile only)
df_comments_champion = df_comments.loc[
    df_comments['dialer_flag'].eq(1)
    & df_comments['challenger'].eq(0)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_champion.groupby('dialer_sentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

In [55]:
# Manual-call volume per manual send date for the challenger arm (rows with a listed decile only)
df_comments_challenger = df_comments.loc[
    df_comments['manual_flag'].eq(1)
    & df_comments['challenger'].eq(1)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_challenger.groupby('manual_sentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

In [56]:
# Manual-call volume per manual send date for the champion arm (rows with a listed decile only)
df_comments_champion = df_comments.loc[
    df_comments['manual_flag'].eq(1)
    & df_comments['challenger'].eq(0)
    & df_comments['decile_ex_policy'].isin(deciles)
]
df_comments_champion.groupby('manual_sentDate')['loanid'].count()

Series([], Name: loanid, dtype: int64)

### Get Last Run Date

In [57]:
# Latest df_month row per loan (rows ordered by rundt), reduced to loanid + that row's run date.
df_month_group = df_month.sort_values('rundt').groupby(['loanid'])
last = (
    df_month_group.tail(1)
    .loc[:, ['loanid', 'rundt']]
    .rename(columns={'rundt': 'LastRunDate'})
)

# Attach LastRunDate to every comment row and set one fixed due date for all rows.
df_comments = (
    df_comments
    .merge(last, how='left', on=['loanid'])
    .assign(nextpaymentduedt=pd.Timestamp('20211101'))
)

In [58]:
# Keep only the rows that picked up a LastRunDate in the merge above.
df_comments = df_comments.dropna(subset=['LastRunDate'])

In [59]:
# Whole days between the due date and the last run date (the difference is divided by one day, then rounded).
df_comments['PastDue'] = (
    (df_comments['LastRunDate'] - df_comments['nextpaymentduedt']) / np.timedelta64(1, 'D')
).round(0).astype(int)

In [60]:
# 'Never Delinquent' rows get the 6-month average-payment-day bucket appended to the profile;
# every other row keeps its campaign profile unchanged.
df_comments['Campaign'] = np.where(
    df_comments['CAMPAIGNPROFILE'].eq('Never Delinquent'),
    df_comments['CAMPAIGNPROFILE'] + '_' + df_comments['avgpmtday_6m_bucket'],
    df_comments['CAMPAIGNPROFILE'],
)

In [61]:
#df_comments['email_flag'] = df_comments['email_flag'].astype('str')

### A/B Testing: Challenger vs. Champion (same decile and same campaign)

In [62]:
## Code fills missing dates in a nextpaymentduedt period
def expanding_code(df, loanid_col = 'loanid', rundt_col = 'rundt', nxt_pmt = 'nextpaymentduedt'
                  , rundtnum_col = 'rundatenumber'):
    """Insert one row for every calendar day missing from a loan's run-date history.

    For each (``loanid_col``, ``nxt_pmt``) pair a daily calendar is built from the
    earliest ``rundt_col`` value to the latest ``LastRunDate`` value.  ``df`` is
    right-joined onto that calendar, so days without a row appear as gaps
    (``nxt_pmt`` is null).  Each gap row is replaced by a copy of the closest
    preceding non-gap row of the joined frame and then given back its own run date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``loanid_col``, ``rundt_col``, ``nxt_pmt`` and a
        ``'LastRunDate'`` column.  It is not modified.
    loanid_col, rundt_col, nxt_pmt : str
        Names of the loan id, run date and next-payment-due-date columns.
    rundtnum_col : str
        Name of the column that receives the run date as a ``YYYYMMDD`` float.

    Returns
    -------
    pandas.DataFrame
        The expanded frame in calendar order with a fresh 0..n-1 index.  Rows are
        copied by position, so the columns keep the dtypes produced by the join.

    Raises
    ------
    KeyError
        If the very first row of the joined frame is a gap, because there is no
        earlier row to copy from.
    """
    # Calendar bounds per loan and due-date period.  The aggregation follows
    # `rundt_col`, so a non-default run-date column name is honoured.
    ln_nxt_pmt = df.groupby([loanid_col, nxt_pmt]).agg({rundt_col: ['min'], 'LastRunDate': ['max']})
    ln_nxt_pmt.columns = ['dt_min_mnth', 'dt_max_mnth']
    ln_nxt_pmt = ln_nxt_pmt.reset_index()

    ## Creating a list of dates between dt_min_mnth and dt_max_mnth of each row
    ln_nxt_pmt['date_range'] = ln_nxt_pmt.apply(lambda row: pd.date_range(row['dt_min_mnth']
                                                                          , row['dt_max_mnth'])
                                               , axis = 1)
    ln_nxt_pmt = ln_nxt_pmt.drop([nxt_pmt, 'dt_min_mnth', 'dt_max_mnth'] , axis = 1)
    ln_nxt_pmt = ln_nxt_pmt.explode('date_range', ignore_index = True)
    ln_nxt_pmt.columns = [loanid_col, rundt_col]
    # explode() may hand back an object column; the merge key has to be datetime-typed.
    ln_nxt_pmt[rundt_col] = pd.to_datetime(ln_nxt_pmt[rundt_col])

    # Right join keeps the calendar order; days absent from `df` have nulls in
    # every non-key column.
    df = df.merge(ln_nxt_pmt, how = 'right', on = [loanid_col, rundt_col])

    ## Forward filling
    # A gap row is one without a due date.  For every row, find the position of
    # the latest non-gap row at or before it (-1 when nothing precedes it).
    is_gap = df[nxt_pmt].isna().to_numpy()
    source_pos = np.maximum.accumulate(np.where(is_gap, -1, np.arange(len(df))))
    if len(source_pos) and source_pos[0] < 0:
        raise KeyError('first expanded row has no ' + str(nxt_pmt) + ' and no earlier row to copy from')

    # Copy whole rows by position, then restore each row's own calendar date.
    dt_ser = df[rundt_col]
    df = df.iloc[source_pos].reset_index(drop = True)
    df[rundt_col] = dt_ser
    df[rundtnum_col] = df[rundt_col].dt.strftime('%Y%m%d').astype('float')
    df[nxt_pmt] = pd.to_datetime(df[nxt_pmt])

    return df

In [63]:
def rollrate_fn_2 (segment, activity):
    """Compare how challenger and champion loan counts change across run dates.

    Parameters
    ----------
    segment : pandas.DataFrame
        Needs the columns 'loanid', 'rundt', 'challenger' (1 = challenger,
        0 = champion) and the column named by ``activity``.
    activity : str
        Name of a 1/0 flag column (the notebook uses email_pop, dialer_pop,
        manual_pop).

    Returns
    -------
    (comparison, champion) : tuple of pandas.DataFrame, both rounded to 2 decimals
        comparison - indexed by run date: 'challenger' and 'champion' hold the
            loan counts with activity == 1 in each arm, the two 'remaining%'
            columns hold the cumulative product of the day-over-day changes as a
            percentage, and 'lift' is champion remaining% minus challenger
            remaining%.
        champion - the champion arm's own table with 'yes' / 'no' counts
            (activity == 1 / 0) and their 'remaining%' columns.
    """
    def _remaining_table(arm):
        # Loan counts per run date for one arm, split by the activity flag.
        counts = pd.pivot_table(segment[segment['challenger']==arm], values='loanid', aggfunc = "count",
                                index=['rundt'], columns=[activity], margins=False)
        # Fix the column order to flag 1 then flag 0; a flag value with no rows
        # becomes an all-NaN column.
        counts = counts.reindex([1, 0], axis=1).rename_axis(None, axis=0)
        counts.columns = ['yes', 'no']
        for flag in ('yes', 'no'):
            counts[flag + ' remaining%'] = (1+ counts[flag].pct_change().fillna(0).add(1).cumprod().sub(1))*100
        return counts

    group1 = _remaining_table(1)   # challenger
    group2 = _remaining_table(0)   # champion

    # Put the champion's activity == 1 figures next to the challenger's
    # (assignment aligns on the run-date index).
    group1['no'] = group2['yes']
    group1['no remaining%'] = group2['yes remaining%']
    group1 = group1.rename(columns={'yes': 'challenger', 'no': 'champion',
                                    'yes remaining%': 'challenger remaining%',
                                    'no remaining%': 'champion remaining%'})
    group1['lift'] = (group1['champion remaining%'] - group1['challenger remaining%'])
    return group1.round(2), group2.round(2)

In [64]:
def rollrate_fn_decile (segment, activity):
    """Per-decile loan counts by run date for each arm, plus the champion-minus-challenger lift.

    segment  : DataFrame with 'loanid', 'rundt', 'decile_ex_policy', 'challenger'
               (1 = challenger, 0 = champion) and the ``activity`` column.
    activity : name of the flag column (email_pop, dialer_pop, manual_pop).

    Returns (challenger_counts, champion_counts, lift).  The count tables are
    indexed by run date with (activity, first_decile) columns; lift is the
    difference of the two cumulative-percentage tables, rounded to 2 decimals.
    """
    # Decile each loan carries on its earliest run date, attached to all of its rows.
    opening_decile = (
        segment.sort_values('rundt')
               .groupby(['loanid'])
               .head(1)
               .loc[:, ['loanid', 'decile_ex_policy']]
               .rename(columns={'decile_ex_policy': 'first_decile'})
    )
    segment = segment.merge(opening_decile, how='left', on =['loanid'])

    def _counts(arm):
        # Loans per run date for one arm, split by activity flag and first decile.
        return pd.pivot_table(segment[segment['challenger']==arm], values='loanid', aggfunc = "count",
                              index=['rundt'], columns=[activity, 'first_decile'], margins=False)

    def _remaining_pct(counts):
        # Cumulative product of the day-over-day changes, expressed as a percentage.
        return (1+ counts.pct_change().fillna(0).add(1).cumprod().sub(1))*100

    challenger_counts = _counts(1)
    champion_counts = _counts(0)

    # Lift: champion percentage minus challenger percentage, column by column.
    lift = _remaining_pct(champion_counts) - _remaining_pct(challenger_counts)
    return challenger_counts, champion_counts, lift.round(2)