In [1]:
import pandas as pd
import numpy as np 
from datetime import date
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import MonthEnd
from functools import reduce

import warnings
# Silences every Python warning for the rest of the session. This includes
# pandas' SettingWithCopyWarning and FutureWarning/DeprecationWarning notices,
# so problems flagged by the cells below will not be shown.
warnings.filterwarnings("ignore")

##### MoM Data Import

In [3]:
# Load the MoM extract from its CSV dump into a DataFrame.
mom_csv_path = 'D://Varthana/MOM_DATA_28-Sep-20.csv'
df_mom = pd.read_csv(mom_csv_path)

#### Date format conversion

In [5]:
# Parse each date-like column of the MoM frame to datetime, one column at a
# time, using pandas' default format inference.
mom_date_columns = (
    'DT_BUSINESSDATE',
    'DT_INSTALLMENTDUE',
    'DT_LAST_PAYMENT',
    'DT_CREATED',
    'DT_LAST_UPDATE',
)
for date_col in mom_date_columns:
    df_mom[date_col] = pd.to_datetime(df_mom[date_col])


### Adding disb date to MoM data

In [7]:
# Load the LMS details extract; the cells below take FIRST_DISB_DATE from it.
lms_csv_path = 'D://Varthana/LMS_DETAILS_11-Sep-20.csv'
LMS_det_all = pd.read_csv(lms_csv_path)

In [9]:
# Keep only the loan identifier and its first disbursement date.
disb_columns = ['LOAN_ID', 'FIRST_DISB_DATE']
LMS_det_all_1 = pd.DataFrame(LMS_det_all.loc[:, disb_columns])

In [10]:
# Convert the disbursement date to datetime so it can be subtracted from
# snapshot dates later on.
first_disb_parsed = pd.to_datetime(LMS_det_all_1['FIRST_DISB_DATE'])
LMS_det_all_1['FIRST_DISB_DATE'] = first_disb_parsed

In [11]:
# Left join: every MoM row is kept; rows whose account number has no matching
# LOAN_ID get missing values in the LOAN_ID / FIRST_DISB_DATE columns.
df_mom_2 = df_mom.merge(
    LMS_det_all_1,
    how="left",
    left_on="SZ_LOAN_ACCOUNT_NO",
    right_on="LOAN_ID",
)

#### Retain a copy

In [12]:
# Independent (deep) copy of the merged frame; snapshot selection works on this copy.
df_mom_0_30 = df_mom_2.copy(deep=True)

#### Retain only the six month-end snapshots from Jul'19 to Dec'19 for analysis

In [14]:
# Month-end business dates (ISO strings) of the snapshots kept for analysis.
bus_dates = [
    '2019-07-31',
    '2019-08-31',
    '2019-09-30',
    '2019-10-31',
    '2019-11-30',
    '2019-12-31',
]

In [15]:
# Keep only the rows whose business date is one of the chosen snapshot dates.
# bus_dates holds strings while DT_BUSINESSDATE is datetime64; relying on isin()
# to cast strings implicitly is deprecated in recent pandas (and the warning is
# suppressed in this notebook), so the dates are converted explicitly first.
snapshot_dates = pd.to_datetime(bus_dates)
df_mom_4_snaps = df_mom_0_30[df_mom_0_30['DT_BUSINESSDATE'].isin(snapshot_dates)]

In [17]:
# Sanity check: number of rows retained for each snapshot date.
snapshot_date_col = df_mom_4_snaps['DT_BUSINESSDATE']
snapshot_date_col.value_counts()

2019-12-31    5053
2019-11-30    5046
2019-10-31    4994
2019-09-30    4894
2019-08-31    4781
2019-07-31    4609
Name: DT_BUSINESSDATE, dtype: int64

In [19]:
# All rows dated on or after the earliest snapshot. min() over ISO-formatted
# strings returns the chronologically earliest date.
# NOTE(review): df_perf is not referenced again in the cells visible here.
earliest_snapshot = min(bus_dates)
on_or_after_first_snapshot = df_mom_2['DT_BUSINESSDATE'] >= earliest_snapshot
df_perf = df_mom_2[on_or_after_first_snapshot]

### N-Mob perf creation WIP

In [20]:
# One row per (account, snapshot date) taken from the snapshot frame, with a
# fresh 0..n-1 index.
snapshot_key_cols = ['SZ_LOAN_ACCOUNT_NO', 'DT_BUSINESSDATE']
accts_dat = df_mom_4_snaps[snapshot_key_cols].reset_index(drop=True)

In [21]:
# Rename the date column so it does not clash with DT_BUSINESSDATE of the
# frame it is merged with next.
accts_dat_1 = accts_dat.rename(columns=dict(DT_BUSINESSDATE="Snapshot_dt"))

In [22]:
# accts_dat_1['end_date'] = pd.to_datetime(accts_dat_1['Snapshot_dt'], format="%Y%m") + MonthEnd(3)

###### perf  merge

In [23]:
# Pair every (account, snapshot) row with every MoM row of the same account.
# The join is on the account number only, so each snapshot row is repeated
# once per MoM row of that account.
perf_merge = accts_dat_1.merge(
    df_mom_2,
    how='inner',
    on='SZ_LOAN_ACCOUNT_NO',
)

In [25]:
# Keep only observations dated strictly after the snapshot they are paired with.
# New columns are assigned to this frame in later cells, so take an explicit
# copy: assigning to a filtered slice triggers SettingWithCopyWarning (which
# this notebook suppresses) and makes it unclear which object is modified.
after_snapshot = perf_merge['Snapshot_dt'] < perf_merge['DT_BUSINESSDATE']
perf_merge_2 = perf_merge.loc[after_snapshot].copy()

##### Getting Relative MOB and delinquency flag 

In [28]:
# Whole months elapsed between the snapshot and the observation date.
# np.timedelta64(1, 'M') is a calendar unit without a fixed length and pandas
# 2.x rejects it in timedelta arithmetic. NumPy's conversion factor for it is
# 365.2425 / 12 = 30.436875 days, so dividing the elapsed days by that factor
# gives the same rounded value as the original expression.
avg_days_per_month = 365.2425 / 12
elapsed_days = (
    perf_merge_2["DT_BUSINESSDATE"] - perf_merge_2["Snapshot_dt"]
) / pd.Timedelta(days=1)
perf_merge_2["Relative_MOB"] = (elapsed_days / avg_days_per_month).round()

## Flagging x days & 30p

In [30]:
# 1 when the observation's days-past-due lies in [1, 29] (both ends inclusive),
# otherwise 0; missing I_DPD values yield 0.
dpd_in_x_days_range = perf_merge_2['I_DPD'].between(1, 29)
perf_merge_2['x_days_flag'] = np.where(dpd_in_x_days_range, 1, 0)

In [31]:
# Narrow the merged frame to the columns needed for the performance flags.
perf_columns = [
    'SZ_LOAN_ACCOUNT_NO',
    'Snapshot_dt',
    'DT_BUSINESSDATE',
    'I_DPD',
    'Relative_MOB',
    'x_days_flag',
]
perf_sub = perf_merge_2[perf_columns]

In [32]:
# Independent (deep) copy so the flag column added next does not touch perf_sub.
perf_sub_2 = perf_sub.copy(deep=True)

## xdays ever in 3m 


In [34]:
# 1 when an observation is both within 3 relative months of its snapshot and
# flagged as x-days delinquent; 0 otherwise.
within_3m = perf_sub_2['Relative_MOB'].le(3)
is_x_days = perf_sub_2['x_days_flag'].eq(1)
perf_sub_2['xdays_ever_3m_flag'] = np.where(within_3m & is_x_days, 1, 0)


In [35]:
# Collapse to one row per (account, snapshot): xdays_ever_3m is 1 if any
# observation of that pair carried the 3-month flag, else 0.
# The built-in max aggregation replaces groupby.apply with a lambda that built
# a Series per group: same result, but vectorised, and it avoids the deprecated
# apply-on-grouping-columns code path.
perf_sub_2_agg = (
    perf_sub_2
    .groupby(['SZ_LOAN_ACCOUNT_NO', 'Snapshot_dt'])['xdays_ever_3m_flag']
    .max()
    .rename('xdays_ever_3m')
    .reset_index()
)

## Adding MOB and delinquency bucket to the data 

In [36]:
# Snapshot-time attributes (disbursement date and DPD) for each account/snapshot row.
snapshot_attr_cols = ['SZ_LOAN_ACCOUNT_NO', 'DT_BUSINESSDATE', 'FIRST_DISB_DATE', 'I_DPD']
df_snapshot = df_mom_4_snaps[snapshot_attr_cols]

In [37]:
# Attach the snapshot-time attributes to the aggregated flags. Left join, so
# every aggregated (account, snapshot) row is kept; Snapshot_dt is matched
# against the snapshot frame's DT_BUSINESSDATE.
perf_sub_3_agg = perf_sub_2_agg.merge(
    df_snapshot,
    how='left',
    left_on=['SZ_LOAN_ACCOUNT_NO', 'Snapshot_dt'],
    right_on=['SZ_LOAN_ACCOUNT_NO', 'DT_BUSINESSDATE'],
)

In [41]:
# Months on book at the snapshot: whole months between first disbursement and
# the snapshot date (NaN where FIRST_DISB_DATE is missing).
# np.timedelta64(1, 'M') is a calendar unit without a fixed length and pandas
# 2.x rejects it in timedelta arithmetic. NumPy's conversion factor for it is
# 365.2425 / 12 = 30.436875 days, so dividing the elapsed days by that factor
# gives the same rounded value as the original expression.
mob_days_per_month = 365.2425 / 12
days_on_book = (
    perf_sub_3_agg["Snapshot_dt"] - perf_sub_3_agg["FIRST_DISB_DATE"]
) / pd.Timedelta(days=1)
perf_sub_3_agg["MOB"] = (days_on_book / mob_days_per_month).round()

In [42]:
# Label each row with a delinquency bucket based on its snapshot-time DPD.
# Rows whose I_DPD matches no rule (e.g. missing or negative values) are left
# without a label.
snapshot_dpd = perf_sub_3_agg['I_DPD']
bucket_rules = [
    (snapshot_dpd == 0, '1.Current'),
    (snapshot_dpd.between(1, 29), '2.[1-29]'),
    (snapshot_dpd.between(30, 59), '3.[30-59]'),
    (snapshot_dpd.between(60, 89), '4.[60-89]'),
    (snapshot_dpd >= 90, '5.[90+)'),
]
for bucket_mask, bucket_label in bucket_rules:
    perf_sub_3_agg.loc[bucket_mask, 'DEL_BUCKET'] = bucket_label

In [45]:
# Write the modelling extract (target flag plus MOB and bucket) to CSV in the
# current working directory, without the index column.
export_columns = [
    'SZ_LOAN_ACCOUNT_NO',
    'Snapshot_dt',
    'xdays_ever_3m',
    'MOB',
    'DEL_BUCKET',
]
perf_sub_3_agg[export_columns].to_csv('xdays_ever_N3M_dv_trigMod.csv', index=False)

In [48]:
# Rows per snapshot that are current (DPD 0) with at least 6 months on book.
seasoned_and_current = (perf_sub_3_agg['MOB'] >= 6) & (perf_sub_3_agg['DEL_BUCKET'] == '1.Current')
perf_sub_3_agg.loc[seasoned_and_current, 'Snapshot_dt'].value_counts()

2019-12-31    3533
2019-11-30    3466
2019-09-30    3408
2019-10-31    3322
2019-08-31    3198
2019-07-31    3056
Name: Snapshot_dt, dtype: int64