In [63]:
import pandas as pd
import numpy as np

# Notebook-wide display settings: show every column, cap printed rows at 100.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Applying the PD Model

In [5]:
# Load the saved coefficient summary (one row per model term with its
# coefficient and p-value) from the processed-data folder.
# NOTE(review): presumably written by an earlier PD-model notebook — confirm provenance.
summary_table = pd.read_csv('../processed/05_summary_table.csv')
# Preview the first rows to check the expected columns are present.
summary_table.head()

Unnamed: 0,Feature name,Coefficients,p_values
0,Intercept,1.483647,
1,grade_A,0.889716,1.387834e-18
2,grade_B,0.761152,1.16363e-31
3,grade_C,0.579016,8.214773e-22
4,grade_D,0.426654,1.107295e-13


In [6]:
# One category per original feature, written as '<feature>:<category>'.
# These names are turned into scorecard rows with a coefficient of 0 in the
# next step, i.e. they act as the baseline category of each feature.
ref_categories = [
    'grade:G',
    'home_ownership:RENT_OTHER_NONE_ANY',
    'addr_state:ND_NE_IA_NV_FL_HI_AL',
    'verification_status:Verified',
    'purpose:educ__sm_b__wedd__ren_en__mov__house',
    'initial_list_status:f',
    'term:60',
    'emp_length:0',
    'mths_since_issue_d:>84',
    'int_rate:>20.281',
    'mths_since_earliest_cr_line:<140',
    'inq_last_6mths:>6',
    'acc_now_delinq:0',
    'annual_inc:<20K',
    'dti:>35',
    'mths_since_last_delinq:0-3',
    'mths_since_last_record:0-2',
]

### Creating a Scorecard

In [7]:
# Build scorecard rows for the reference categories: same columns as the
# summary table, with a coefficient of 0 and no p-value.
df_ref_categories = (
    pd.DataFrame(ref_categories, columns=['Feature name'])
    .assign(**{'Coefficients': 0, 'p_values': np.nan})
)
df_ref_categories

Unnamed: 0,Feature name,Coefficients,p_values
0,grade:G,0,
1,home_ownership:RENT_OTHER_NONE_ANY,0,
2,addr_state:ND_NE_IA_NV_FL_HI_AL,0,
3,verification_status:Verified,0,
4,purpose:educ__sm_b__wedd__ren_en__mov__house,0,
5,initial_list_status:f,0,
6,term:60,0,
7,emp_length:0,0,
8,mths_since_issue_d:>84,0,
9,int_rate:>20.281,0,


In [16]:
# Stack the fitted coefficients and the zero-coefficient reference rows, then
# order everything alphabetically by feature name.
# reset_index() (without drop) gives a fresh 0..n-1 row index and keeps the
# previous row labels in a new 'index' column; later cells address rows by
# these new integer labels.
df_scorecard = (
    pd.concat([summary_table, df_ref_categories])
    .sort_values('Feature name')
    .reset_index()
)

In [30]:
def feature_name(row):
    """Map a dummy-variable name to the original feature it belongs to.

    Parameters
    ----------
    row : str
        A value from the 'Feature name' column, e.g. 'grade_A' or
        'acc_now_delinq:>=1'.

    Returns
    -------
    str
        The first known prefix that `row` starts with; if none matches
        (for example 'Intercept'), `row` is returned unchanged.
    """
    # Note that 'inq_last' is deliberately the returned label for the
    # 'inq_last_6mths...' dummies; the label is the prefix itself.
    known_prefixes = (
        'acc_now_delinq',
        'grade',
        'addr_state',
        'annual_inc',
        'dti',
        'emp_length',
        'home_ownership',
        'initial_list_status',
        'inq_last',
        'int_rate',
        'mths_since_earliest_cr_line',
        'mths_since_last_record',
        'purpose',
        'term',
        'verification_status',
        'mths_since_issue_d',
        'mths_since_last_delinq',
    )
    return next(
        (prefix for prefix in known_prefixes if row.startswith(prefix)),
        row,
    )

In [31]:
# Tag every scorecard row with the original feature it was derived from, so
# rows can be grouped per feature below.
df_scorecard['Original feature name'] = df_scorecard['Feature name'].map(feature_name)
# List the distinct groups as a quick check of the mapping.
df_scorecard['Original feature name'].unique()

array(['Intercept', 'acc_now_delinq', 'addr_state', 'annual_inc', 'dti',
       'emp_length', 'grade', 'home_ownership', 'initial_list_status',
       'inq_last', 'int_rate', 'mths_since_earliest_cr_line',
       'mths_since_issue_d', 'mths_since_last_delinq',
       'mths_since_last_record', 'purpose', 'term', 'verification_status'],
      dtype=object)

In [32]:
# Lower and upper bound of the score range the coefficients are rescaled onto.
min_score, max_score = 300, 850

In [33]:
# Smallest coefficient within each original feature (the Intercept forms its
# own single-row group); these per-feature minima are summed into min_sum_coef.
df_scorecard.groupby('Original feature name')['Coefficients'].min()

Original feature name
Intercept                      1.483647
acc_now_delinq                 0.000000
addr_state                     0.000000
annual_inc                    -0.032055
dti                            0.000000
emp_length                     0.000000
grade                          0.000000
home_ownership                 0.000000
initial_list_status            0.000000
inq_last                      -1.286869
int_rate                       0.000000
mths_since_earliest_cr_line    0.000000
mths_since_issue_d            -0.308981
mths_since_last_delinq         0.000000
mths_since_last_record        -0.492287
purpose                        0.000000
term                           0.000000
verification_status           -0.001729
Name: Coefficients, dtype: float64

In [34]:
# Lowest attainable coefficient total: take the minimum coefficient of each
# original feature and add them up.
min_sum_coef = (
    df_scorecard
    .groupby('Original feature name')['Coefficients']
    .min()
    .sum()
)
min_sum_coef

-0.6382741431187717

In [35]:
# Largest coefficient within each original feature (the Intercept forms its
# own single-row group); these per-feature maxima are summed into max_sum_coef.
df_scorecard.groupby('Original feature name')['Coefficients'].max()

Original feature name
Intercept                      1.483647
acc_now_delinq                 0.148139
addr_state                     0.518472
annual_inc                     0.633294
dti                            0.400541
emp_length                     0.139248
grade                          0.889716
home_ownership                 0.098247
initial_list_status            0.076700
inq_last                       0.000000
int_rate                       1.090135
mths_since_earliest_cr_line    0.000000
mths_since_issue_d             1.011105
mths_since_last_delinq         0.145282
mths_since_last_record         0.000000
purpose                        0.234024
term                           0.066079
verification_status            0.107811
Name: Coefficients, dtype: float64

In [36]:
# Highest attainable coefficient total: take the maximum coefficient of each
# original feature and add them up.
max_sum_coef = (
    df_scorecard
    .groupby('Original feature name')['Coefficients']
    .max()
    .sum()
)
max_sum_coef

7.042439238384943

In [37]:
# Rescale each coefficient so that the span between the lowest and highest
# attainable coefficient totals maps onto (max_score - min_score) points.
# The value written for row 0 (Intercept) is replaced in a later cell.
df_scorecard['Score - Calculation'] = (
    df_scorecard['Coefficients']
    .mul(max_score - min_score)
    .div(max_sum_coef - min_sum_coef)
)
df_scorecard

Unnamed: 0,index,Feature name,Coefficients,p_values,Original feature name,Score - Calculation
0,0,Intercept,1.483647,,Intercept,106.240904
1,12,acc_now_delinq:0,0.000000,,acc_now_delinq,0.000000
2,49,acc_now_delinq:>=1,0.148139,1.996905e-01,acc_now_delinq,10.607919
3,2,addr_state:ND_NE_IA_NV_FL_HI_AL,0.000000,,addr_state,0.000000
4,14,addr_state_AR_MI_PA_OH_MN,0.133445,2.489827e-08,addr_state,9.555709
...,...,...,...,...,...,...
92,6,term:60,0.000000,,term,0.000000
93,29,term_int_36,0.066079,9.222822e-06,term,4.731760
94,3,verification_status:Verified,0.000000,,verification_status,0.000000
95,22,verification_status_Not Verified,0.107811,2.499151e-12,verification_status,7.720117


In [46]:
# Row 0 is addressed by position here (it is the Intercept row in the sorted
# scorecard shown above). Its score is the intercept's share of the
# coefficient range, scaled to the score range and shifted up by min_score.
df_scorecard.loc[0, 'Score - Calculation'] = (
    (df_scorecard.loc[0, 'Coefficients'] - min_sum_coef)
    / (max_sum_coef - min_sum_coef)
    * (max_score - min_score)
    + min_score
)

In [47]:
# Round the raw scores to whole numbers to get the preliminary scorecard points.
df_scorecard['Score - Preliminary'] = df_scorecard['Score - Calculation'].round()

In [48]:
# Lowest total a borrower can get with the rounded scores; rounding can push
# this away from min_score, which is what this check reveals.
min_sum_score_prel = (
    df_scorecard
    .groupby('Original feature name')['Score - Preliminary']
    .min()
    .sum()
)
min_sum_score_prel

301.0

In [50]:
# Highest total a borrower can get with the rounded scores, to compare
# against max_score.
max_sum_score_prel = (
    df_scorecard
    .groupby('Original feature name')['Score - Preliminary']
    .max()
    .sum()
)
max_sum_score_prel

850.0

In [72]:
# Start the final scores as a copy of the rounded preliminary scores.
df_scorecard['Score - Final'] = df_scorecard['Score - Preliminary']
# Manual override of row 0 (the Intercept row in the displayed scorecard).
# The preliminary minimum total was 301, so this appears to shave one point
# off the intercept to bring the minimum down to min_score (300).
# NOTE(review): hard-coded row label and value — re-derive if the model,
# the data or the sort order changes.
df_scorecard.loc[0,'Score - Final'] = 451

In [77]:
# Manual override of the row labelled 46 in the sorted scorecard.
# NOTE(review): presumably a one-point bump so the maximum total returns to
# 850 after the intercept was lowered — row 46 is not shown in the visible
# output, so confirm which category this is. The label depends on the sort
# order of df_scorecard and will silently point at another row if it changes.
df_scorecard.loc[46,'Score - Final'] = 65

In [78]:
# Sanity check on the final scorecard: (lowest attainable total, highest
# attainable total), computed from the per-feature minimum and maximum final
# scores. The pair should line up with (min_score, max_score).
df_scorecard.groupby('Original feature name')['Score - Final'].min().sum(), df_scorecard.groupby('Original feature name')['Score - Final'].max().sum()

(300.0, 850.0)