### Credit Assesment Classification Project

#### Imports

In [184]:
import sklearn
import pandas as pd
import numpy as np
from dateutil import relativedelta

In [185]:
train=pd.read_csv("train.csv")
test=pd.read_csv("test.csv")

  train=pd.read_csv("train.csv")


#### Getting Baseline for Classification

In [186]:
baseline_acc_train=train["Credit_Score"].value_counts()
baseline_acc_train

Credit_Score
Standard    53174
Poor        28998
Good        17828
Name: count, dtype: int64

In [187]:
total_samples = baseline_acc_train.sum()
majority_count = baseline_acc_train.max()
percentage_majority = (majority_count / total_samples) * 100

print("Baseline prediction when guessing majority class for training dataset: {:.2f}%".format(percentage_majority))

Baseline prediction when guessing majority class for training dataset: 53.17%


#### Familiarizing with the data

Fixing the occurences of _ in float/int type objects

In [188]:
train['SSN_Location'] = train['SSN'].str[:3]

In [189]:
for column in ['Annual_Income',"Age", 'Num_of_Loan', 'Num_of_Delayed_Payment', "Changed_Credit_Limit", "Outstanding_Debt", "Amount_invested_monthly","Monthly_Balance","SSN_Location"]:
    train[column] = train[column].str.replace('_', '')

for column in ['Age', 'Num_of_Loan', "SSN_Location"]:
    try:
        train[[column]] = train[[column]].astype(int)
    except:
        pass

for column in train[['Annual_Income', 'Num_of_Delayed_Payment', "Changed_Credit_Limit", "Outstanding_Debt", "Amount_invested_monthly","Monthly_Balance"]]:
    try:
        train[[column]] = train[[column]].astype(float)
    except:
        pass

Dealing with some Na values by filling them with inferred values for the same client and othertimes by changing Nas to different values

In [190]:
train['Monthly_Inhand_Salary'] = train.groupby('Customer_ID')['Monthly_Inhand_Salary'].transform(lambda x: x.fillna(x.median()))
train['Monthly_Inhand_Salary'].fillna(train['Monthly_Inhand_Salary'].median(), inplace=True)

In [191]:
train.sort_values(['Customer_ID', 'Month'], inplace=True)
train['Num_of_Delayed_Payment'].fillna(method='ffill', inplace=True)
train['Num_of_Delayed_Payment'].fillna(train['Num_of_Delayed_Payment'].median(), inplace=True)

In [192]:
train['Num_Credit_Inquiries'].fillna(method='ffill', inplace=True)
train['Num_Credit_Inquiries'].fillna(train['Num_Credit_Inquiries'].median(), inplace=True)

In [193]:
train['Type_of_Loan'].fillna('Missing_Information', inplace=True)

In [194]:
train['Amount_invested_monthly'] = train.groupby('Customer_ID')['Amount_invested_monthly'].transform(lambda x: x.fillna(x.median()))
train["Amount_invested_monthly"].fillna(train['Amount_invested_monthly'].median(), inplace=True)

In [195]:
train['Monthly_Balance'] = train.groupby('Customer_ID')['Monthly_Balance'].transform(
    lambda x: x.fillna(x.median() if x.notna().any() else np.nan)
)

train["Monthly_Balance"].fillna(train['Monthly_Balance'].median(), inplace=True)


In [196]:
train.sort_values(['Customer_ID', 'Month', 'Credit_History_Age'], inplace=True)
train['Credit_History_Age'] = train.groupby('Customer_ID')['Credit_History_Age'].transform(
    lambda x: x.fillna(x.ffill().bfill())
)

def calculate_month_difference(start_month, end_month):
    start_year = 1
    end_year = 1

    if start_month > end_month:
        end_year = 2

    months_diff = abs(start_month - end_month)
    years_diff = end_year - start_year

    return relativedelta.relativedelta(months=months_diff, years=years_diff)

for index, row in train.iterrows():
    if pd.isna(row['Credit_History_Age']):
        client_id = row['Customer_ID']
        month_num = row['Month']
        
        prev_row = train.loc[(train['Customer_ID'] == client_id) & (train['Month'] < month_num), 'Credit_History_Age'].dropna().tail(1)
        next_row = train.loc[(train['Customer_ID'] == client_id) & (train['Month'] > month_num), 'Credit_History_Age'].dropna().head(1)
        
        if not prev_row.empty and not next_row.empty:
            prev_age = pd.to_timedelta(prev_row.values[0])
            next_age = pd.to_timedelta(next_row.values[0])
            
            month_diff = calculate_month_difference(prev_age.months, next_age.months)
            true_age = prev_age + month_diff
            
            train.at[index, 'Credit_History_Age'] = str(true_age.years) + ' Years and ' + str(true_age.months) + ' Months'



train['Credit_History_Age'].fillna(train['Credit_History_Age'].mode()[0], inplace=True)

Dropping ID, Customer_ID and Name columns. Extracting first 3 values from SSN column, adding column to show the result of classification for the previous loan request made by the client.

In [197]:
train = train.sort_values(["Customer_ID", 'Month'])
train['Previous_Classification'] = ''
for index, row in train.iterrows():
    if index > 0 and train.at[index-1, "Customer_ID"] == row["Customer_ID"]:
        train.at[index, 'Previous_Classification'] = train.at[index-1, 'Credit_Score']
    else:
        train.at[index, 'Previous_Classification'] = "NCR"

train.drop(columns=["ID","Customer_ID", "Name","SSN"],inplace=True)

In [198]:
train.dtypes

Month                        object
Age                           int32
Occupation                   object
Annual_Income               float64
Monthly_Inhand_Salary       float64
Num_Bank_Accounts             int64
Num_Credit_Card               int64
Interest_Rate                 int64
Num_of_Loan                   int32
Type_of_Loan                 object
Delay_from_due_date           int64
Num_of_Delayed_Payment      float64
Changed_Credit_Limit         object
Num_Credit_Inquiries        float64
Credit_Mix                   object
Outstanding_Debt            float64
Credit_Utilization_Ratio    float64
Credit_History_Age           object
Payment_of_Min_Amount        object
Total_EMI_per_month         float64
Amount_invested_monthly     float64
Payment_Behaviour            object
Monthly_Balance             float64
Credit_Score                 object
SSN_Location                 object
Previous_Classification      object
dtype: object

In [199]:
train

Unnamed: 0,Month,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Type_of_Loan,...,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score,SSN_Location,Previous_Classification
56755,April,17,Lawyer,30625.94,2706.161667,6,5,27,2,"Credit-Builder Loan, and Home Equity Loan",...,32.843081,10 Years and 5 Months,Yes,42.941090,87.909909,!@9#%8,419.765167,Poor,913,Poor
56759,August,18,Lawyer,30625.94,2706.161667,6,5,27,2,"Credit-Builder Loan, and Home Equity Loan",...,30.077191,10 Years and 9 Months,Yes,42.941090,77.314276,High_spent_Medium_value_payments,400.360801,Poor,913,Poor
56753,February,17,Lawyer,30625.94,2706.161667,6,5,27,2,"Credit-Builder Loan, and Home Equity Loan",...,29.439759,10 Years and 3 Months,Yes,42.941090,176.132567,High_spent_Small_value_payments,311.542510,Poor,913,Standard
56752,January,17,Lawyer,30625.94,2706.161667,6,5,27,2,"Credit-Builder Loan, and Home Equity Loan",...,26.612093,10 Years and 2 Months,Yes,42.941090,244.750283,Low_spent_Large_value_payments,252.924793,Standard,913,NCR
56758,July,18,Lawyer,30625.94,2706.161667,6,5,27,2,"Credit-Builder Loan, and Home Equity Loan",...,38.149539,10 Years and 8 Months,Yes,42.941090,266.597160,Low_spent_Small_value_payments,251.077916,Poor,913,Poor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99152,January,29,Scientist,41398.44,3749.870000,8,7,13,6,"Auto Loan, Payday Loan, Payday Loan, Mortgage ...",...,29.506673,18 Years and 2 Months,Yes,182.976649,195.529273,Low_spent_Large_value_payments,336.966433,Standard,832,NCR
99158,July,30,Scientist,41398.44,3749.870000,8,7,13,6,"Auto Loan, Payday Loan, Payday Loan, Mortgage ...",...,33.916363,18 Years and 8 Months,Yes,182.976649,257.989694,High_spent_Small_value_payments,336.966433,Good,832,Standard
99157,June,29,Scientist,41398.44,3749.870000,8,7,13,6,"Auto Loan, Payday Loan, Payday Loan, Mortgage ...",...,36.970072,18 Years and 7 Months,Yes,182.976649,47.007379,High_spent_Medium_value_payments,336.966433,Standard,832,Standard
99154,March,29,Scientist,41398.44,3749.870000,8,7,13,6,"Auto Loan, Payday Loan, Payday Loan, Mortgage ...",...,25.175964,18 Years and 4 Months,Yes,182.976649,336.130231,Low_spent_Small_value_payments,336.966433,Standard,832,Standard


Dealing with missing/incorrect values

In [200]:
train["Credit_History_Age"].value_counts()

Credit_History_Age
19 Years and 4 Months     507
15 Years and 10 Months    497
19 Years and 5 Months     490
15 Years and 11 Months    489
17 Years and 9 Months     487
                         ... 
0 Years and 3 Months       25
33 Years and 7 Months      17
0 Years and 2 Months       16
33 Years and 8 Months      13
0 Years and 1 Months        2
Name: count, Length: 404, dtype: int64

In [201]:
for val in train["Credit_History_Age"]:
    print(val)

10 Years and 5 Months
10 Years and 9 Months
10 Years and 3 Months
10 Years and 2 Months
10 Years and 8 Months
10 Years and 7 Months
10 Years and 4 Months
10 Years and 6 Months
31 Years and 0 Months
31 Years and 0 Months
30 Years and 6 Months
30 Years and 5 Months
30 Years and 11 Months
30 Years and 10 Months
30 Years and 7 Months
30 Years and 9 Months
15 Years and 6 Months
15 Years and 10 Months
15 Years and 10 Months
15 Years and 3 Months
15 Years and 9 Months
15 Years and 8 Months
15 Years and 5 Months
15 Years and 7 Months
15 Years and 6 Months
15 Years and 10 Months
15 Years and 4 Months
15 Years and 3 Months
15 Years and 9 Months
15 Years and 8 Months
15 Years and 5 Months
15 Years and 7 Months
17 Years and 6 Months
17 Years and 10 Months
17 Years and 4 Months
17 Years and 3 Months
17 Years and 9 Months
17 Years and 8 Months
17 Years and 5 Months
17 Years and 5 Months
21 Years and 1 Months
21 Years and 5 Months
21 Years and 5 Months
20 Years and 10 Months
21 Years and 4 Months
21 

In [202]:
train.isna().sum()

Month                       0
Age                         0
Occupation                  0
Annual_Income               0
Monthly_Inhand_Salary       0
Num_Bank_Accounts           0
Num_Credit_Card             0
Interest_Rate               0
Num_of_Loan                 0
Type_of_Loan                0
Delay_from_due_date         0
Num_of_Delayed_Payment      0
Changed_Credit_Limit        0
Num_Credit_Inquiries        0
Credit_Mix                  0
Outstanding_Debt            0
Credit_Utilization_Ratio    0
Credit_History_Age          0
Payment_of_Min_Amount       0
Total_EMI_per_month         0
Amount_invested_monthly     0
Payment_Behaviour           0
Monthly_Balance             0
Credit_Score                0
SSN_Location                0
Previous_Classification     0
dtype: int64