In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make "plotly_white" the default template for Plotly figures created in this notebook.
pio.templates.default = "plotly_white"

In [4]:
# Load the raw records from "test.csv" (resolved relative to the kernel's working
# directory) and print a plain-text preview of the first five rows.
data = pd.read_csv("test.csv")
print(data.head(n=5))

       ID Customer_ID      Month             Name  Age          SSN  \
0  0x160a   CUS_0xd40  September    Aaron Maashoh   23  821-00-0265   
1  0x160b   CUS_0xd40    October    Aaron Maashoh   24  821-00-0265   
2  0x160c   CUS_0xd40   November    Aaron Maashoh   24  821-00-0265   
3  0x160d   CUS_0xd40   December    Aaron Maashoh  24_  821-00-0265   
4  0x1616  CUS_0x21b1  September  Rick Rothackerj   28  004-07-5839   

  Occupation Annual_Income  Monthly_Inhand_Salary  Num_Bank_Accounts  ...  \
0  Scientist      19114.12            1824.843333                  3  ...   
1  Scientist      19114.12            1824.843333                  3  ...   
2  Scientist      19114.12            1824.843333                  3  ...   
3  Scientist      19114.12                    NaN                  3  ...   
4    _______      34847.84            3037.986667                  2  ...   

   Num_Credit_Inquiries  Credit_Mix Outstanding_Debt Credit_Utilization_Ratio  \
0                2022.0      

In [5]:
# DataFrame.info() writes its summary straight to stdout and returns None, so it is
# called bare: wrapping it in print() would append a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27323 entries, 0 to 27322
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   ID                        27323 non-null  object 
 1   Customer_ID               27323 non-null  object 
 2   Month                     27323 non-null  object 
 3   Name                      24557 non-null  object 
 4   Age                       27323 non-null  object 
 5   SSN                       27323 non-null  object 
 6   Occupation                27323 non-null  object 
 7   Annual_Income             27323 non-null  object 
 8   Monthly_Inhand_Salary     23197 non-null  float64
 9   Num_Bank_Accounts         27323 non-null  int64  
 10  Num_Credit_Card           27323 non-null  int64  
 11  Interest_Rate             27323 non-null  int64  
 12  Num_of_Loan               27323 non-null  object 
 13  Type_of_Loan              24231 non-null  object 
 14  Delay_

In [6]:
# Number of missing cells in each column (isna is the pandas alias of isnull).
print(data.isna().sum(axis=0))

ID                             0
Customer_ID                    0
Month                          0
Name                        2766
Age                            0
SSN                            0
Occupation                     0
Annual_Income                  0
Monthly_Inhand_Salary       4126
Num_Bank_Accounts              0
Num_Credit_Card                0
Interest_Rate                  0
Num_of_Loan                    0
Type_of_Loan                3092
Delay_from_due_date            0
Num_of_Delayed_Payment      1949
Changed_Credit_Limit           0
Num_Credit_Inquiries         575
Credit_Mix                     0
Outstanding_Debt               0
Credit_Utilization_Ratio       0
Credit_History_Age          2423
Payment_of_Min_Amount          0
Total_EMI_per_month            1
Amount_invested_monthly     1225
Payment_Behaviour              1
Monthly_Balance              328
dtype: int64


In [8]:
# Frequency of every payment-behaviour label, most common first
# (missing values are left out of the tally).
data.loc[:, "Payment_Behaviour"].value_counts()

Low_spent_Small_value_payments      6915
High_spent_Medium_value_payments    4830
High_spent_Large_value_payments     3796
Low_spent_Medium_value_payments     3686
High_spent_Small_value_payments     3153
Low_spent_Large_value_payments      2853
!@9#%8                              2089
Name: Payment_Behaviour, dtype: int64

In [9]:
# Number of records per occupation, split by payment-behaviour label.
#
# A box plot summarises a numeric value axis, but this figure only had a categorical
# x ("Occupation") and no y, so there was no distribution to draw. Grouped counts show
# how the payment-behaviour labels are spread across occupations instead.
# The old colour map was keyed by 'Poor'/'Standard'/'Good', none of which is a
# Payment_Behaviour label, so it never applied and is dropped.
fig = px.histogram(data,
                   x="Occupation",
                   color="Payment_Behaviour",
                   barmode="group",
                   title="Payment Behaviour by Occupation")
fig.show()

In [10]:
# Distribution of annual income for each payment-behaviour label.
#
# Annual_Income is loaded as object dtype (see the data.info() output), so plotting it
# directly gives a categorical y axis. Build a numeric copy for the plot only; the
# original `data` frame is left untouched.
# NOTE(review): stripping "_" assumes the non-numeric entries are numbers with stray
# underscores (like the "24_" visible in Age) — confirm against the raw file. Anything
# still unparseable becomes NaN and is simply not drawn.
income_numeric = pd.to_numeric(
    data["Annual_Income"].astype(str).str.strip("_"),
    errors="coerce",
)
income_plot_data = data.assign(Annual_Income=income_numeric)

# The former colour map used 'Poor'/'Standard'/'Good' keys, which match no
# Payment_Behaviour label, so it had no effect and is removed.
fig = px.box(income_plot_data,
             x="Payment_Behaviour",
             y="Annual_Income",
             color="Payment_Behaviour",
             title="Annual Income by Payment Behaviour")
fig.update_traces(quartilemethod="exclusive")
fig.show()

In [13]:
# Distribution of Delay_from_due_date for each payment-behaviour label.
# The title now names the column that is actually plotted (the previous one referred to
# credit scores and to the number of delayed payments, neither of which is on this chart).
# The former colour map used 'Poor'/'Standard'/'Good' keys, which match no
# Payment_Behaviour label, so it had no effect and is removed.
fig = px.box(data,
             x="Payment_Behaviour",
             y="Delay_from_due_date",
             color="Payment_Behaviour",
             title="Delay from Due Date by Payment Behaviour")
fig.update_traces(quartilemethod="exclusive")
fig.show()

In [14]:
# Distribution of Total_EMI_per_month for each payment-behaviour label.
# The title now describes the plotted columns; the previous one referred to credit
# scores, which are not part of this chart.
# The former colour map used 'Poor'/'Standard'/'Good' keys, which match no
# Payment_Behaviour label, so it had no effect and is removed.
# NOTE(review): the dtype of Total_EMI_per_month is not visible here; if it is object,
# coerce it with pd.to_numeric before plotting — confirm.
fig = px.box(data,
             x="Payment_Behaviour",
             y="Total_EMI_per_month",
             color="Payment_Behaviour",
             title="Total EMI per Month by Payment Behaviour")
fig.update_traces(quartilemethod="exclusive")
fig.show()

In [15]:
# Distribution of Monthly_Balance for each payment-behaviour label.
# The title now describes the plotted columns; the previous one referred to credit
# scores, which are not part of this chart.
# The former colour map used 'Poor'/'Standard'/'Good' keys, which match no
# Payment_Behaviour label, so it had no effect and is removed.
# NOTE(review): the dtype of Monthly_Balance is not visible here; if it is object,
# coerce it with pd.to_numeric before plotting — confirm.
fig = px.box(data,
             x="Payment_Behaviour",
             y="Monthly_Balance",
             color="Payment_Behaviour",
             title="Monthly Balance by Payment Behaviour")
fig.update_traces(quartilemethod="exclusive")
fig.show()

In [16]:
# Ordinal codes for the credit-mix label. Any value that is not one of these three
# keys (including missing values) becomes NaN after the mapping.
credit_mix_codes = {"Bad": 0, "Standard": 1, "Good": 2}

# Only encode while the column still holds text. Re-running the mapping on an
# already-encoded column would look numeric codes up in a dict keyed by strings and
# silently turn every value into NaN, so the guard keeps this cell safe to re-run.
if not pd.api.types.is_numeric_dtype(data["Credit_Mix"]):
    data["Credit_Mix"] = data["Credit_Mix"].map(credit_mix_codes)

In [19]:
from sklearn.model_selection import train_test_split

# Model inputs, listed in the same order in which the interactive prediction cell
# collects its 12 values. Free-text / categorical columns (e.g. Type_of_Loan,
# Payment_of_Min_Amount) are left out: the classifier needs a purely numeric matrix
# and the prediction cell never asks for them.
feature_columns = ["Annual_Income", "Monthly_Inhand_Salary",
                   "Num_Bank_Accounts", "Num_Credit_Card",
                   "Interest_Rate", "Num_of_Loan",
                   "Delay_from_due_date", "Num_of_Delayed_Payment",
                   "Credit_Mix", "Outstanding_Debt",
                   "Credit_History_Age", "Monthly_Balance"]
target_column = "Payment_Behaviour"
# Junk label that appears in the Payment_Behaviour value counts; it is not a real class.
invalid_target = "!@9#%8"

# Work on a copy so the cleaning below never mutates `data`.
feature_frame = data[feature_columns].copy()

# Credit_History_Age: while it is still text, read the first two integers as years and
# months and convert to total months (same arithmetic as the Credit_History_Age helper
# defined later in the notebook). Text without two integers becomes NaN.
history_age = feature_frame["Credit_History_Age"]
if not pd.api.types.is_numeric_dtype(history_age):
    year_month = history_age.astype(str).str.extract(r"(\d+)\D+(\d+)").astype(float)
    feature_frame["Credit_History_Age"] = year_month[0] * 12 + year_month[1]

# Coerce every remaining text column to numbers.
# NOTE(review): stripping "_" assumes the non-numeric entries are numbers with stray
# underscores (like the "24_" visible in Age) — confirm against the raw file.
# Anything still unparseable becomes NaN.
for column in feature_columns:
    if not pd.api.types.is_numeric_dtype(feature_frame[column]):
        feature_frame[column] = pd.to_numeric(
            feature_frame[column].astype(str).str.strip("_"),
            errors="coerce",
        )

# Keep only rows with a complete numeric feature vector and a genuine target label.
# Dropping is the simplest safe choice; if too many rows are lost, impute instead,
# fitting the imputer on the training split only.
usable_rows = (feature_frame.notna().all(axis=1)
               & data[target_column].notna()
               & data[target_column].ne(invalid_target))

x = feature_frame.loc[usable_rows].to_numpy(dtype=float)
# 1-D label vector, which is the shape scikit-learn classifiers expect.
y = data.loc[usable_rows, target_column].to_numpy()

assert x.shape[0] == y.shape[0] and x.shape[0] > 0, "no usable rows left after cleaning"

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hold out a third of the rows for evaluation; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(x, y,
                                                test_size=0.33,
                                                random_state=42)

# Seed the forest as well, otherwise every run trains a different model and the
# predictions cannot be reproduced.
model = RandomForestClassifier(random_state=42)
# np.ravel flattens a column-vector label array to 1-D (and leaves a 1-D array as is),
# which avoids scikit-learn's DataConversionWarning about column-vector targets.
model.fit(xtrain, np.ravel(ytrain))

In [26]:
def Credit_History_Age(s):
    """Convert a credit-history-age text value into a total number of months.

    Parameters
    ----------
    s : str or missing value
        Text of the form "<years> <word> and <months> <word>", for example
        something like "22 Years and 9 Months". Missing values (None, NaN, pd.NA)
        are accepted so the function can be applied to a column that has gaps.

    Returns
    -------
    int or float
        ``years * 12 + months`` as an int for a valid string; float NaN when ``s``
        is missing.

    Raises
    ------
    IndexError
        If the text contains no " and " separator.
    ValueError
        If either part does not start with an integer.
    """
    # Missing cells reach this function as NaN/None, which have no .split();
    # propagate them as NaN instead of crashing the whole column conversion.
    if pd.isna(s):
        return float("nan")

    parts = s.split(' and ')
    # The leading token of each part is the number; the trailing word is ignored.
    years = int(parts[0].split(' ')[0])
    months = int(parts[1].split(' ')[0])
    return years * 12 + months

In [None]:
# Interactive single-record prediction: read 12 numeric values from the user, build a
# one-row feature matrix and print the model's predicted label.
# NOTE(review): the model must have been trained on exactly these 12 features, in this
# order — confirm against the training cell.
# The labels say "Payment Behaviour" because that is the column the model is trained
# to predict.
print("Payment Behaviour Prediction : ")
a = float(input("Annual Income: "))
b = float(input("Monthly Inhand Salary: "))
c = float(input("Number of Bank Accounts: "))
d = float(input("Number of Credit cards: "))
e = float(input("Interest rate: "))
f = float(input("Number of Loans: "))
g = float(input("Average number of days delayed by the person: "))
h = float(input("Number of delayed payments: "))
# Credit mix has to be numeric like every other input: a single string in the list
# would turn the whole NumPy array into strings. The codes in the prompt match the
# encoding applied to the Credit_Mix column (Bad: 0, Standard: 1, Good: 2).
i = float(input("Credit Mix (Bad: 0, Standard: 1, Good: 2) : "))
j = float(input("Outstanding Debt: "))
# Months, i.e. the unit returned by the Credit_History_Age() helper.
k = float(input("Credit History Age (in months): "))
l = float(input("Monthly Balance: "))

# Shape (1, 12): one sample, twelve features.
features = np.array([[a, b, c, d, e, f, g, h, i, j, k, l]])
print("Predicted Payment Behaviour = ", model.predict(features))