<a href="https://colab.research.google.com/github/S-Ahsan-Haider/Case-Studies/blob/main/Credit_Risk_Assesment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading Data and Libraries

In [None]:
from google.colab import files
# Ask Colab for a file upload; the result is indexed by filename ('Credit_score.csv') when the CSV is read in the next cell.
uploaded = files.upload()

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as s
import io
import matplotlib.pyplot as plt
import seaborn as sns
# Wrap the uploaded bytes in an in-memory buffer and parse them as CSV into the raw frame.
csv_buffer = io.BytesIO(uploaded['Credit_score.csv'])
data = pd.read_csv(csv_buffer)

# Let wide frames render up to 50 columns instead of being truncated.
pd.set_option('display.max_columns', 50)

## Data Structure

In [None]:
# Creating a workable copy so the cleaning steps below modify `df` and leave the raw `data` frame as loaded

df = data.copy()

In [None]:
# Dimensions of the working copy as (rows, columns)
df.shape

(100000, 30)

In [None]:
# Peek at the first three monthly records
df.head(3)

Unnamed: 0,ID,Customer_ID,Month,Name,Age,SSN,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Type_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Changed_Credit_Limit,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Unnamed: 27,Unnamed: 28,Unnamed: 29
0,0x1602,CUS_0xd40,January,Aaron Maashoh,23,821-00-0265,Scientist,19114.12,1824.843333,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan,...",3,7.0,11.27,4.0,_,809.98,26.82262,22 Years and 1 Months,No,49.574949,80.41529544,High_spent_Small_value_payments,312.4940887,,,..
1,0x1603,CUS_0xd40,February,Aaron Maashoh,23,821-00-0265,Scientist,19114.12,,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan,...",-1,,11.27,4.0,Good,809.98,31.94496,,No,49.574949,118.2802216,Low_spent_Large_value_payments,284.6291625,,,
2,0x1604,CUS_0xd40,March,Aaron Maashoh,-500,821-00-0265,Scientist,19114.12,,3,4,3,4,"Auto Loan, Credit-Builder Loan, Personal Loan,...",3,7.0,_,4.0,Good,809.98,28.609352,22 Years and 3 Months,No,49.574949,81.69952126,Low_spent_Medium_value_payments,331.2098629,,,


In [None]:
# List every column name available in the working copy
df.columns

Index(['ID', 'Customer_ID', 'Month', 'Name', 'Age', 'SSN', 'Occupation',
       'Annual_Income', 'Monthly_Inhand_Salary', 'Num_Bank_Accounts',
       'Num_Credit_Card', 'Interest_Rate', 'Num_of_Loan', 'Type_of_Loan',
       'Delay_from_due_date', 'Num_of_Delayed_Payment', 'Changed_Credit_Limit',
       'Num_Credit_Inquiries', 'Credit_Mix', 'Outstanding_Debt',
       'Credit_Utilization_Ratio', 'Credit_History_Age',
       'Payment_of_Min_Amount', 'Total_EMI_per_month',
       'Amount_invested_monthly', 'Payment_Behaviour', 'Monthly_Balance',
       'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29'],
      dtype='object')

In [None]:
# Summary statistics; only the columns pandas already parsed as numeric are included at this point
df.describe()

Unnamed: 0,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Delay_from_due_date,Num_Credit_Inquiries,Credit_Utilization_Ratio,Total_EMI_per_month,Unnamed: 27,Unnamed: 28
count,84998.0,100000.0,100000.0,100000.0,100000.0,98035.0,100000.0,100000.0,0.0,0.0
mean,4194.17085,17.09128,22.47443,72.46604,21.06878,27.754251,32.285173,1403.118217,,
std,3183.686167,117.404834,129.05741,466.422621,14.860104,193.177339,5.116875,8306.04127,,
min,303.645417,-1.0,0.0,1.0,-5.0,0.0,20.0,0.0,,
25%,1625.568229,3.0,4.0,8.0,10.0,3.0,28.052567,30.30666,,
50%,3093.745,6.0,5.0,13.0,18.0,6.0,32.305784,69.249473,,
75%,5957.448333,7.0,7.0,20.0,28.0,9.0,36.496663,161.224249,,
max,15204.63333,1798.0,1499.0,5797.0,67.0,2597.0,50.0,82331.0,,


In [None]:
# Per-column dtype and non-null count (shows which numeric-looking columns were read as object)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 30 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   ID                        100000 non-null  object 
 1   Customer_ID               100000 non-null  object 
 2   Month                     100000 non-null  object 
 3   Name                      90015 non-null   object 
 4   Age                       100000 non-null  object 
 5   SSN                       100000 non-null  object 
 6   Occupation                100000 non-null  object 
 7   Annual_Income             100000 non-null  object 
 8   Monthly_Inhand_Salary     84998 non-null   float64
 9   Num_Bank_Accounts         100000 non-null  int64  
 10  Num_Credit_Card           100000 non-null  int64  
 11  Interest_Rate             100000 non-null  int64  
 12  Num_of_Loan               100000 non-null  object 
 13  Type_of_Loan              88592 non-null   ob

In [None]:
# There are too many null values to handle row by row. We will keep them as they are for now and
# create a new dataframe aggregated by customer. Any null or duplicated values that remain there
# will then be removed.

## Feature Engineering and Data Cleaning

In [None]:
# Pull the year and month counts out of the free-text 'Credit_History_Age' column.
# Rows where the pattern does not match (including missing text) end up as 0.

history_patterns = {
    'C_History_Years': r'(\d+) Years',
    'C_History_Months': r'(\d+) Months',
}

for new_col, pattern in history_patterns.items():
    extracted = df['Credit_History_Age'].str.extract(pattern)
    df[new_col] = extracted.fillna(0).astype(int)

In [None]:
# Remove identifier/free-text columns that are not used in the analysis
# ('Credit_History_Age' has already been split into numeric columns above).

df.drop(columns=['ID', 'Name', 'Credit_History_Age'], inplace=True)

In [None]:
# Convert numeric columns that were read as text: strip stray underscores and spaces, then parse.

Columns = [
    'Age',
    'Annual_Income',
    'Num_of_Loan',
    'Num_of_Delayed_Payment',
    'Changed_Credit_Limit',
    'Outstanding_Debt',
    'Amount_invested_monthly',
]

for text_col in Columns:
    stripped = df[text_col].str.replace('_', '').str.replace(' ', '')
    df[text_col] = pd.to_numeric(stripped)

# Monthly_Balance is parsed leniently: anything unparseable becomes NaN instead of raising.
df['Monthly_Balance'] = pd.to_numeric(df['Monthly_Balance'], errors='coerce')

In [None]:
# Correcting the erroneous values in the columns


# Replacing faulty values with "Unknown"
df['Credit_Mix'] = df['Credit_Mix'].replace('_', 'Unknown')


# Correcting the negative values: a negative entry is treated as a sign error,
# so its magnitude is kept (vectorised abs() instead of a row-wise lambda; NaN stays NaN).
non_negative_cols = [
    'Age', 'Annual_Income', 'Monthly_Inhand_Salary', 'Num_Bank_Accounts',
    'Num_Credit_Card', 'Interest_Rate', 'Num_of_Loan', 'Delay_from_due_date',
    'Num_of_Delayed_Payment', 'Changed_Credit_Limit', 'Num_Credit_Inquiries',
    'Outstanding_Debt', 'Credit_Utilization_Ratio', 'Total_EMI_per_month',
    'Amount_invested_monthly', 'Monthly_Balance',
]
for col in non_negative_cols:
    df[col] = df[col].abs()


# Correcting the unnaturally high values by capping each column at a plausible maximum
upper_caps = {
    'Age': 100,
    'Num_Bank_Accounts': 15,
    'Num_Credit_Card': 30,
    'Num_of_Loan': 200,
    'Num_Credit_Inquiries': 150,
}
for col, cap in upper_caps.items():
    df[col] = df[col].clip(upper=cap)

# Interest rates above 99 are divided by 100; all other values are left unchanged.
interest_rate = df['Interest_Rate']
df['Interest_Rate'] = interest_rate.mask(interest_rate > 99, interest_rate / 100)

# Deliberately left uncapped:
#   'Num_of_Delayed_Payment' - possible if there are multiple small credits with low EMIs
#   'Total_EMI_per_month'    - possible as the max is < 10 times of mean; can happen due to multiple loans

In [None]:
# Checking the values: re-run the summary statistics now that every cleaned column is numeric

df.describe()

## Customer dataset

In [None]:
# Creating a customer centric dataframe using customer based aggregation

def _most_frequent(values):
  """Return the first entry of ``values.mode()``, or NaN when the group has no non-null value.

  Replaces the repeated ``lambda x: x.mode().iloc[0]``, which raised IndexError
  for a customer whose column was null in every monthly row.
  """
  modes = values.mode()
  return modes.iloc[0] if not modes.empty else np.nan


# Stable attributes take the customer's most frequent monthly value; behavioural
# measures are averaged. Because 'Amount_invested_monthly' uses two aggregations,
# the result has two-level column labels (flattened positionally in a later cell).
cust = df.groupby('Customer_ID').agg({
    'Annual_Income': _most_frequent,
    'Age': _most_frequent,
    'Occupation': _most_frequent,
    'Monthly_Inhand_Salary': _most_frequent,
    'Num_Bank_Accounts': _most_frequent,
    'Num_Credit_Card': _most_frequent,
    'Interest_Rate': 'mean',
    'Num_of_Loan': _most_frequent,
    # 'Type_of_Loan': not aggregated yet
    'Delay_from_due_date': 'mean',
    'Num_of_Delayed_Payment': 'mean',
    'Changed_Credit_Limit': 'mean',
    'Num_Credit_Inquiries': _most_frequent,
    'Credit_Mix': _most_frequent,
    'Outstanding_Debt': _most_frequent,
    'Credit_Utilization_Ratio': 'mean',
    'C_History_Years': _most_frequent,
    'Payment_of_Min_Amount': _most_frequent,
    'Total_EMI_per_month': _most_frequent,
    'Amount_invested_monthly': ['sum', 'mean'],
    # 'Payment_Behaviour': not aggregated yet
    'Monthly_Balance': 'mean'
    })

cust.head(5)

In [None]:
# Creating a workable copy so later edits to `c` do not alter the aggregated `cust` frame

c = cust.copy()
c.shape

In [None]:
# Flatten the aggregated column labels into plain names.
# The assignment is positional, so this list must follow the order of the
# aggregation dictionary ('Amount_invested_monthly' contributes sum, then mean).

customer_columns = [
    'Annual_Income', 'Age', 'Occupation', 'Monthly_Inhand_Salary',
    'Num_Bank_Accounts', 'Num_Credit_Card', 'Avg_Interest_Rate', 'Num_of_Loan',
    'Avg_Delay_from_due_date', 'Avg_Num_of_Delayed_Payment',
    'Avg_Changed_Credit_Limit', 'Num_Credit_Inquiries', 'Credit_Mix',
    'Outstanding_Debt', 'Avg_Credit_Utilization_Ratio', 'C_History_Years',
    'Payment_of_Min_Amount', 'Total_EMI_per_month',
    'Total_Amount_invested_monthly', 'Avg_Amount_invested_monthly',
    'Avg_Monthly_Balance',
]
c.columns = customer_columns
c.describe()

In [None]:
# Creating Bins for Numerical Values for easy use
#
# include_lowest=True keeps values equal to the first edge (0) inside the first
# bin. Without it pd.cut returns NaN for them, and the dropna() further down
# would silently discard those customers (e.g. anyone with no delayed payments).

bins = [0, 3000, 6000, 9000, 12000, 15000, np.inf]
c['Salary_Bin'] = pd.cut(c['Monthly_Inhand_Salary'], bins=bins, include_lowest=True,
                         labels=['0-2999', '3000-5999', '6000-8999', '9000-11999', '12000-14999', '15000+'])

bins = [0, 10, 20, 30, np.inf]
c['History_Bin'] = pd.cut(c['C_History_Years'], bins=bins, include_lowest=True,
                          labels=['0-10', '10-20', '20-30', '30+'])

bins = [0, 1000, 2000, 3000, 4000, 5000]
c['Debt_Bin'] = pd.cut(c['Outstanding_Debt'], bins=bins, include_lowest=True,
                       labels=['0-1000', '1000-2000', '2000-3000', '3000-4000', '4000-5000'])

# NOTE(review): the source column is an average count of delayed payments, yet the labels say "Days" - confirm intent.
bins = [0, 200, 400, 600, 800, 1000, np.inf]
c['Tardiness_Bin'] = pd.cut(c['Avg_Num_of_Delayed_Payment'], bins=bins, include_lowest=True,
                            labels=['0-200 Days', '200-400 Days', '400-600 Days', '600-800 Days', '800-1000 Days', '1000+ Days'])

# Age bins are cut from 'Age' (the original cut 'Outstanding_Debt' here by mistake).
bins = [0, 20, 40, 60, 80, np.inf]
c['Age_Bin'] = pd.cut(c['Age'], bins=bins, include_lowest=True,
                      labels=['Below 20 Years', '20-40 Years', '40-60 Years', '60-80 Years', '80 Years and Above'])

In [None]:
# Encode the credit-mix label as an ordinal score (higher is better).
# 'Unknown' shares the lowest score with 'Bad'.

mapping = {
    'Good': 3,
    'Standard': 2,
    'Bad': 1,
    'Unknown': 1,
}
credit_mix_labels = c['Credit_Mix']
c['Credit_Mix_Rating'] = credit_mix_labels.map(mapping)

In [None]:
# Keep only customers whose occupation is not the '_______' placeholder

has_real_occupation = c['Occupation'].ne('_______')
c = c[has_real_occupation]

In [None]:
# Checking for Null values: percentage of missing entries per column of the customer frame.
# The share is taken over the rows of `c` itself; dividing the counts by len(df)
# (the monthly-record frame) understated every percentage.

(c.isnull().mean() * 100).sort_values(ascending=False)

In [None]:
# Discard every customer row that still has at least one missing value

c = c.dropna(axis=0, how='any')

In [None]:
# Checking for duplicates: count rows whose column values repeat an earlier row
# (the Customer_ID index is not part of the comparison)

c.duplicated().sum()

0

## Data Visualisation

In [None]:
# Pairwise correlations between the numeric customer columns, drawn as an annotated heatmap

fig, ax = plt.subplots(figsize=(20, 20))
n_c = c.select_dtypes(include=np.number)
c_matrix = n_c.corr()
sns.heatmap(c_matrix, annot=True, cmap='coolwarm', ax=ax)
plt.show()

In [None]:

# Age Distribution

# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
ax = sns.kdeplot(data=c['Age'], fill=True)
ax.set(xlabel='Age', ylabel='Density', title='Age Distribution of Customers')
plt.show()

In [None]:
# Monthly salary distributions

# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
ax = sns.kdeplot(data=c['Monthly_Inhand_Salary'], fill=True)
ax.set(xlabel='Monthly Inhand Salary', ylabel='Density',
       title='Monthly Inhand Salary Distribution of Customers')
plt.show()

In [None]:
# Bank accounts per customer distribution

ax = sns.histplot(data=c, x='Num_Bank_Accounts', kde=False, color='red')
# Each bar counts customers (rows of `c`), so the y-axis is labelled accordingly.
ax.set(xlabel='No. of Bank Accounts', ylabel='Number of Customers',
       title='No. of bank Accounts by a customer')
ax.grid(True)
plt.show()

In [None]:

# Loan Distributions across customers

ax = sns.histplot(data=c, x='Num_of_Loan', kde=False, color='green')
# Each bar counts customers (rows of `c`), so the y-axis is labelled accordingly.
ax.set(xlabel='No. of loans', ylabel='Number of Customers',
       title='No. of loans by a customer')
ax.grid(True)
plt.show()

In [None]:
# Interest Rates Distributions

# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
ax = sns.kdeplot(data=c['Avg_Interest_Rate'], fill=True)
ax.set(xlabel='Interest Rates', ylabel='Density',
       title='Interest Rates Distribution of Customers')
plt.show()

In [None]:
# How many customers fall under each occupation (at most the 50 largest groups are shown)

occupation_counts = c['Occupation'].value_counts()
print(occupation_counts.nlargest(50))

In [None]:
# Customer counts for every (occupation, number of bank accounts) pair

crosstab = pd.crosstab(c['Occupation'], c['Num_Bank_Accounts'])
ax = sns.heatmap(crosstab, annot=True, fmt='d', cmap='YlGnBu')
ax.set_xlabel('Number of Bank Accounts')
ax.set_ylabel('Occupation')
ax.set_title('Occupation vs. Number of Bank Accounts')
plt.show()

In [None]:
# Share of each occupation falling into every salary slab (rows are normalised to 100%)

crosstab = pd.crosstab(c['Occupation'], c['Salary_Bin'], normalize='index')
ax = sns.heatmap(crosstab, annot=True, fmt='.2%', cmap='YlGnBu')
ax.set_xlabel('Salary slab')
ax.set_ylabel('Occupation')
ax.set_title('Occupation vs. Salaries of the Customers')
plt.show()

In [None]:
# Share of each age range falling into every tardiness bucket (rows are normalised to 100%)

crosstab = pd.crosstab(c['Age_Bin'], c['Tardiness_Bin'], normalize='index')
ax = sns.heatmap(crosstab, annot=True, fmt='.2%', cmap='YlGnBu')
ax.set_xlabel('Extra Time taken To return Loan')
ax.set_ylabel('Age Range')
ax.set_title('Customer Age vs. Tardiness Behaviour of Customers')
plt.show()

In [None]:
# Share of each salary slab falling into every debt bucket (rows are normalised to 100%)

crosstab = pd.crosstab(c['Salary_Bin'], c['Debt_Bin'], normalize='index')
ax = sns.heatmap(crosstab, annot=True, fmt='.2%', cmap='YlGnBu')
ax.set_xlabel('Debt')
ax.set_ylabel('Salaries Range')
ax.set_title('Salary vs. Debt Behaviour of Customers')
plt.show()

In [None]:
# Share of each credit-history range falling into every debt bucket (rows are normalised to 100%)

crosstab = pd.crosstab(c['History_Bin'], c['Debt_Bin'], normalize='index')
ax = sns.heatmap(crosstab, annot=True, fmt='.2%', cmap='YlGnBu')
ax.set_xlabel('Debt')
ax.set_ylabel('Customer History with Bank (years)')
ax.set_title('Customer History vs. Debt Behaviour of Customers')
plt.show()

In [None]:
# Share of each credit-history range holding every credit-mix label (rows are normalised to 100%)

crosstab = pd.crosstab(c['History_Bin'], c['Credit_Mix'], normalize='index')
ax = sns.heatmap(crosstab, annot=True, fmt='.2%', cmap='YlGnBu')
ax.set_xlabel('Credit Mix')
ax.set_ylabel('Customer History with Bank (years)')
ax.set_title('Customer History vs. Credit Mixture')
plt.show()

In [None]:
# Number of customers per credit-mix label
c['Credit_Mix'].value_counts()

In [None]:
def pearson(x,y,df=data):
  """Print the Pearson correlation between columns `x` and `y` of `df`.

  A blank line and a header naming both columns are printed first, then the
  coefficient. Always returns an empty string.
  """
  header = f"Pearson Correlation Coefficient between {x} and {y}:"
  print("\n" + header)
  coefficient = df[x].corr(df[y], method='pearson')
  print(coefficient)
  return ""


def spearman(x,y,df=data):
  """Print the Spearman rank correlation between columns `x` and `y` of `df`.

  A blank line and a header naming both columns are printed first, then the
  coefficient. Always returns an empty string.
  """
  header = f"Spearman Correlation Coefficient between {x} and {y}:"
  print("\n" + header)
  coefficient = df[x].corr(df[y], method='spearman')
  print(coefficient)
  return ""

In [None]:
# Checking Correlations on the customer frame `c` (rank-based Spearman coefficient).
# Each call prints its result; the helper returns "", so the last call also echoes '' as the cell output.

# Salary and Investment
spearman('Avg_Amount_invested_monthly','Monthly_Inhand_Salary',c)

# Credit Utilisation and Outstanding Debt
spearman('Avg_Credit_Utilization_Ratio','Outstanding_Debt',c)


## Credit Number Calculations

In [None]:
# Scaling Formula for the Data for ease of use

def scaled(c, df=None):
  """Min-max scale column `c` of `df` to the range 0-100, in place.

  Parameters
  ----------
  c : str
      Name of the column to rescale.
  df : pandas.DataFrame, optional
      Frame to modify. When omitted, the notebook-level `cs` frame is looked up
      at call time. The previous default `df=cs` was evaluated when the function
      was defined, before `cs` exists, so it failed on a fresh kernel.

  Returns
  -------
  None
      The column is overwritten inside `df`.
  """
  if df is None:
    df = cs
  min_val = df[c].min()
  max_val = df[c].max()
  span = max_val - min_val
  if span == 0:
    # A constant column has no spread; map it to 0 instead of producing 0/0 = NaN.
    df[c] = 0.0
    return
  df[c] = (df[c] - min_val) / span * 100

In [None]:
# Selecting only relevant columns from the Customer dataset (c)

useful = ['Annual_Income','Monthly_Inhand_Salary','Num_of_Loan',
          'Avg_Num_of_Delayed_Payment','Num_Credit_Inquiries',
          'Credit_Mix_Rating','Outstanding_Debt','Avg_Credit_Utilization_Ratio',
          'C_History_Years','Avg_Monthly_Balance' ]

# Creating a workable copy of the dataset - cs dataframe.
# The explicit .copy() makes cs independent of c, so the in-place scaling below never touches c.
cs = c[useful].copy()

# Scaling all columns of the 'cs' DataFrame to a 0-100 range

for col in useful:
    scaled(col, cs)

# Show the scaled frame that was just built (the original displayed the unscaled `c` here).
print(cs.shape)
cs.head(2)

In [None]:
# Creating the best possible approximation of the FICO Score (using weights given in myFICO.com)
# The inputs were min-max scaled to 0-100 above; columns where a lower value is
# better are inverted with (100 - value) before weighting.

weighted_terms = [
    0.35 * (100 - cs['Avg_Num_of_Delayed_Payment']),
    0.1 * (100 - cs['Num_Credit_Inquiries']),
    0.1 * cs['Credit_Mix_Rating'],
    0.3 * (100 - cs['Outstanding_Debt']),
    0.15 * cs['C_History_Years'],
]

# Add the terms left to right, in the listed order.
composite = weighted_terms[0]
for term in weighted_terms[1:]:
    composite = composite + term

# Map the 0-100 composite onto the 300-850 score range.
cs['FICO_Rating'] = ((composite / 100) * (850 - 300)) + 300
cs.describe()

Creating New Credit Scores using Linear Regression Models

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Rating categoriser

def score_categ(x, df=None):
  """Add a `<x>_category` column that labels the score in column `x` of `df`.

  Bands (source - myFICO.com):
      < 580   = Poor
      580-669 = Fair
      670-739 = Good
      740-799 = Very Good
      800+    = Exceptional

  Parameters
  ----------
  x : str
      Name of the score column.
  df : pandas.DataFrame, optional
      Frame to modify in place. When omitted, the notebook-level `cs` frame is
      looked up at call time.

  Returns
  -------
  None
      The new column is written into `df`.
  """
  if df is None:
    df = cs
  x_c = x + '_category'
  bins = [-np.inf, 580, 670, 740, 800, np.inf]
  # right=False makes each band include its lower edge, so a score of exactly
  # 580 is 'Fair' (the default right-closed bins put it in 'Poor').
  df[x_c] = pd.cut(df[x], bins=bins, right=False,
                   labels=['Poor', 'Fair', 'Good', 'Very Good','Exceptional'])

# Rating Accuracy

def rating_acc(x,y,df=None):
  """Add a `<x>_accuracy` column marking whether columns `x` and `y` agree row by row.

  Parameters
  ----------
  x, y : str
      Names of the two columns to compare.
  df : pandas.DataFrame, optional
      Frame to modify in place. When omitted, the notebook-level `cs` frame is
      looked up at call time rather than bound when the function is defined.

  Returns
  -------
  None
      The new column holds 'Match' where the values are equal, else 'Mismatch'.
  """
  if df is None:
    df = cs
  x_a = x + '_accuracy'
  is_match = df[x] == df[y]
  df[x_a] = is_match.map({True: 'Match', False: 'Mismatch'})


# Label the approximated FICO score (adds 'FICO_Rating_category' to cs) and preview the result
score_categ('FICO_Rating', cs)
cs.head()

Rating - 1: Not considering the loans in the credit calculations and treating them separately. Also taking the records month-wise.

**Factors that affect the ratings**

*   **Monthly_Inhand_Salary**: What the Customer is earning per month affects the ability to pay credit
*   **Avg_Num_of_Delayed_Payment**: Tardiness shows how serious or capable the customer is in paying back
*   **Num_Credit_Inquiries**: Higher no. of credit inquiries can give an understanding of the person's loan/ credit tendencies
*   **Credit_Mix_Rating**: Good Credit mix can show that the person is not struggling and can buy different things at different stages of their lives
*   **Outstanding_Debt**: Outstanding debt gives an idea about how much credit load the customer already has that can affect his credit paying back ability
*   **Avg_Credit_Utilization_Ratio**: This ratio gives an idea about the desperation the customer has regarding finances
*   **C_History_Years**: Longer History of credit gives an idea that the customer has experience in this field and has a good track record to still get a loan from the company


In [None]:
# Picking Relevant Columns and doing Linear regression
# The feature list is declared once, so the columns used for fitting and for
# scoring cannot drift apart.

r1_features = ['Monthly_Inhand_Salary',
               'Avg_Num_of_Delayed_Payment',
               'Num_Credit_Inquiries',
               'Credit_Mix_Rating',
               'Outstanding_Debt',
               'Avg_Credit_Utilization_Ratio',
               'C_History_Years']

X = cs[r1_features]
y = cs['FICO_Rating']

model = LinearRegression()
model.fit(X, y)

weights = model.coef_
bias = model.intercept_

# New rating = weighted sum of the features plus the fitted intercept.
# model.predict computes exactly that, replacing the manual multiply-and-sum
# and the hand-derived mean-residual bias.
cs['r1'] = model.predict(X)

score_categ('r1', cs)
rating_acc('r1_category','FICO_Rating_category', cs)

In [None]:
# Comparing Matches in Predictions with FICO ratings

print(cs['r1_category_accuracy'].value_counts())

# Visualising the ratings: overlaid densities of the new rating and the FICO approximation
# (`fill=True` replaces the deprecated `shade=True` keyword of kdeplot)

ax = sns.kdeplot(data=cs[['r1','FICO_Rating']], fill=True)
ax.set(xlabel='Ratings', ylabel='Density', title='Ratings Comparison')
plt.show()

Rating - 2: Not considering the Tardiness or late Payments of the customers and only considering their financial status and credit behaviour

**Factors that affect the ratings**

*   **Annual_Income**: Income of the customer affects the loan repayment ability (annual income smooths out the monthly fluctuations)
*   **Num_Credit_Inquiries**: Higher no. of credit inquiries can give an understanding of the person's loan/ credit tendencies
*   **Credit_Mix_Rating**: Good Credit mix can show that the person is not struggling and can buy different things at different stages of their lives
*   **Outstanding_Debt**: Outstanding debt gives an idea about how much credit load the customer already has that can affect his credit paying back ability
*   **Avg_Credit_Utilization_Ratio**: This ratio gives an idea about the desperation the customer has regarding finances
*   **C_History_Years**: Longer History of credit gives an idea that the customer has experience in this field and has a good track record to still get a loan from the company
*   **Num_of_Loan**: A larger number of loans decreases the customer's ability to pay credits due to the financial strain
*   **Avg_Monthly_Balance**: Cash in bank after month end tells about the financial well being of the customers

In [None]:
# Picking Relevant Columns and doing Linear regression
# The feature list is declared once, so the columns used for fitting and for
# scoring cannot drift apart.

r2_features = ['Annual_Income',
               'Num_of_Loan',
               'Num_Credit_Inquiries',
               'Credit_Mix_Rating',
               'Outstanding_Debt',
               'Avg_Credit_Utilization_Ratio',
               'C_History_Years',
               'Avg_Monthly_Balance']

X = cs[r2_features]
y = cs['FICO_Rating']

model = LinearRegression()
model.fit(X, y)

weights = model.coef_
bias = model.intercept_

# New rating = weighted sum of the features plus the fitted intercept.
# model.predict computes exactly that, replacing the manual multiply-and-sum
# and the hand-derived mean-residual bias.
cs['r2'] = model.predict(X)

score_categ('r2', cs)
rating_acc('r2_category','FICO_Rating_category', cs)

In [None]:
# Comparing Matches in Predictions with FICO ratings

print(cs['r2_category_accuracy'].value_counts())

# Visualising the ratings: overlaid densities of the new rating and the FICO approximation
# (`fill=True` replaces the deprecated `shade=True` keyword of kdeplot)

ax = sns.kdeplot(data=cs[['r2','FICO_Rating']], fill=True)
ax.set(xlabel='Ratings', ylabel='Density', title='Ratings Comparison')
plt.show()

Rating - 3: Not considering the Salaries, Customer History or Monthly Balance of the customers. It doesn't rely on customer income source, but their behaviour of loan/credit repayments

**Factors that affect the ratings**


*   **Num_Credit_Inquiries**: Higher no. of credit inquiries can give an understanding of the person's loan/ credit tendencies
*   **Credit_Mix_Rating**: Good Credit mix can show that the person is not struggling and can buy different things at different stages of their lives
*   **Outstanding_Debt**: Outstanding debt gives an idea about how much credit load the customer already has that can affect his credit paying back ability
*   **Avg_Credit_Utilization_Ratio**: This ratio gives an idea about the desperation the customer has regarding finances
*   **Num_of_Loan**: A larger number of loans decreases the customer's ability to pay credits due to the financial strain
*   **Avg_Num_of_Delayed_Payment**: Indicates how seriously the customer takes their loan/credit repayments




In [None]:
# Picking Relevant Columns and doing Linear regression
# The feature list is declared once, so the columns used for fitting and for
# scoring cannot drift apart.

r3_features = ['Num_of_Loan',
               'Avg_Num_of_Delayed_Payment',
               'Num_Credit_Inquiries',
               'Credit_Mix_Rating',
               'Outstanding_Debt',
               'Avg_Credit_Utilization_Ratio']

X = cs[r3_features]
y = cs['FICO_Rating']

model = LinearRegression()
model.fit(X, y)

weights = model.coef_
bias = model.intercept_

# New rating = weighted sum of the features plus the fitted intercept.
# model.predict computes exactly that, replacing the manual multiply-and-sum
# and the hand-derived mean-residual bias.
cs['r3'] = model.predict(X)

score_categ('r3', cs)
rating_acc('r3_category','FICO_Rating_category', cs)

In [None]:
# Comparing Matches in Predictions with FICO ratings

print(cs['r3_category_accuracy'].value_counts())

# Visualising the ratings: overlaid densities of the new rating and the FICO approximation
# (`fill=True` replaces the deprecated `shade=True` keyword of kdeplot)

ax = sns.kdeplot(data=cs[['r3','FICO_Rating']], fill=True)
ax.set(xlabel='Ratings', ylabel='Density', title='Ratings Comparison')
plt.show()

In [None]:
# Side-by-side view of the FICO approximation and the three regression-based ratings
# (score, category, and agreement flag for each), first ten customers.
summary_columns = ['FICO_Rating', 'FICO_Rating_category']
for rating in ('r1', 'r2', 'r3'):
    summary_columns += [rating, rating + '_category', rating + '_category_accuracy']

cs[summary_columns].head(10)