<a href="https://colab.research.google.com/github/Muhammadridho100902/google_collab/blob/main/Credit_Scoring_%26_Segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

In [2]:
data = pd.read_csv("credit_scoring.csv")
data.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan


In [3]:
credit_utilization_fig = px.box(data, y='Credit Utilization Ratio',
                                title='Credit Utilization Ratio Distribution')
credit_utilization_fig.show()

In [4]:
loan_amount_fig = px.histogram(data, x='Loan Amount', title='Loan Amount Distribution', nbins=20)
loan_amount_fig.show()

In [5]:
numeric_df = data[['Credit Utilization Ratio',
                   'Payment History',
                   'Number of Credit Accounts',
                   'Loan Amount', 'Interest Rate',
                   'Loan Term']]

correlation_fig = px.imshow(numeric_df.corr(), title='Correlation Heatmap')
correlation_fig.show

<bound method BaseFigure.show of Figure({
    'data': [{'coloraxis': 'coloraxis',
              'hovertemplate': 'x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>',
              'name': '0',
              'type': 'heatmap',
              'x': array(['Credit Utilization Ratio', 'Payment History',
                          'Number of Credit Accounts', 'Loan Amount', 'Interest Rate',
                          'Loan Term'], dtype=object),
              'xaxis': 'x',
              'y': array(['Credit Utilization Ratio', 'Payment History',
                          'Number of Credit Accounts', 'Loan Amount', 'Interest Rate',
                          'Loan Term'], dtype=object),
              'yaxis': 'y',
              'z': array([[ 1.        ,  0.0021142 , -0.00184174, -0.01491844,  0.04823694,
                           -0.02292438],
                          [ 0.0021142 ,  1.        ,  0.02502234, -0.01908377, -0.01761825,
                           -0.04553197],
                       

# Calculating Credit Scores

In [6]:
education_level_mapping = {'High School' : 0, 'Bachelor' : 1, 'Master' : 2, 'PhD' : 3}
employment_status_mapping = {'Unemployed' : 0, 'Employed': 1, 'Self-Employed' : 2}

data['Education Level'] = data['Education Level'].map(education_level_mapping)
data['Employment Status'] = data['Employment Status'].map(employment_status_mapping)

# Create Universal Variable we can access
credit_scores = []

for index, row in data.iterrows():
  payment_history = row['Payment History']
  credit_utilization_ratio = row['Credit Utilization Ratio']
  number_of_credit_accounts = row['Number of Credit Accounts']
  education_level = row['Education Level']
  employment_status = row['Employment Status']

  credit_score = (payment_history * 0.35) + (credit_utilization_ratio * 0.30) + (number_of_credit_accounts * 0.15) + (education_level * 0.10) + (employment_status * 0.10)
  credit_scores.append(credit_score)

In [7]:
data['Credit Score'] = credit_scores
data.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan,Credit Score
0,60,Male,Married,2,1,0.22,2685.0,2,4675000,2.65,48,Personal Loan,940.416
1,25,Male,Married,0,0,0.2,2371.0,9,3619000,5.19,60,Auto Loan,831.26
2,30,Female,Single,2,1,0.22,2771.0,6,957000,2.76,12,Auto Loan,971.116
3,58,Female,Married,3,0,0.12,1371.0,2,4731000,6.57,60,Auto Loan,480.486
4,32,Male,Married,1,2,0.99,828.0,2,3289000,6.28,36,Personal Loan,290.697


# Model

In [12]:
from sklearn.cluster import KMeans

x = data[['Credit Score']]
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(x)
data['Segment'] = kmeans.labels_

In [14]:
data['Segment'] = data['Segment'].map({2: 'Very Low',
                                       0: 'Low',
                                       1: 'Good',
                                       3: "Excellent"})

data['Segment'] = data['Segment'].astype('category')

fig = px.scatter(data, x=data.index, y='Credit Score', color='Segment',
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])

fig.update_layout(
    xaxis_title= 'Customer Index',
    yaxis_title = 'Credit Score',
    title = 'Customer Segmentation Based on Credit Scores'
)

fig.show()

Summary

---


Credit scoring and segmentation refer to the process of evaluating the creditworthiness of individuals or businesses and dividing them into distinct groups based on their credit profiles. It aims to assess the likelihood of borrowers repaying their debts and helps financial institutions make informed decisions regarding lending and managing credit risk.