In [None]:
import random
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Make 'plotly_white' the default template for every Plotly figure created in this notebook.
pio.templates.default = 'plotly_white'

In [None]:
# Load the training data; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('train.csv')

In [None]:
def box_plot(x_axis, y_axis):
  """Render a box plot of one column of ``df`` grouped by another.

  The boxes are colored by the ``Credit_Score`` column (Poor: red,
  Standard: yellow, Good: green) and quartiles are computed with Plotly's
  'exclusive' method.

  Args:
    x_axis: Name of the ``df`` column placed on the x axis.
    y_axis: Name of the ``df`` column placed on the y axis.

  Returns:
    Whatever ``fig.show()`` returns; the figure is displayed as a side effect.
  """
  score_colors = dict(Poor='red', Standard='yellow', Good='green')
  figure = px.box(
    df,
    x=x_axis,
    y=y_axis,
    color='Credit_Score',
    title=f'{x_axis} Based on {y_axis}',
    color_discrete_map=score_colors,
  )
  figure.update_traces(quartilemethod='exclusive')
  return figure.show()

In [None]:
def random_features(data=None, seed=None):
  """Sample one random feature row inside the value ranges observed in the data.

  Each value is drawn between the minimum and maximum of its column, echoed
  with ``print``, and returned in the same column order that is used to build
  the training feature matrix.

  Args:
    data: DataFrame whose columns supply the sampling ranges. Defaults to the
      notebook-level ``df``.
    seed: Optional seed for a private ``random.Random`` generator, which makes
      the sample reproducible. With ``None`` the shared ``random`` module
      state is used.

  Returns:
    ``np.ndarray`` of shape ``(1, 12)``, suitable for ``model.predict``.
  """
  frame = df if data is None else data
  rng = random if seed is None else random.Random(seed)

  def column_range(column):
    """Return the (min, max) pair of one column."""
    return frame[column].min(), frame[column].max()

  def whole_number(low, high):
    """Draw an integer-valued float from the closed range [low, high]."""
    # random.randint only accepts integers (float arguments raise TypeError
    # since Python 3.12) while the bounds may be floats, so round them inwards.
    low, high = int(np.ceil(low)), int(np.floor(high))
    # An inverted range would raise ValueError; collapse it onto the lower bound.
    return float(rng.randint(low, max(low, high)))

  annual_income = rng.uniform(*column_range('Annual_Income'))
  print(f'Annual Income: {annual_income:.2f}')

  # Derived from the sampled income rather than drawn independently.
  monthly_inhand_salary = annual_income / 12
  print(f'Monthly Inhand Salary: {monthly_inhand_salary:.2f}')

  num_bank_accounts = whole_number(*column_range('Num_Bank_Accounts'))
  print(f'Number of Bank Accounts: {num_bank_accounts}')

  # The number of credit cards is capped by the number of bank accounts drawn above.
  num_credit_cards = whole_number(frame['Num_Credit_Card'].min(), num_bank_accounts)
  print(f'Number of Credit Cards: {num_credit_cards}')

  interest_rate = whole_number(*column_range('Interest_Rate'))
  print(f'Interest Rate: {interest_rate}')

  num_loans = whole_number(*column_range('Num_of_Loan'))
  print(f'Number of Loans: {num_loans}')

  avg_delay_days = whole_number(*column_range('Delay_from_due_date'))
  print(f'Average Number of Days Delayed by the Person: {avg_delay_days}')

  num_delayed_payments = whole_number(*column_range('Num_of_Delayed_Payment'))
  print(f'Number of Delayed Payments: {num_delayed_payments}')

  credit_mix = rng.randint(0, 2)
  print(f'Credit Mix (Bad: 0, Standard: 1, Good: 2): {credit_mix}')

  outstanding_debt = rng.uniform(*column_range('Outstanding_Debt'))
  print(f'Outstanding Debt: {outstanding_debt:.2f}')

  credit_history_age = whole_number(*column_range('Credit_History_Age'))
  print(f'Credit History Age: {credit_history_age}')

  monthly_balance = rng.uniform(*column_range('Monthly_Balance'))
  print(f'Monthly Balance: {monthly_balance:.2f}')

  return np.array([[annual_income, monthly_inhand_salary, num_bank_accounts, num_credit_cards,
                    interest_rate, num_loans, avg_delay_days, num_delayed_payments,
                    credit_mix, outstanding_debt, credit_history_age, monthly_balance]])

# Data Exploration

O dataset possui muitas informações que podem ser usadas para treinar um modelo de Machine Learning para classificação de crédito. O objetivo é explorar todos os recursos que podem afetar essa classificação.

In [None]:
# Box plot over 'Occupation', split and colored by credit-score class.
# NOTE(review): no `y` column is passed here, unlike the box_plot() helper,
# and quartilemethod is left at its default; confirm this is the intended chart.
fig = px.box(
    df,
    x='Occupation',
    color='Credit_Score',
    title='Credit_Score Based on Occupation',
    color_discrete_map=dict(Poor='red', Standard='yellow', Good='green'),
)
fig.show()

In [None]:
# Box plot of `Annual_Income` for each `Credit_Score` value.
box_plot('Credit_Score', 'Annual_Income')

In [None]:
# Box plot of `Monthly_Inhand_Salary` for each `Credit_Score` value.
box_plot('Credit_Score', 'Monthly_Inhand_Salary')

In [None]:
# Box plot of `Num_Bank_Accounts` for each `Credit_Score` value.
box_plot('Credit_Score', 'Num_Bank_Accounts')

In [None]:
# Box plot of `Num_Credit_Card` for each `Credit_Score` value.
box_plot('Credit_Score', 'Num_Credit_Card')

In [None]:
# Box plot of `Interest_Rate` for each `Credit_Score` value.
box_plot('Credit_Score', 'Interest_Rate')

In [None]:
# Box plot of `Num_of_Loan` for each `Credit_Score` value.
box_plot('Credit_Score', 'Num_of_Loan')

In [None]:
# Box plot of `Delay_from_due_date` for each `Credit_Score` value.
box_plot('Credit_Score', 'Delay_from_due_date')

In [None]:
# Box plot of `Num_of_Delayed_Payment` for each `Credit_Score` value.
box_plot('Credit_Score', 'Num_of_Delayed_Payment')

In [None]:
# Box plot of `Outstanding_Debt` for each `Credit_Score` value.
box_plot('Credit_Score', 'Outstanding_Debt')

In [None]:
# Box plot of `Credit_Utilization_Ratio` for each `Credit_Score` value.
# This column is explored here but is not among the features used to train the model.
box_plot('Credit_Score', 'Credit_Utilization_Ratio')

In [None]:
# Box plot of `Credit_History_Age` for each `Credit_Score` value.
box_plot('Credit_Score', 'Credit_History_Age')

In [None]:
# Box plot of `Total_EMI_per_month` for each `Credit_Score` value.
# This column is explored here but is not among the features used to train the model.
box_plot('Credit_Score', 'Total_EMI_per_month')

In [None]:
# Box plot of `Amount_invested_monthly` for each `Credit_Score` value.
# This column is explored here but is not among the features used to train the model.
box_plot('Credit_Score', 'Amount_invested_monthly')

In [None]:
# Box plot of `Monthly_Balance` for each `Credit_Score` value.
box_plot('Credit_Score', 'Monthly_Balance')

# Modelo de classificação de crédito

## Credit Mix

Outra característica importante no dataset para determinar as pontuações de crédito é o Credit Mix. O Credit Mix informa sobre os tipos de créditos e empréstimos realizados.

In [None]:
# Encode the Credit_Mix categories as integers (Bad: 0, Standard: 1, Good: 2) for the model.
# The guard makes this cell safe to re-run: mapping the already-encoded integers
# a second time would match no key and turn the whole column into NaN.
if not pd.api.types.is_numeric_dtype(df['Credit_Mix']):
  df['Credit_Mix'] = df['Credit_Mix'].map({'Bad': 0,
                                           'Standard': 1,
                                           'Good': 2})

## Modelo

In [None]:
# Feature matrix. The column order must match the order of the values returned by
# random_features(), because that row is passed straight to model.predict().
dataset_features = np.array(df[['Annual_Income', 'Monthly_Inhand_Salary',
                                'Num_Bank_Accounts', 'Num_Credit_Card',
                                'Interest_Rate', 'Num_of_Loan',
                                'Delay_from_due_date', 'Num_of_Delayed_Payment',
                                'Credit_Mix', 'Outstanding_Debt',
                                'Credit_History_Age', 'Monthly_Balance']])
# Label vector, selected as a Series so the array is 1-D with shape (n_samples,).
# Selecting with df[['Credit_Score']] would give an (n_samples, 1) column vector,
# which scikit-learn classifiers only accept with a DataConversionWarning.
dataset_target = np.array(df['Credit_Score'])

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(dataset_features, dataset_target, 
                                                    test_size=0.33, 
                                                    random_state=42)

In [None]:
# Seed the forest so training is reproducible under Restart & Run All,
# using the same seed value as the train/test split.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

In [None]:
# Draw one synthetic feature row (random_features() prints each sampled value)
# and classify it with the trained model.
print("Credit Score Prediction: ")
features = random_features()
# The result of model.predict is interpolated as-is, so it is printed in its
# array representation rather than as a bare label.
print(f'Predicted Credit Score: {model.predict(features)}')