In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Read credit_score_new.csv and drop the columns that are excluded from the
# feature set before modelling.
credit_score_df = pd.read_csv("credit_score_new.csv").drop(
    columns=['Type_of_Loan', 'Unnamed: 0', 'Customer_ID', 'ID', 'Name', 'Occupation']
)
# Preview the first rows of the trimmed frame (last expression => cell output).
credit_score_df.head()


Unnamed: 0,Month,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Num_Credit_Inquiries,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score,Age
0,1,19114.12,1824.843333,3,4,3,4.0,3,7.0,4.0,809.98,26.82262,22.1,No,49.574949,80.415295,High_spent_Small_value_payments,312.494089,Good,23.0
1,7,19114.12,1824.843333,3,4,3,4.0,3,8.0,4.0,809.98,22.537593,22.7,No,49.574949,178.344067,Low_spent_Small_value_payments,244.565317,Good,23.0
2,1,30689.89,2612.490833,2,5,4,1.0,0,6.0,4.0,632.46,26.544229,17.3,No,16.415452,81.228859,Low_spent_Large_value_payments,433.604773,Standard,55.0
3,2,30689.89,2612.490833,2,5,4,1.0,5,3.0,4.0,632.46,35.279982,17.4,No,16.415452,124.88182,Low_spent_Small_value_payments,409.951812,Standard,55.0
4,3,30689.89,2612.490833,2,5,4,1.0,3,9.0,4.0,632.46,32.301163,17.5,NM,16.415452,83.406509,High_spent_Medium_value_payments,411.427123,Standard,55.0


In [2]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [3]:
# Create our features: every column except the target, with the remaining
# non-numeric columns one-hot encoded by get_dummies.
X = pd.get_dummies(credit_score_df.drop(columns="Credit_Score"))

# Create our target as a 1-D Series. scikit-learn estimators expect y with
# shape (n_samples,); a single-column DataFrame is converted with a
# DataConversionWarning, which the global warnings filter in this notebook
# would silently hide.
y = credit_score_df["Credit_Score"]


In [4]:
from sklearn.model_selection import train_test_split

# Split features and target into train/test partitions; the fixed seed keeps
# the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [5]:
# Fit a BalancedRandomForestClassifier (100 trees, fixed seed) on the
# training split, then echo the fitted estimator as the cell output.
from imblearn.ensemble import BalancedRandomForestClassifier

clf = BalancedRandomForestClassifier(n_estimators=100, random_state=1).fit(
    X_train, y_train
)
clf



BalancedRandomForestClassifier(random_state=1)

In [6]:
from sklearn.metrics import confusion_matrix, balanced_accuracy_score, classification_report

# Predict on the test split with the fitted forest, then report the
# balanced accuracy of those predictions.
predictions = clf.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=predictions)

0.7556544705127921

In [7]:
# Confusion matrix for the forest predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
confusion_matrix(y_true=y_test, y_pred=predictions)

array([[1863,   39,  215],
       [ 342, 2959,  293],
       [1411, 1394, 3623]])

In [8]:
# Build the imbalanced-learn classification report for the forest
# predictions and print it as plain text.
forest_report = classification_report_imbalanced(y_test, predictions)
print(forest_report)


                   pre       rec       spe        f1       geo       iba       sup

       Good       0.52      0.88      0.83      0.65      0.85      0.73      2117
       Poor       0.67      0.82      0.83      0.74      0.83      0.68      3594
   Standard       0.88      0.56      0.91      0.69      0.72      0.50      6428

avg / total       0.75      0.70      0.87      0.70      0.77      0.59     12139



In [9]:
# List the features sorted in descending order by feature importance.
# Pairs are (importance, column name), so sorting with reverse=True orders
# them from most to least important; the list is built once and reused.
importance_display1 = sorted(zip(clf.feature_importances_, X.columns), reverse=True)
for importance, feature in importance_display1:
    print(f"{feature}: ({importance})")

Outstanding_Debt: (0.12379571611376546)
Interest_Rate: (0.10336781248310586)
Delay_from_due_date: (0.06815863333473247)
Credit_History_Age: (0.06763034276561575)
Num_Credit_Inquiries: (0.04941657266326485)
Monthly_Inhand_Salary: (0.045251672402518046)
Total_EMI_per_month: (0.04479906414295073)
Annual_Income: (0.04446861664569113)
Monthly_Balance: (0.04360256498525273)
Credit_Utilization_Ratio: (0.042698111278240684)
Amount_invested_monthly: (0.042527658776808336)
Num_of_Delayed_Payment: (0.04236840477790194)
Num_Credit_Card: (0.03965487002403447)
Num_Bank_Accounts: (0.037924543416859635)
Payment_of_Min_Amount_No: (0.0358531104147205)
Payment_of_Min_Amount_Yes: (0.035388364042120465)
Age: (0.03469230649363442)
Month: (0.03405655064060846)
Num_of_Loan: (0.0302450496673667)
Payment_Behaviour_Low_spent_Small_value_payments: (0.005991548044034805)
Payment_Behaviour_High_spent_Medium_value_payments: (0.0054874504575602836)
Payment_Behaviour_Low_spent_Medium_value_payments: (0.004846185524040

In [10]:
# Fit an EasyEnsembleClassifier (100 estimators, fixed seed) on the training
# split; the fit call is the last expression so the estimator is displayed.
from imblearn.ensemble import EasyEnsembleClassifier

ecc = EasyEnsembleClassifier(
    n_estimators=100,
    random_state=1,
)
ecc.fit(X_train, y_train)


EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [11]:
# Predict on the test split with the fitted ensemble, then report the
# balanced accuracy of those predictions.
y_pred = ecc.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.6692672015266705

In [12]:
# Confusion matrix for the ensemble predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[1666,   60,  391],
       [ 479, 2499,  616],
       [1686, 1364, 3378]])

In [13]:
from imblearn.metrics import classification_report_imbalanced

# Build the imbalanced-learn classification report for the ensemble
# predictions and print it as plain text.
ensemble_report = classification_report_imbalanced(y_test, y_pred)
print(ensemble_report)

                   pre       rec       spe        f1       geo       iba       sup

       Good       0.43      0.79      0.78      0.56      0.79      0.62      2117
       Poor       0.64      0.70      0.83      0.66      0.76      0.57      3594
   Standard       0.77      0.53      0.82      0.62      0.66      0.42      6428

avg / total       0.67      0.62      0.82      0.63      0.71      0.50     12139

