In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Raw table as read from disk; `data` itself is never modified below.
data = pd.read_csv("../../Data/cleanedNotRecoded.csv")

# Ordinal encodings for the two ordered categorical columns.
score_codes = {"Good": 2, "Standard": 1, "Poor": 0}
mix_codes = {"Good": 2, "Standard": 1, "Bad": 0}

# Build the modelling frame in one chain:
#   1. drop the identifier-style columns and Occupation,
#   2. replace the Credit_Score / Credit_Mix labels with their integer codes,
#   3. one-hot encode the two payment columns (first level dropped).
df = (
    data
    .drop(columns=["ID", "Customer_ID", "Name", "SSN", "Occupation"])
    .assign(
        Credit_Score=lambda frame: frame["Credit_Score"].map(score_codes),
        Credit_Mix=lambda frame: frame["Credit_Mix"].map(mix_codes),
    )
    .pipe(
        pd.get_dummies,
        columns=["Payment_of_Min_Amount", "Payment_Behaviour"],
        drop_first=True,
    )
)

In [18]:
from sklearn.ensemble import IsolationForest

# Isolation forest configured to flag 10% of the rows (contamination=0.1) as outliers.
# NOTE(review): `df` still contains the Credit_Score target at this point, so the
# label takes part in the outlier detection — confirm this is intended.
iforestModel = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
iforestModel.fit(df)

# predict() returns -1 for outliers and 1 for inliers; keep the inlier rows only.
inlier_mask = iforestModel.predict(df) == 1
df = df[inlier_mask]

In [19]:
# Separate the encoded target from the predictor columns.
y = df["Credit_Score"]
X = df.drop(columns="Credit_Score")

In [20]:
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE so the credit-score classes are balanced.
# A fixed seed keeps the synthetic samples (and every metric computed from them)
# reproducible across Restart & Run All, matching the seed already used by the
# other stochastic steps in this notebook.
# NOTE(review): resampling happens before the train/test split further down, so
# synthetic points can be interpolated from rows that later land in the test set.
# Consider splitting first and applying SMOTE to the training portion only.
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

In [21]:
import prince
# Factor analysis of mixed data: reduce the resampled predictors to 11 components.
# NOTE(review): the projection is fitted on all rows before the train/test split —
# confirm that fitting on held-out rows is acceptable for this analysis.
famd_settings = dict(
    n_components=11,
    n_iter=3,
    copy=True,
    check_input=True,
    random_state=42,
)
famd = prince.FAMD(**famd_settings).fit(X)

# Component coordinates for every row of X; used as model input from here on.
famd_transformed = famd.transform(X)



  X = self.scaler_.transform(X.to_numpy())
  X = self.scaler_.transform(X.to_numpy())


In [22]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Hold out 20% of the FAMD-transformed rows for testing (seeded for repeatability).
split_parts = train_test_split(
    famd_transformed,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [23]:
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import cross_val_score, StratifiedKFold


# 10-fold stratified splitter shared by the cross-validation calls in the model cells.
# With shuffle=True and no seed the folds are redrawn on every run, which makes the
# reported CV scores non-reproducible; pin the seed like the other stochastic steps.
stratified_kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [24]:
from sklearn.linear_model import LogisticRegression


# One-vs-rest logistic regression fitted on the training split.
# NOTE(review): the `multi_class` argument is deprecated in recent scikit-learn
# releases — confirm against the installed version.
logModel = LogisticRegression(multi_class='ovr')
logModel.fit(X_train, y_train)

# --- In-sample performance -------------------------------------------------
y_pred = logModel.predict(X_train)

print("=====Train======")
train_metrics = {
    "Accuracy:": accuracy_score(y_train, y_pred),
    "f1_score:": f1_score(y_train, y_pred, average='weighted'),
    "Classification Report:\n": classification_report(y_train, y_pred),
}
for metric_label, metric_value in train_metrics.items():
    print(metric_label, metric_value)

# --- Cross-validated accuracy on the training split ------------------------
# cross_val_score fits fresh clones per fold, so `logModel` keeps its full-train fit.
scores = cross_val_score(
    logModel,
    X_train,
    y_train,
    cv=stratified_kfold,
    scoring='accuracy',
)

print(f"Cross-validation accuracy scores: {scores}")
print(f"Mean accuracy: {scores.mean()}")

# --- Held-out performance --------------------------------------------------
# `y_pred` is reused and ends up holding the test-set predictions.
y_pred = logModel.predict(X_test)

print("=====Test======")
test_metrics = {
    "Accuracy:": accuracy_score(y_test, y_pred),
    "f1_score:": f1_score(y_test, y_pred, average='weighted'),
    "Classification Report:\n": classification_report(y_test, y_pred),
}
for metric_label, metric_value in test_metrics.items():
    print(metric_label, metric_value)

Accuracy: 0.6965454370981672
f1_score: 0.6942725339653937
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.67      0.71     34649
           1       0.62      0.60      0.61     34754
           2       0.71      0.83      0.77     34807

    accuracy                           0.70    104210
   macro avg       0.70      0.70      0.69    104210
weighted avg       0.70      0.70      0.69    104210

Cross-validation accuracy scores: [0.69887727 0.70127627 0.6896651  0.69628634 0.70127627 0.69168026
 0.70194799 0.68928126 0.70588235 0.69033682]
Mean accuracy: 0.6966509931868343
Accuracy: 0.6993820289410049
f1_score: 0.6971191152077623
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.67      0.71      8772
           1       0.63      0.60      0.61      8667
           2       0.72      0.83      0.77      8614

    accuracy                           0.70     26053
  

In [25]:
import xgboost as xgb

# Gradient-boosted classifier with a 3-class softmax objective, fitted on the training split.
XGBModel = xgb.XGBClassifier(objective='multi:softmax', num_class=3)
XGBModel.fit(X_train, y_train)

# --- In-sample performance -------------------------------------------------
y_pred = XGBModel.predict(X_train)

print("=====Train======")
train_metrics = {
    "Accuracy:": accuracy_score(y_train, y_pred),
    "f1_score:": f1_score(y_train, y_pred, average='weighted'),
    "Classification Report:\n": classification_report(y_train, y_pred),
}
for metric_label, metric_value in train_metrics.items():
    print(metric_label, metric_value)

# --- Cross-validated accuracy on the training split ------------------------
# cross_val_score fits fresh clones per fold, so `XGBModel` keeps its full-train fit.
scores = cross_val_score(
    XGBModel,
    X_train,
    y_train,
    cv=stratified_kfold,
    scoring='accuracy',
)

print(f"Cross-validation accuracy scores: {scores}")
print(f"Mean accuracy: {scores.mean()}")

# --- Held-out performance --------------------------------------------------
# `y_pred` is reused and ends up holding the test-set predictions.
y_pred = XGBModel.predict(X_test)

print("=====Test======")
test_metrics = {
    "Accuracy:": accuracy_score(y_test, y_pred),
    "f1_score:": f1_score(y_test, y_pred, average='weighted'),
    "Classification Report:\n": classification_report(y_test, y_pred),
}
for metric_label, metric_value in test_metrics.items():
    print(metric_label, metric_value)

Accuracy: 0.7454275021591018
f1_score: 0.7429736585658501
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.76      0.77     34649
           1       0.71      0.63      0.67     34754
           2       0.74      0.85      0.79     34807

    accuracy                           0.75    104210
   macro avg       0.75      0.75      0.74    104210
weighted avg       0.75      0.75      0.74    104210

Cross-validation accuracy scores: [0.71413492 0.7101046  0.70943288 0.71307936 0.70972076 0.70588235
 0.711544   0.7134632  0.71519048 0.71183188]
Mean accuracy: 0.711438441608291
Accuracy: 0.7154262464975243
f1_score: 0.7120609227469955
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.73      0.74      8772
           1       0.67      0.58      0.62      8667
           2       0.73      0.84      0.78      8614

    accuracy                           0.72     26053
   