In [2]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
import pickle
import re
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


# Sklearn libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, StandardScaler, LabelEncoder,OneHotEncoder,OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score, roc_curve, log_loss
from sklearn.utils.class_weight import compute_class_weight

# TensorFlow and Keras libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, save_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2, l1

In [None]:

# Read the pre-cleaned train and test splits from the Kaggle input directory.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/credit-dset/clean_trained.csv',
        '../input/credit-dset/test_cleaned.csv',
    )
)


In [7]:
# One-hot encode the nominal columns. Each encoder is fit on the training
# frame only and then re-used to transform the test frame, so both frames end
# up with an identical set of indicator columns.
one_hot_encode_cols = ['Month','Profession']
for col in one_hot_encode_cols:
    # handle_unknown='ignore': a category that appears only in the test frame
    # is encoded as an all-zero row instead of raising a ValueError (the
    # default 'error' behaviour would abort the cell).
    ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    encoded_train = ohe.fit_transform(df_train[[col]])
    encoded_test = ohe.transform(df_test[[col]])
    # Indicator columns are named "<original column>_<category>".
    encoded_cols = [f"{col}_{j}" for j in ohe.categories_[0]]
    # Re-use the source frame's index: pd.concat(axis=1) aligns on index
    # labels, so a fresh RangeIndex would misalign rows (and introduce NaNs)
    # whenever the source frame is not indexed 0..n-1.
    df_train_encoded = pd.DataFrame(encoded_train, columns=encoded_cols, index=df_train.index)
    df_test_encoded = pd.DataFrame(encoded_test, columns=encoded_cols, index=df_test.index)
    # Append the indicators and drop the original categorical column.
    df_train = pd.concat([df_train, df_train_encoded], axis=1).drop(columns=[col])
    df_test = pd.concat([df_test, df_test_encoded], axis=1).drop(columns=[col])

In [8]:
# Ordinal codes for Credit_Mix; labels missing from the mapping become NaN.
cred_mix_dict = {
    "Bad": 0,
    "Standard": 1,
    "Good": 2,
}
for frame in (df_train, df_test):
    frame['Credit_Mix'] = frame['Credit_Mix'].map(cred_mix_dict)

In [9]:
# Integer codes for the two halves of Payment_Behaviour.
pay_type_dict = {'Low' : 0, 'High' : 1}
val_pay_dict = {'Small_value_payments' : 0,'Medium_value_payments' : 1,'Large_value_payments' : 2}


def _encode_payment_behaviour(frame):
    """Split Payment_Behaviour on '_spent_' into pay_type / val_pay codes.

    The text before the separator is mapped through ``pay_type_dict`` and the
    text after it through ``val_pay_dict``; the original column is dropped.
    Returns the resulting frame.
    """
    frame[['pay_type', 'val_pay']] = frame['Payment_Behaviour'].str.split(
        pat='_spent_', n=1, expand=True
    )
    frame = frame.drop(['Payment_Behaviour'], axis=1)
    frame['pay_type'] = frame['pay_type'].map(pay_type_dict)
    frame['val_pay'] = frame['val_pay'].map(val_pay_dict)
    return frame


df_train = _encode_payment_behaviour(df_train)
df_test = _encode_payment_behaviour(df_test)

In [10]:
# Binary-encode Payment_of_Min_Amount; any value other than 'Yes'/'No'
# is turned into NaN by Series.map.
yes_no_dict = {'Yes': 1, 'No': 0}
train_min_amount = df_train['Payment_of_Min_Amount'].map(yes_no_dict)
test_min_amount = df_test['Payment_of_Min_Amount'].map(yes_no_dict)
df_train['Payment_of_Min_Amount'] = train_min_amount
df_test['Payment_of_Min_Amount'] = test_min_amount

In [11]:
# Encode the target with an explicit category order:
# 'Poor' -> 0, 'Standard' -> 1, 'Good' -> 2.
encoder_final = OrdinalEncoder(categories=[['Poor', 'Standard', 'Good']])
credit_score_codes = encoder_final.fit_transform(df_train[['Credit_Score']])
# The column is overwritten with the encoder's float codes.
df_train['Credit_Score'] = credit_score_codes
# Display the distinct codes as a quick sanity check.
df_train['Credit_Score'].unique()

array([1., 0., 2.])

In [14]:
# Target vector and feature matrix ('Number' is excluded from the features).
Y = df_train['Credit_Score']
X = df_train.drop(columns=['Credit_Score', 'Number'])

# Standardise every feature column.
# NOTE(review): the scaler is fit on all training rows before the
# train/validation split performed in a later cell, so the validation rows
# contribute to the scaling statistics; df_test is not scaled in this cell.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Keep the test frame's columns in step with X.
df_test = df_test.drop(columns=['Number'])

In [15]:
# Append a trailing singleton axis: (samples, features) -> (samples, features, 1).
X_reshaped = np.expand_dims(X_scaled, axis=-1)

# Hold out 20% of the rows with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped,
    Y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Seed Python's `random`, NumPy and TensorFlow in one call.
# tf.random.set_seed alone only seeds TensorFlow's own generator, leaving the
# Python and NumPy generators unseeded.
tf.keras.utils.set_random_seed(42)

# Model architecture: a fully connected classifier with four hidden blocks
# (Dense -> BatchNormalization, with Dropout on all but the second block) and
# a 3-way softmax output, one unit per Credit_Score class.
# NOTE(review): X_train is 3-D (samples, features, 1) after the reshape cell,
# while Input declares (features,); the recorded run shows Keras accepting
# this, but the reshape looks unnecessary for a Dense-only network.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu',),
    BatchNormalization(),
    Dropout(0.35),

    # Only this layer carries an explicit L1 weight penalty.
    Dense(512, activation='relu', kernel_regularizer=l1(1e-4)),
    BatchNormalization(),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(3, activation='softmax')
])

# Model compilation. The sparse loss expects integer-valued class labels
# (the ordinal-encoded Credit_Score), not one-hot vectors.
model.compile(optimizer=Adam(learning_rate=0.0003),  # Reduced learning rate
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Early stopping: stop once val_accuracy has not improved for 35 epochs and
# roll the model back to the best-scoring weights.
early_stopping = EarlyStopping(monitor='val_accuracy',
                               patience=35,
                               restore_best_weights=True)

# Model training. The held-out split doubles as the validation set that
# drives early stopping, so its accuracy is an optimistic estimate.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=1024,
    epochs=500,
    verbose=1,
    callbacks=[early_stopping]
)

Epoch 1/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 55ms/step - accuracy: 0.4646 - loss: 1.8371 - val_accuracy: 0.5760 - val_loss: 1.5279
Epoch 2/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 69ms/step - accuracy: 0.5729 - loss: 1.5185 - val_accuracy: 0.6006 - val_loss: 1.4207
Epoch 3/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 60ms/step - accuracy: 0.6096 - loss: 1.4316 - val_accuracy: 0.6288 - val_loss: 1.3372
Epoch 4/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - accuracy: 0.6264 - loss: 1.3724 - val_accuracy: 0.6472 - val_loss: 1.2940
Epoch 5/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - accuracy: 0.6351 - loss: 1.3352 - val_accuracy: 0.6556 - val_loss: 1.2639
Epoch 6/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - accuracy: 0.6367 - loss: 1.3028 - val_accuracy: 0.6591 - val_loss: 1.2457
Epoch 7/500
[1m63/63[0m [

In [None]:
# Seed Python's `random`, NumPy and TensorFlow in one call.
# tf.random.set_seed alone only seeds TensorFlow's own generator, leaving the
# Python and NumPy generators unseeded.
tf.keras.utils.set_random_seed(42)

# Model architecture: same layer stack as the early-stopped `model`, rebuilt
# from scratch so it can be trained for a fixed number of epochs.
final_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu',),
    BatchNormalization(),
    Dropout(0.35),

    # Only this layer carries an explicit L1 weight penalty.
    Dense(512, activation='relu', kernel_regularizer=l1(1e-4)),
    BatchNormalization(),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(3, activation='softmax')
])

# Model compilation.
# Compile the freshly built `final_model`; the previous code compiled and
# fitted `model`, which silently continued training the earlier network and
# left `final_model` untrained.
final_model.compile(optimizer=Adam(learning_rate=0.0003),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])


# Model training for a fixed 260 epochs with no validation or early stopping.
# `X_train_scaled` is not defined anywhere in the visible notebook; `X_train`
# is the array that pairs row-for-row with `y_train`.
# NOTE(review): if the intent is to retrain on every labelled row, pass
# `X_scaled` and `Y` together instead.
history = final_model.fit(x=X_train,
                          y=y_train,
                          batch_size=1024,
                          epochs=260,
                          verbose=1)