In [1]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
import pickle
import re
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


# Sklearn libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, StandardScaler, LabelEncoder,OneHotEncoder,OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score, roc_curve, log_loss
from sklearn.utils.class_weight import compute_class_weight

# TensorFlow and Keras libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, save_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2, l1

2024-12-11 12:33:51.037597: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-11 12:33:51.064891: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733900631.096297  197036 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733900631.105514  197036 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-11 12:33:51.130889: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:

# Locations of the pre-cleaned train / test CSV files.
TRAIN_CSV = '../input/credit-dset/clean_trained.csv'
TEST_CSV = '../input/credit-dset/test_cleaned.csv'

# Read both files with pandas' default parsing options.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))


In [3]:
# One-hot encode the nominal columns. For every column a fresh encoder is
# fitted on the training frame only and then reused for the test frame, so
# both frames receive exactly the same set of dummy columns.
one_hot_encode_cols = ['Month', 'Profession']
for col in one_hot_encode_cols:
    ohe = OneHotEncoder(sparse_output=False)
    encoded_train = ohe.fit_transform(df_train[[col]])
    encoded_test = ohe.transform(df_test[[col]])

    # Dummy column names follow the "<column>_<category>" pattern.
    encoded_cols = [f"{col}_{category}" for category in ohe.categories_[0]]

    # Reuse each frame's own index: pd.concat(axis=1) aligns rows on index
    # labels, so a fresh RangeIndex would misalign rows (and introduce NaNs)
    # whenever the source frame does not carry the default index.
    df_train_encoded = pd.DataFrame(encoded_train, columns=encoded_cols, index=df_train.index)
    df_test_encoded = pd.DataFrame(encoded_test, columns=encoded_cols, index=df_test.index)

    # Replace the raw categorical column by its dummy columns (appended last).
    df_train = pd.concat([df_train.drop(columns=[col]), df_train_encoded], axis=1)
    df_test = pd.concat([df_test.drop(columns=[col]), df_test_encoded], axis=1)

In [4]:
# Ordinal codes for Credit_Mix; values missing from the mapping become NaN.
cred_mix_dict = {"Bad": 0, "Standard": 1, "Good": 2}
for frame in (df_train, df_test):
    frame['Credit_Mix'] = frame['Credit_Mix'].map(cred_mix_dict)

In [5]:
# Integer codes for the two halves of Payment_Behaviour.
pay_type_dict = {'Low' : 0, 'High' : 1}
val_pay_dict = {'Small_value_payments' : 0,'Medium_value_payments' : 1,'Large_value_payments' : 2}


def _encode_payment_behaviour(frame):
    """Split Payment_Behaviour on '_spent_' into pay_type / val_pay and encode both.

    The text before the separator is stored in 'pay_type' and the text after it
    in 'val_pay'; the original column is dropped and both new columns are mapped
    to integers through pay_type_dict / val_pay_dict (unmapped values become NaN).
    The two new columns are added to ``frame`` in place before the drop, and the
    resulting frame is returned.
    """
    frame[['pay_type', 'val_pay']] = frame['Payment_Behaviour'].str.split(
        pat='_spent_', n=1, expand=True
    )
    frame = frame.drop(columns=['Payment_Behaviour'])
    frame['pay_type'] = frame['pay_type'].map(pay_type_dict)
    frame['val_pay'] = frame['val_pay'].map(val_pay_dict)
    return frame


df_train = _encode_payment_behaviour(df_train)
df_test = _encode_payment_behaviour(df_test)

In [6]:
# Binary encoding of Payment_of_Min_Amount; any other value becomes NaN.
yes_no_dict = {'Yes': 1, 'No': 0}
for frame in (df_train, df_test):
    frame['Payment_of_Min_Amount'] = frame['Payment_of_Min_Amount'].map(yes_no_dict)

In [7]:
# Encode the target with an explicit order: Poor -> 0, Standard -> 1, Good -> 2.
# The fitted encoder is kept so predictions can be mapped back to labels later.
credit_score_order = ['Poor', 'Standard', 'Good']
encoder_final = OrdinalEncoder(categories=[credit_score_order])
encoded_credit_score = encoder_final.fit_transform(df_train[['Credit_Score']])
df_train['Credit_Score'] = encoded_credit_score

# Show the distinct encoded values as the cell output.
df_train['Credit_Score'].unique()

array([1., 0., 2.])

In [8]:
# Predictors are every training column except the target and 'Number'.
non_feature_cols = ['Credit_Score', 'Number']
X = df_train.drop(columns=non_feature_cols)
Y = df_train['Credit_Score']

# Standardise the predictors; the fitted scaler is reused on the test frame later.
# NOTE(review): the scaler is fitted on all training rows, before the
# train/validation split performed in a later cell.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# 'Number' is removed from the test frame as well.
df_test = df_test.drop(columns=['Number'])

In [9]:
# Hold out 20% of the rows for validation. The scaled matrix is split as-is
# (2-D, rows x features): the network's Input is declared with shape
# (n_features,) and inference feeds a 2-D array too, so no trailing unit axis
# is added here. random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, Y, test_size=0.2, random_state=42
)

In [10]:
# Fix TensorFlow's global seed before any layer is created.
tf.random.set_seed(42)


def _hidden_block(units, dropout_rate=None, kernel_regularizer=None):
    """Return the layers of one hidden stage: Dense(ReLU) -> BatchNorm [-> Dropout].

    The Dropout layer is only appended when ``dropout_rate`` is given.
    """
    block = [
        Dense(units, activation='relu', kernel_regularizer=kernel_regularizer),
        BatchNormalization(),
    ]
    if dropout_rate is not None:
        block.append(Dropout(dropout_rate))
    return block


# Model architecture: four hidden stages followed by a 3-way softmax output.
n_features = X_train.shape[1]
model = Sequential(
    [Input(shape=(n_features,))]
    + _hidden_block(256, dropout_rate=0.35)
    + _hidden_block(512, kernel_regularizer=l1(1e-4))  # L1 penalty, no dropout
    + _hidden_block(256, dropout_rate=0.1)
    + _hidden_block(256, dropout_rate=0.1)
    + [Dense(3, activation='softmax')]
)

# Model compilation: integer class targets, hence the sparse cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=0.0003),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for 35 epochs and roll the
# weights back to the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=35,
    restore_best_weights=True,
)

# Model training; the held-out split doubles as the validation set.
fit_options = dict(
    validation_data=(X_test, y_test),
    batch_size=1024,
    epochs=500,
    verbose=1,
    callbacks=[early_stopping],
)
history = model.fit(x=X_train, y=y_train, **fit_options)

2024-12-11 12:33:54.336492: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - accuracy: 0.4552 - loss: 1.8368 - val_accuracy: 0.5842 - val_loss: 1.5471
Epoch 2/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - accuracy: 0.5710 - loss: 1.5294 - val_accuracy: 0.6104 - val_loss: 1.4325
Epoch 3/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - accuracy: 0.6147 - loss: 1.4376 - val_accuracy: 0.6276 - val_loss: 1.3487
Epoch 4/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - accuracy: 0.6273 - loss: 1.3800 - val_accuracy: 0.6502 - val_loss: 1.3012
Epoch 5/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 49ms/step - accuracy: 0.6370 - loss: 1.3366 - val_accuracy: 0.6598 - val_loss: 1.2681
Epoch 6/500
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 51ms/step - accuracy: 0.6423 - loss: 1.3034 - val_accuracy: 0.6649 - val_loss: 1.2452
Epoch 7/500
[1m63/63[0m [

In [11]:
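# NOTE: disabled draft of a final retraining run (fixed 260 epochs, no validation).
# As written it cannot run: it builds `final_model` but then compiles and fits
# `model`, and `X_train_scaled` is never defined in this notebook (hence the
# NameError recorded below). TODO: fix both before re-enabling this cell.
# tf.random.set_seed(42)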

# # Model architecture
# final_model = Sequential([
#     Input(shape=(X_train.shape[1],)),
#     Dense(256, activation='relu',),
#     BatchNormalization(),
#     Dropout(0.35),

#     Dense(512, activation='relu', kernel_regularizer=l1(1e-4)),
#     BatchNormalization(),

#     Dense(256, activation='relu'),
#     BatchNormalization(),
#     Dropout(0.1),

#     Dense(256, activation='relu'),
#     BatchNormalization(),
#     Dropout(0.1),

#     Dense(3, activation='softmax')
# ])

# # Model compilation
# model.compile(optimizer=Adam(learning_rate=0.0003),
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])


# # Model training
# history = model.fit(x=X_train_scaled,
#                     y=y_train,
#                     batch_size=1024,
#                     epochs=260,
#                     verbose=1)

NameError: name 'X_train_scaled' is not defined

In [13]:
# Scale the test features with the scaler fitted on the training data
# ('ID' is not a feature) and get the class probabilities from the network.
test_features = df_test.drop(columns=['ID'])
test_predictions = model.predict(scaler.transform(test_features))

# Most probable class per row, mapped back to its original Credit_Score label.
pred_class = test_predictions.argmax(axis=1)
predicted_labels = encoder_final.inverse_transform(pred_class.reshape(-1, 1)).reshape(-1)

# Write the submission file: one row per test ID with its predicted label.
submission = pd.DataFrame({
    'ID': df_test['ID'],
    'Predicted': predicted_labels,
})
submission.to_csv('sub_ann.csv', index=False)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
