In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import keras_tuner as kt
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Concatenate, Dropout
from tensorflow.keras.layers import Bidirectional

In [2]:
import numpy as np
df = pd.read_excel("default of credit card clients.xls", skiprows=1)

print("Columns in dataset:\n", df.columns.tolist())

if 'default payment next month' in df.columns:
    df.rename(columns={"default payment next month": "target"}, inplace=True)
elif 'target' not in df.columns:
    raise ValueError("Target column not found!")


if 'ID' in df.columns:
    df.drop(columns=['ID'], inplace=True)

df = df.apply(pd.to_numeric, errors='coerce')
df.fillna(df.median(numeric_only=True), inplace=True)

X = df.drop(columns=['target'])
y = df['target']

time_seq_cols = [
    ['BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6'],
    ['PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6'],
    ['PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']
]

X_seq = np.stack([df[cols].values for cols in time_seq_cols], axis=2)
sequence_flattened = sum(time_seq_cols, [])
static_cols = [col for col in df.columns if col not in sequence_flattened + ['target', 'ID']]

X_static = df[static_cols].values

y = df['target'].values


Columns in dataset:
 ['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'default payment next month']


In [3]:
def focal_loss(gamma=2., alpha=0.25):
    def loss(y_true, y_pred):
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
        return -tf.reduce_mean(alpha * tf.pow(1. - pt, gamma) * tf.math.log(pt))
    return loss


In [4]:
df['avg_bill_amt'] = df[[f'BILL_AMT{i}' for i in range(1, 7)]].mean(axis=1)
df['avg_pay_amt'] = df[[f'PAY_AMT{i}' for i in range(1, 7)]].mean(axis=1)
df['limit_util_ratio'] = df['avg_bill_amt'] / df['LIMIT_BAL']


In [5]:
scaler = StandardScaler()
X_static_scaled = scaler.fit_transform(X_static)

X_seq_train, X_seq_test, X_static_train, X_static_test, y_train, y_test = train_test_split(
    X_seq, X_static_scaled, y, test_size=0.2, random_state=42, stratify=y
)


In [6]:
seq_input = Input(shape=(6, 3), name="sequence_input")
x_seq = Bidirectional(LSTM(64, return_sequences=False))(seq_input)
x_seq = Dropout(0.3)(x_seq)

static_input = Input(shape=(X_static_train.shape[1],), name="static_input")
x_static = Dense(128, activation='relu')(static_input)
x_static = Dropout(0.3)(x_static)
x_static = Dense(64, activation='relu')(x_static)

x = Concatenate()([x_seq, x_static])
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)

hybrid_model = Model(inputs=[seq_input, static_input], outputs=output)

hybrid_model.compile(optimizer='adam', loss=focal_loss(), metrics=['Accuracy'])


In [8]:
history = hybrid_model.fit(
    {'sequence_input': X_seq_train, 'static_input': X_static_train},
    y_train,
    validation_split=0.2,
    epochs=25,
    batch_size=64,
    verbose=1, 
    class_weight = class_weights_dict
)


Epoch 1/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - Accuracy: 0.7640 - loss: 0.0352 - val_Accuracy: 0.7663 - val_loss: 0.0337
Epoch 2/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - Accuracy: 0.7786 - loss: 0.0332 - val_Accuracy: 0.7683 - val_loss: 0.0333
Epoch 3/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - Accuracy: 0.7787 - loss: 0.0331 - val_Accuracy: 0.7688 - val_loss: 0.0334
Epoch 4/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - Accuracy: 0.7788 - loss: 0.0330 - val_Accuracy: 0.7633 - val_loss: 0.0336
Epoch 5/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - Accuracy: 0.7855 - loss: 0.0322 - val_Accuracy: 0.7683 - val_loss: 0.0333
Epoch 6/25
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - Accuracy: 0.7840 - loss: 0.0324 - val_Accuracy: 0.7683 - val_loss: 0.0333
Epoch 7/25
[1m300/300[0m 

In [9]:
loss, acc = hybrid_model.evaluate({'sequence_input': X_seq_test, 'static_input': X_static_test}, y_test)
print(f"✅ Hybrid Model Accuracy: {acc:.4f}")


[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - Accuracy: 0.7694 - loss: 0.0334
✅ Hybrid Model Accuracy: 0.7760
