In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from  keras.callbacks import EarlyStopping
from keras.layers import SimpleRNN
from imblearn.over_sampling import SMOTE
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

In [16]:
train_path = '../input/credit-dset/clean_trained_outlier.csv'
test_path = '../input/credit-dset/test_cleaned_outlier.csv'

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

if (test_path == '../input/credit-dset/test_cleaned.csv'):
    test_df.drop(['Number'], axis=1, inplace=True)

month_map = {
    'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
    'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12
}

# Convert month names to numbers
train_df['Month'] = train_df['Month'].map(month_map)
test_df['Month'] = test_df['Month'].map(month_map)

train_df['Month_sin'] = np.sin(2 * np.pi * train_df['Month'] / 12)
train_df['Month_cos'] = np.cos(2 * np.pi * train_df['Month'] / 12)
test_df['Month_sin'] = np.sin(2 * np.pi * test_df['Month'] / 12)
test_df['Month_cos'] = np.cos(2 * np.pi * test_df['Month'] / 12)
train_df.drop(['Month'], axis=1, inplace=True)
test_df.drop(['Month'], axis=1, inplace=True)

In [17]:
categorical_cols = []
numerical_cols = []

for col in train_df.columns:
    if col != 'Credit_Score':
        if train_df[col].dtype == 'object':
            categorical_cols.append(col)
        else:
            numerical_cols.append(col)
    else:
        print('Skipping Credit_Score column')

print(categorical_cols)
print(numerical_cols)

Skipping Credit_Score column
['Profession', 'Credit_Mix', 'Payment_of_Min_Amount', 'Payment_Behaviour']
['Age', 'Income_Annual', 'Base_Salary_PerMonth', 'Total_Bank_Accounts', 'Total_Credit_Cards', 'Rate_Of_Interest', 'Delay_from_due_date', 'Total_Delayed_Payments', 'Credit_Limit', 'Total_Credit_Enquiries', 'Current_Debt_Outstanding', 'Ratio_Credit_Utilization', 'Credit_History_Age', 'Per_Month_EMI', 'Monthly_Investment', 'Monthly_Balance', 'Payday Loan', 'Mortgage Loan', 'Debt Consolidation Loan', 'Student Loan', 'Not Specified', 'Auto Loan', 'Credit-Builder Loan', 'Personal Loan', 'Home Equity Loan', 'Total_Current_Loans', 'Month_sin', 'Month_cos']


In [18]:
# string -> no of months
def convert_to_2_cols(s):
    parts = s.split('_spent_')
    spent = parts[0]
    payment = parts[1].split('_payments')[0]
    return pd.Series([spent,payment])

train_df[['Spent', 'Value_Payments']] = train_df['Payment_Behaviour'].apply(convert_to_2_cols)
train_df.drop(['Payment_Behaviour'], axis=1, inplace=True)

test_df[['Spent', 'Value_Payments']] = test_df['Payment_Behaviour'].apply(convert_to_2_cols)
test_df.drop(['Payment_Behaviour'], axis=1, inplace=True)

In [19]:
encoder = OneHotEncoder(sparse_output=False)

encoded_columns = encoder.fit_transform(train_df[['Profession', 'Payment_of_Min_Amount']])
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(['Profession', 'Payment_of_Min_Amount']))
train_df = pd.concat([train_df, encoded_df], axis=1)

train_df.drop(['Profession', 'Payment_of_Min_Amount'], axis=1, inplace=True)

label_encoder = LabelEncoder()
train_df['Credit_Mix'] = label_encoder.fit_transform(train_df[
    'Credit_Mix'
])

train_df['Spent'] = label_encoder.fit_transform(train_df[
    'Spent'
])

train_df['Value_Payments'] = label_encoder.fit_transform(train_df[
    'Value_Payments'
])

In [20]:
encoded_columns = encoder.fit_transform(test_df[['Profession', 'Payment_of_Min_Amount']])
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(['Profession', 'Payment_of_Min_Amount']))
test_df = pd.concat([test_df, encoded_df], axis=1)

test_df.drop(['Profession', 'Payment_of_Min_Amount'], axis=1, inplace=True)


test_df['Credit_Mix'] = label_encoder.fit_transform(test_df[
    'Credit_Mix'
])

test_df['Spent'] = label_encoder.fit_transform(test_df[
    'Spent'
])

test_df['Value_Payments'] = label_encoder.fit_transform(test_df[
    'Value_Payments'
])

In [21]:
scaler = StandardScaler()
train_df[numerical_cols] = scaler.fit_transform(train_df[numerical_cols])

train_df['Credit_Score'] = label_encoder.fit_transform(train_df['Credit_Score'])

X = train_df.drop(['Credit_Score'], axis=1)
y = to_categorical(train_df['Credit_Score'])

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 10:
        lr = 1e-4
    return lr

optimizer = Adam(learning_rate=1e-3)

model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[early_stopping, LearningRateScheduler(lr_schedule)]
)

loss, accuracy = model.evaluate(X_val, y_val)
print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")
# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val)
print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")


test_df[numerical_cols] = scaler.transform(test_df[numerical_cols])
X_test_final = test_df.drop(['ID'], axis=1)

test_preds = model.predict(X_test_final)
predicted_classes = test_preds.argmax(axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_classes)

submission = pd.DataFrame({
    'ID': test_df['ID'], 
    'Credit_Score': predicted_labels
})

submission.to_csv('rnn.csv', index=False)
print("Submission file 'rnn.csv' created successfully.")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.5744 - loss: 0.9464 - val_accuracy: 0.6796 - val_loss: 0.6869 - learning_rate: 0.0010
Epoch 2/50
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6750 - loss: 0.7072 - val_accuracy: 0.6888 - val_loss: 0.6690 - learning_rate: 0.0010
Epoch 3/50
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6839 - loss: 0.6901 - val_accuracy: 0.6917 - val_loss: 0.6620 - learning_rate: 0.0010
Epoch 4/50
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6929 - loss: 0.6741 - val_accuracy: 0.6981 - val_loss: 0.6493 - learning_rate: 0.0010
Epoch 5/50
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6974 - loss: 0.6643 - val_accuracy: 0.6963 - val_loss: 0.6511 - learning_rate: 0.0010
Epoch 6/50
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1