<a href="https://colab.research.google.com/github/ShuHuiK/WIE3007_Group_Assignment/blob/ShuHui/Neural_Network_Model_Development.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, roc_auc_score, f1_score, precision_score, recall_score, roc_curve

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 1. LOAD DATA
# The CSV is referenced by a relative path, so it is resolved against the
# kernel's current working directory.
DATA_FILE = '2025_Sterling_Financial_Dataset_clean.csv'
df = pd.read_csv(DATA_FILE)

In [2]:
# 2. PREPROCESSING
# Separate the target ('default_history') from the predictors and drop the
# columns that are not used as model inputs.
drop_cols = ['date', 'customer_id', 'customer_feedback', 'location']
X = df.drop(columns=['default_history'] + drop_cols)
y = df['default_history']

# Defining numeric and categorical features by dtype.
# NOTE(review): columns whose dtype is neither int64/float64 nor object
# (e.g. bool, category, int32) land in neither list and are dropped by the
# ColumnTransformer's default remainder='drop' -- confirm none are expected.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

# Creating the transformer pipeline: standardise numeric columns, one-hot
# encode categorical ones (categories unseen at fit time encode as all zeros).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features)
    ])

# Fit on the TRAINING rows only, then transform every row.
# Fitting on the full dataset would leak test-set statistics (means, standard
# deviations, category lists) into the features the model is trained on.
# The row positions are obtained with the same test_size / random_state /
# stratify settings as the train/test split applied to X_processed, so the
# rows used for fitting here are exactly the rows that end up in X_train.
# Keep these settings in sync with that split.
train_idx, _ = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42, stratify=y
)
preprocessor.fit(X.iloc[train_idx])
X_processed = preprocessor.transform(X)

In [3]:
# 3. SPLIT DATA
# Hold out 20% of the rows as a test set. Stratifying on the target keeps the
# class proportions (and therefore the minority class) present in both parts.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
split_parts = train_test_split(X_processed, y.values, **split_options)
X_train, X_test, y_train, y_test = split_parts

In [4]:
# 4. HANDLE CLASS IMBALANCE
# 'balanced' weights are inversely proportional to class frequency, so errors
# on the rarer class contribute more to the loss.
classes = np.unique(y_train)
weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_train)

# Key the mapping by the actual class labels rather than by position, so it
# stays correct even if the labels are not exactly 0..k-1. tolist() converts
# NumPy scalars to native Python values for use as plain dict keys/values.
class_weights = dict(zip(classes.tolist(), weights.tolist()))