<a href="https://colab.research.google.com/github/LahiWeer/2330902-ML_CW/blob/master/ML%20models/NN_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Step 1: Load the dataset
data_url = 'https://raw.githubusercontent.com/LahiWeer/2330902-ML_CW/refs/heads/master/Preprocessing/resampled_and_original_data.csv'
data = pd.read_csv(data_url)

In [33]:
# Step 2: Separate features (X) and target (y)
X = data.drop('y', axis=1)
y = data['y']

# Step 3: List of columns to scale
columns_to_scale = ['age', 'campaign', 'pdays', 'previous', 'emp.var.rate',
                    'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed',
                    'default', 'housing', 'loan', 'contact', 'month',
                    'day_of_week', 'poutcome', 'education']

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Step 5: Apply scaling only to specific columns
scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()

# Scale only the specified columns
X_train_scaled[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])
X_test_scaled[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

# Step 6: Initialize model
model = Sequential()
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile model
optimizer = Adam(learning_rate=0.0025)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks for early stopping and reduce learning rate
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

# Step 7: Hyperparameter tuning with Stratified K-Fold Cross Validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_idx, val_idx in kfold.split(X_train_scaled, y_train):
    X_train_fold, X_val_fold = X_train_scaled.iloc[train_idx], X_train_scaled.iloc[val_idx]
    y_train_fold, y_val_fold = y_train.iloc[train_idx], y_train.iloc[val_idx]

    model.fit(
        X_train_fold, y_train_fold,
        validation_data=(X_val_fold, y_val_fold),
        epochs=700,
        batch_size=128,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

Epoch 1/700


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.7696 - loss: 0.4851 - val_accuracy: 0.8379 - val_loss: 0.3685 - learning_rate: 0.0025
Epoch 2/700
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8403 - loss: 0.3716 - val_accuracy: 0.8447 - val_loss: 0.3520 - learning_rate: 0.0025
Epoch 3/700
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8434 - loss: 0.3613 - val_accuracy: 0.8454 - val_loss: 0.3463 - learning_rate: 0.0025
Epoch 4/700
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8498 - loss: 0.3494 - val_accuracy: 0.8511 - val_loss: 0.3490 - learning_rate: 0.0025
Epoch 5/700
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8493 - loss: 0.3520 - val_accuracy: 0.8527 - val_loss: 0.3425 - learning_rate: 0.0025
Epoch 6/700
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m

In [34]:
# Step 8: Predict on Test Data
y_pred_prob = model.predict(X_test_scaled)  # Predicted probabilities
y_pred = (y_pred_prob > 0.5).astype(int)    # Convert probabilities to binary predictions

# Step 9: Evaluate Model
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Classification Report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Accuracy: 0.8870811073336571

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.90      0.89      6177
           1       0.90      0.88      0.89      6177

    accuracy                           0.89     12354
   macro avg       0.89      0.89      0.89     12354
weighted avg       0.89      0.89      0.89     12354

