<a href="https://colab.research.google.com/github/FishyDanny/Road-Safety-Risk-Prediction/blob/main/04_train_deep_learning_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
!pip install keras-tuner -q
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_auc_score, RocCurveDisplay
import keras_tuner as kt
from sklearn.metrics import f1_score, roc_auc_score, RocCurveDisplay
from google.colab import drive

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
drive.mount('/content/drive')

# Single source of truth for the folder holding the preprocessed splits,
# instead of repeating the full path on every load.
PROCESSED_DIR = '/content/drive/MyDrive/ACTL3143_project/processed'


def load_split(name):
    """Load the object stored in ``<PROCESSED_DIR>/<name>.pkl`` with joblib.

    Args:
        name: File stem of the split, e.g. ``'X_train'``.

    Returns:
        Whatever object was serialised into the file.

    NOTE(security): joblib.load unpickles the file, which can execute
    arbitrary code. Only load files from this project's own Drive folder.
    """
    return joblib.load(f'{PROCESSED_DIR}/{name}.pkl')


# Load preprocessed data (same files, same order as before)
X_train = load_split('X_train')
X_val = load_split('X_val')
y_train = load_split('y_train')
y_val = load_split('y_val')
X_test = load_split('X_test')
y_test = load_split('y_test')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Fix the seed once, before any model is built, so runs are repeatable.
SEED = 42
keras.utils.set_random_seed(SEED)

In [None]:
def _restore_tuner(placeholder_model_path, project_name):
    """Re-open a finished Bayesian-optimisation search stored on Drive.

    The saved Keras model at ``placeholder_model_path`` is loaded and passed
    as the tuner's first positional argument; ``max_trials=0`` is used and
    ``reload()`` is called on the resulting tuner.
    NOTE(review): the loaded model presumably only stands in for the original
    build function, which is not defined in this notebook — confirm.

    Args:
        placeholder_model_path: Path of the ``.keras`` file to load.
        project_name: Sub-folder of the tuning directory for this search.

    Returns:
        The ``kt.BayesianOptimization`` tuner after ``reload()``.
    """
    tuner = kt.BayesianOptimization(
        keras.models.load_model(placeholder_model_path),
        objective='val_auc',
        max_trials=0,
        directory='/content/drive/MyDrive/ACTL3143_project/tuning',
        project_name=project_name,
    )
    tuner.reload()
    return tuner


# Tuner for the first model (feedforward)
first_tuner = _restore_tuner(
    '/content/drive/MyDrive/ACTL3143_project/best_first_model.keras',
    'first_model',
)

# Tuner for the second model (residual)
second_tuner = _restore_tuner(
    '/content/drive/MyDrive/ACTL3143_project/best_second_model.keras',
    'second_model',
)

  saveable.load_own_variables(weights_store.get(inner_path))


Reloading Tuner from /content/drive/MyDrive/ACTL3143_project/tuning/first_model/tuner0.json


  saveable.load_own_variables(weights_store.get(inner_path))


Reloading Tuner from /content/drive/MyDrive/ACTL3143_project/tuning/second_model/tuner0.json


In [None]:
# Take the top-ranked hyperparameter set from each restored tuner
best_first_hps = first_tuner.get_best_hyperparameters()[0]
best_second_hps = second_tuner.get_best_hyperparameters()[0]

# Heading and values go on separate lines; the second heading keeps its
# leading blank line.
print("First model best hyperparameters:", best_first_hps.values, sep="\n")
print("\nSecond model best hyperparameters:", best_second_hps.values, sep="\n")

First model best hyperparameters:
{'num_layers': 3, 'units_0': 192, 'dropout_0': 0.30000000000000004, 'lr': 0.0006562893605899151, 'units_1': 192, 'dropout_1': 0.1, 'units_2': 128, 'dropout_2': 0.5, 'units_3': 192, 'dropout_3': 0.30000000000000004}

Second model best hyperparameters:
{'initial_units': 160, 'num_blocks': 4, 'block_0_units': 256, 'dropout_0': 0.5, 'lr': 0.00011811840774830564, 'block_1_units': 128, 'dropout_1': 0.30000000000000004, 'block_2_units': 64, 'dropout_2': 0.1, 'block_3_units': 64, 'dropout_3': 0.1}


In [None]:
# Rebuild models with best hyperparameters
def build_final_first_model(hp, input_dim=None):
    """Build and compile the feedforward classifier from tuned hyperparameters.

    Architecture: batch normalisation on the raw inputs, then ``num_layers``
    repetitions of Dense(relu) -> BatchNormalization -> Dropout, then a single
    sigmoid output unit.

    Args:
        hp: Object exposing ``get(name)`` for the keys ``num_layers``, ``lr``
            and, for each ``i`` in ``range(num_layers)``, ``units_{i}`` and
            ``dropout_{i}``. Entries with a higher index are ignored.
        input_dim: Number of input features. When omitted, falls back to
            ``X_train.shape[1]`` from the notebook's global state.

    Returns:
        A ``keras.Sequential`` model compiled with Adam (gradient norm clipped
        at 1.0), binary cross-entropy loss, and accuracy + AUC metrics. The
        AUC metric is named ``'auc'``.
    """
    if input_dim is None:
        input_dim = X_train.shape[1]

    model = keras.Sequential()
    # Declare the input with an explicit Input object rather than passing
    # `input_shape=` to the first layer, which Keras 3 warns against.
    model.add(keras.Input(shape=(input_dim,)))
    model.add(layers.BatchNormalization())

    # Build with best hyperparameters
    for i in range(hp.get('num_layers')):
        model.add(layers.Dense(
            units=hp.get(f'units_{i}'),
            activation='relu'
        ))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(
            rate=hp.get(f'dropout_{i}')
        ))

    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.get('lr'),
            clipnorm=1.0
        ),
        loss='binary_crossentropy',
        metrics=['accuracy', keras.metrics.AUC(name='auc')]
    )
    return model

def build_final_second_model(hp):
    """Build and compile the residual classifier from tuned hyperparameters.

    A Dense(relu) + BatchNormalization stem of width ``initial_units`` is
    followed by ``num_blocks`` residual blocks and a single sigmoid output.
    The input width is read from the global ``X_train.shape[1]``.

    Args:
        hp: Object exposing ``get(name)`` for the keys ``initial_units``,
            ``num_blocks``, ``lr`` and, for each block ``i``,
            ``block_{i}_units`` and ``dropout_{i}``.

    Returns:
        A ``keras.Model`` compiled with Adam (gradient norm clipped at 1.0),
        binary cross-entropy loss, and accuracy + AUC metrics.
    """

    def residual_block(tensor, width, drop_rate):
        # Dense(relu) -> BatchNorm -> Dropout, then a linear Dense that maps
        # back to the incoming width so the skip connection can be added.
        shortcut = tensor
        out = layers.Dense(width, activation='relu')(tensor)
        out = layers.BatchNormalization()(out)
        out = layers.Dropout(drop_rate)(out)
        out = layers.Dense(shortcut.shape[-1])(out)
        out = layers.add([out, shortcut])
        return layers.Activation('relu')(out)

    n_features = X_train.shape[1]
    inputs = keras.Input(shape=(n_features,))

    # Stem
    hidden = layers.Dense(hp.get('initial_units'), activation='relu')(inputs)
    hidden = layers.BatchNormalization()(hidden)

    # Residual stack, configured from the best hyperparameters
    for block_idx in range(hp.get('num_blocks')):
        hidden = residual_block(
            hidden,
            hp.get(f'block_{block_idx}_units'),
            hp.get(f'dropout_{block_idx}'),
        )

    outputs = layers.Dense(1, activation='sigmoid')(hidden)
    model = keras.Model(inputs=inputs, outputs=outputs)

    optimizer = keras.optimizers.Adam(learning_rate=hp.get('lr'), clipnorm=1.0)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy', keras.metrics.AUC(name='auc')],
    )
    return model

# Instantiate both final models from their tuned hyperparameters
# (feedforward first, then residual).
final_first_model = build_final_first_model(hp=best_first_hps)
final_second_model = build_final_second_model(hp=best_second_hps)

  super().__init__(**kwargs)


In [None]:
# Training configuration shared by both final models
MAX_EPOCHS = 100
BATCH_SIZE = 64
EARLY_STOPPING_PATIENCE = 5


def make_early_stopping():
    """Return a new EarlyStopping callback that tracks validation AUC.

    Training stops once ``val_auc`` has not improved for
    ``EARLY_STOPPING_PATIENCE`` epochs, and the best weights are restored.
    """
    return keras.callbacks.EarlyStopping(
        patience=EARLY_STOPPING_PATIENCE,
        restore_best_weights=True,
        monitor='val_auc',
        mode='max'
    )


def fit_final_model(model, early_stopping_cb):
    """Fit ``model`` on the training split, validating on the validation split.

    Args:
        model: Compiled Keras model to train.
        early_stopping_cb: Callback instance used for this fit only.

    Returns:
        The history object returned by ``model.fit``.
    """
    return model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=MAX_EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stopping_cb],
        verbose=1
    )


# Callbacks: each fit gets its own instance so that no internal tracking state
# (best score, wait counter, stored weights) can carry over from the first
# model's run into the second.
early_stopping = make_early_stopping()
second_early_stopping = make_early_stopping()

# Train models
print("Training first model:")
first_history = fit_final_model(final_first_model, early_stopping)

print("Training second model:")
second_history = fit_final_model(final_second_model, second_early_stopping)

Training first model:
Epoch 1/100
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 15ms/step - accuracy: 0.6273 - auc: 0.6737 - loss: 0.7521 - val_accuracy: 0.7102 - val_auc: 0.7805 - val_loss: 0.5599
Epoch 2/100
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.6884 - auc: 0.7493 - loss: 0.5957 - val_accuracy: 0.7161 - val_auc: 0.7890 - val_loss: 0.5513
Epoch 3/100
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7054 - auc: 0.7739 - loss: 0.5659 - val_accuracy: 0.7184 - val_auc: 0.7925 - val_loss: 0.5462
Epoch 4/100
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7171 - auc: 0.7846 - loss: 0.5551 - val_accuracy: 0.7204 - val_auc: 0.7940 - val_loss: 0.5440
Epoch 5/100
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7174 - auc: 0.7878 - loss: 0.5497 - val_accuracy: 0.7208 - val_auc: 0.7938 - val_loss: 0.5448
E

In [None]:
# Save final models
MODELS_DIR = '/content/drive/MyDrive/ACTL3143_project/models'

# Create the target folder (and any missing parents) first, so saving does not
# depend on it having been created by hand; this is a no-op if it exists.
tf.io.gfile.makedirs(MODELS_DIR)

final_first_model.save(f'{MODELS_DIR}/final_first_dl_model.keras')
final_second_model.save(f'{MODELS_DIR}/final_second_dl_model.keras')