In [None]:
pip install keras-tuner optuna


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m18.5 MB/s[0m eta [36m

In [None]:
import optuna
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
# Reset Keras' global state so re-running this cell in a live kernel starts
# from a clean slate.
tf.keras.backend.clear_session()

# Load and preprocess data
df = pd.read_csv("lstm_ready_traffic_data.csv")
# NOTE(review): 'datetime' is parsed here but is neither used for sorting nor
# included in `features`; the windowing further down relies on the CSV's row
# order — confirm the file is already in chronological order.
df['datetime'] = pd.to_datetime(df['datetime'])

# Text day names become integer category codes. pandas assigns the codes in
# lexically sorted category order, not calendar order.
if df['day_of_week'].dtype == 'object':
    df['day_of_week'] = df['day_of_week'].astype('category').cat.codes

# NOTE(review): these columns are fed to the network unscaled (only the target
# is scaled below) — confirm that is intended for columns such as 'year'.
features = [
    'hour', 'day_of_week', 'month', 'year', 'road_name_encoded',
    'location_encoded', 'suburb_encoded', 'average_speed',
    'maximum_speed', 'Speeding_Incident', 'Speeding_Incident_Avg_Speed'
]
target = 'Total_Traffic_Volume'

# Fit the target scaler on the leading 80% of rows only. The later split is
# chronological (shuffle=False, test_size=0.2), so with the 24-step windows
# used by this notebook these rows never supply a test target; fitting on the
# full column would leak the test period's min/max into training.
# Scaled values in the held-out rows may therefore fall outside [0, 1].
TRAIN_FRACTION = 0.8  # keep in sync with the test_size used for the split
n_scaler_rows = int(len(df) * TRAIN_FRACTION)
scaler = MinMaxScaler()
scaler.fit(df[[target]].iloc[:n_scaler_rows])
df['scaled_traffic_volume'] = scaler.transform(df[[target]])

# Plain float32 arrays: one row of features and one scaled target per CSV row.
X = df[features].values.astype('float32')
y = df['scaled_traffic_volume'].values.astype('float32')

def create_sequences(X, y, time_steps=24):
    """Slice row-aligned features and targets into sliding windows.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with ``y[i + time_steps]``, i.e. the target of the row immediately after
    the window.

    Args:
        X: Array-like whose first axis indexes rows (e.g. ``(n_rows, n_features)``).
        y: Array-like of length ``n_rows`` aligned with ``X``.
        time_steps: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(Xs, ys)`` where ``Xs`` has shape
        ``(n_rows - time_steps, time_steps, *X.shape[1:])`` and ``ys`` has shape
        ``(n_rows - time_steps,)``. When there are not enough rows for a single
        window, both arrays are empty but keep that shape convention and the
        input dtypes, so downstream ``.shape[1]`` / ``.shape[2]`` lookups work.

    Raises:
        ValueError: If ``time_steps`` is < 1 or ``X`` and ``y`` differ in length.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")

    n_windows = len(X) - time_steps
    if n_windows <= 0:
        # np.array([]) would give shape (0,) and float64; keep rank and dtype.
        empty_X = np.empty((0, time_steps) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    Xs = np.stack([X[start:start + time_steps] for start in range(n_windows)])
    # Targets are simply y shifted by the window length; copy to avoid
    # returning a view into the caller's array.
    ys = y[time_steps:].copy()
    return Xs, ys

# Window the full series (24 rows per sample), then hold out the last 20% of
# windows as the test set. shuffle=False keeps the original row order, so the
# test windows are the final ones in the file.
X_seq, y_seq = create_sequences(X, y, time_steps=24)
X_train, X_test, y_train, y_test = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    shuffle=False,
)


In [None]:
def objective(trial):
    """Optuna objective: train a three-layer stacked LSTM and return its validation MSE.

    Hyperparameters sampled per trial (names are kept stable because the
    rebuild cell reads them back from ``study.best_params``):
    ``units_1/2/3``, ``activation``/``activation2``/``activation3``,
    ``dropout_1/2/3`` and ``optimizer``.

    The score is computed on a chronological hold-out taken from the tail of
    ``X_train``/``y_train`` rather than on ``X_test``/``y_test``, so the test
    set is not used for model selection and stays unbiased for the final
    evaluation.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        float: Mean squared error on the validation hold-out (lower is better).
    """
    # Each trial builds a new model; drop the previous trial's Keras state so
    # memory does not keep growing over the course of the study.
    tf.keras.backend.clear_session()

    # Last 20% of the training windows (in row order) form the validation set.
    n_val = max(1, int(len(X_train) * 0.2))
    X_fit, y_fit = X_train[:-n_val], y_train[:-n_val]
    X_val, y_val = X_train[-n_val:], y_train[-n_val:]

    model = Sequential()
    # Declare the input explicitly; passing input_shape= to the first layer
    # triggers a Keras warning in Sequential models.
    model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))

    # Layer 1
    model.add(LSTM(
        units=trial.suggest_int("units_1", 32, 128, step=32),
        activation=trial.suggest_categorical("activation", ["relu", "tanh"]),
        return_sequences=True
    ))
    model.add(Dropout(trial.suggest_float("dropout_1", 0.1, 0.5)))

    # Layer 2
    model.add(LSTM(
        units=trial.suggest_int("units_2", 16, 64, step=16),
        activation=trial.suggest_categorical("activation2", ["relu", "tanh"]),
        return_sequences=True
    ))
    model.add(Dropout(trial.suggest_float("dropout_2", 0.1, 0.5)))

    # Layer 3 (returns only the last time step, feeding the regression head)
    model.add(LSTM(
        units=trial.suggest_int("units_3", 8, 32, step=8),
        activation=trial.suggest_categorical("activation3", ["relu", "tanh"]),
    ))
    model.add(Dropout(trial.suggest_float("dropout_3", 0.1, 0.5)))

    model.add(Dense(1))

    model.compile(
        optimizer=trial.suggest_categorical("optimizer", ["adam", "rmsprop"]),
        loss="mse"
    )

    # No per-epoch validation pass: nothing consumes it (verbose=0, no
    # callbacks), and the hold-out is scored once below.
    model.fit(X_fit, y_fit, epochs=10, batch_size=64, verbose=0)

    return float(model.evaluate(X_val, y_val, verbose=0))


In [None]:
# TPE is Optuna's default single-objective sampler; seeding it makes the
# sequence of suggested hyperparameters repeatable across reruns.
# NOTE(review): model training itself is not seeded in the visible cells, so
# the trial values can still vary from run to run.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=15)

# Keep the winning configuration; the rebuild cell reads `best_params`.
best_params = study.best_params
best_value = study.best_value

print("✅ Best Trial:")
print("Params:", best_params)
print("Loss:", best_value)


[I 2025-05-11 14:22:54,422] A new study created in memory with name: no-name-1f6ae168-72a5-482a-8dcc-6874dfcceef2
  super().__init__(**kwargs)
[I 2025-05-11 14:35:36,161] Trial 0 finished with value: 0.00792195089161396 and parameters: {'units_1': 96, 'activation': 'relu', 'dropout_1': 0.25920262999496535, 'units_2': 64, 'activation2': 'tanh', 'dropout_2': 0.46862628544674323, 'units_3': 24, 'activation3': 'relu', 'dropout_3': 0.4326406038066962, 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.00792195089161396.
[I 2025-05-11 14:44:40,927] Trial 1 finished with value: 0.007825823500752449 and parameters: {'units_1': 64, 'activation': 'tanh', 'dropout_1': 0.11884863435417148, 'units_2': 64, 'activation2': 'tanh', 'dropout_2': 0.4727470149069394, 'units_3': 16, 'activation3': 'tanh', 'dropout_3': 0.14507274603981735, 'optimizer': 'adam'}. Best is trial 1 with value: 0.007825823500752449.
[I 2025-05-11 14:51:30,197] Trial 2 finished with value: 0.007881208322942257 and parameters: 

✅ Best Trial:
Params: {'units_1': 32, 'activation': 'tanh', 'dropout_1': 0.23949893470775382, 'units_2': 16, 'activation2': 'tanh', 'dropout_2': 0.4056669575910915, 'units_3': 8, 'activation3': 'tanh', 'dropout_3': 0.21174372066086172, 'optimizer': 'rmsprop'}
Loss: 0.006790131330490112


In [None]:
# Rebuild using best params
model = Sequential()
# Declare the input explicitly; passing input_shape= to the first layer
# triggers a Keras warning in Sequential models.
model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(best_params['units_1'], activation=best_params['activation'], return_sequences=True))
model.add(Dropout(best_params['dropout_1']))
model.add(LSTM(best_params['units_2'], activation=best_params['activation2'], return_sequences=True))
model.add(Dropout(best_params['dropout_2']))
model.add(LSTM(best_params['units_3'], activation=best_params['activation3']))
model.add(Dropout(best_params['dropout_3']))
model.add(Dense(1))
model.compile(optimizer=best_params['optimizer'], loss='mse')
# X_test is passed only so val_loss is logged each epoch; no callback
# (early stopping / checkpointing) acts on it.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=64)

# Predict & evaluate
# y_test comes from the MinMax-scaled target column, so RMSE and MAE below are
# in scaled units, not in the units of Total_Traffic_Volume.
y_pred_optuna_tri = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_optuna_tri))
mae = mean_absolute_error(y_test, y_pred_optuna_tri)
r2 = r2_score(y_test, y_pred_optuna_tri)

print("\n📊 Tri-LSTM Optuna Evaluation:")
print(f"RMSE: {rmse:.4f} | MAE: {mae:.4f} | R²: {r2:.4f}")


Epoch 1/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 35ms/step - loss: 0.0087 - val_loss: 0.0080
Epoch 2/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 33ms/step - loss: 0.0069 - val_loss: 0.0078
Epoch 3/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 33ms/step - loss: 0.0071 - val_loss: 0.0078
Epoch 4/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - loss: 0.0071 - val_loss: 0.0078
Epoch 5/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 32ms/step - loss: 0.0069 - val_loss: 0.0079
Epoch 6/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 33ms/step - loss: 0.0072 - val_loss: 0.0079
Epoch 7/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 34ms/step - loss: 0.0069 - val_loss: 0.0078
Epoch 8/20
[1m789/789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 34ms/step - loss: 0.0072 - val_loss: 0.0078
Epoch 9/20
[1m789/789[

In [None]:
import numpy as np

# Persist the test-set predictions as a NumPy .npy file in the working directory.
np.save(file="y_pred_optuna_tri.npy", arr=y_pred_optuna_tri)


In [None]:
from google.colab import files

# Colab-only: hand the saved predictions file to the browser as a download.
files.download(filename="y_pred_optuna_tri.npy")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>