In [None]:
import pandas as pd
import numpy as np

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# data files stored there can be reached through the normal file system.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os

# Work from the thesis data folder on the mounted Drive; every relative path
# used from here on is resolved against this directory.
DATA_DIR = '/content/drive/MyDrive/Data MSc thesis'
os.chdir(DATA_DIR)

# Load the pre-encoded train / validation / test splits, in that order.
train_data, val_data, test_data = map(
    pd.read_csv,
    ("train_encoded.csv", "val_encoded.csv", "test_encoded.csv"),
)

In [None]:
!pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error
from kerastuner.tuners import RandomSearch

# Fix the global TensorFlow and NumPy random seeds.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Column to predict; every other column is used as a model input.
TARGET_COLUMN = 'avg_speed'
# Number of consecutive rows grouped into one LSTM sample. The same number of
# target values is produced per sample.
TIME_STEPS = 5


def _to_sequences(frame, target_column=TARGET_COLUMN, time_steps=TIME_STEPS):
    """Turn a flat table into float32 LSTM inputs and targets.

    Rows are grouped in their existing order into blocks of ``time_steps``
    consecutive rows; no sorting or shuffling happens here.

    Args:
        frame: DataFrame holding ``target_column`` plus the feature columns.
        target_column: Name of the column to predict.
        time_steps: Number of rows per sequence.

    Returns:
        Tuple ``(X, y)`` of float32 arrays. ``X`` has shape
        ``(len(frame) // time_steps, time_steps, n_features)`` and ``y`` has
        shape ``(len(frame) // time_steps, time_steps)``.

    Raises:
        ValueError: If the number of rows is not a multiple of ``time_steps``.
    """
    n_rows = len(frame)
    leftover = n_rows % time_steps
    if leftover != 0:
        # Fail with an explicit message instead of numpy's generic
        # "cannot reshape array" error.
        raise ValueError(
            f"Cannot split {n_rows} rows into sequences of {time_steps}: "
            f"{leftover} row(s) would be left over."
        )

    features = frame.drop(columns=[target_column]).values.astype('float32')
    targets = frame[target_column].values.astype('float32')

    n_sequences = n_rows // time_steps
    X = features.reshape((n_sequences, time_steps, features.shape[1]))
    y = targets.reshape((n_sequences, time_steps))
    return X, y


# Build LSTM-ready arrays for every split: (samples, time steps, features)
X_train, y_train = _to_sequences(train_data)
X_val, y_val = _to_sequences(val_data)
X_test, y_test = _to_sequences(test_data)

def build_model(hp):
    """Assemble and compile the bidirectional-LSTM regressor for one tuner trial.

    Args:
        hp: Hyperparameter container supplied by the tuner. It is queried for
            'units' (160 to 256 in steps of 32), 'dropout_rate' (0.2 to 0.4
            in steps of 0.1) and 'learning_rate' (single choice, 3e-4).

    Returns:
        A compiled Sequential model: Bidirectional(LSTM) -> Dense(224, relu)
        -> Dropout -> Dense(5), optimised with Adam on mean squared error.
    """
    network = Sequential()

    # Recurrent front end; the input geometry is read from the module-level
    # training array (time steps, features).
    lstm_units = hp.Int('units', min_value=160, max_value=256, step=32)
    sequence_shape = (X_train.shape[1], X_train.shape[2])
    recurrent_core = LSTM(units=lstm_units, input_shape=sequence_shape)
    network.add(Bidirectional(recurrent_core))

    # Fully connected layer followed by tunable dropout.
    network.add(Dense(224, activation='relu'))
    drop_fraction = hp.Float('dropout_rate', min_value=0.2, max_value=0.4, step=0.1)
    network.add(Dropout(drop_fraction))

    # Five linear outputs, matching the five target values per sample.
    network.add(Dense(5))

    # The learning rate is registered as a hyperparameter with one option.
    chosen_lr = hp.Choice('learning_rate', values=[3e-4])
    adam = tf.keras.optimizers.Adam(learning_rate=chosen_lr)
    network.compile(optimizer=adam, loss='mean_squared_error')

    return network

# Random search over the space declared in build_model, ranked by validation loss.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=3,
    executions_per_trial=1,
    # Fix the tuner's own seed as well, so the sampled trials are repeatable
    # and not only the TensorFlow / NumPy state.
    seed=42,
    directory='hyperparameter_tuning',
    project_name='bidirectional_lstm_hyperparameter_tuning_1min_2016_2019v6_1'
)

# Each trial trains for up to 60 epochs; EarlyStopping ends a trial once its
# monitored metric (val_loss by default) has not improved for 3 epochs.
tuner.search(X_train, y_train, epochs=60, batch_size=75, validation_data=(X_val, y_val), callbacks=[EarlyStopping(patience=3)])

best_model = tuner.get_best_models(num_models=1)[0]

# Print the hyperparameters used in the best model
print("Best hyperparameters:")
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hyperparameters.values)

# Predict on the validation set. Predictions and targets are flattened to 1-D
# so the metrics are computed over every individual value.
y_val_pred = best_model.predict(X_val).reshape((-1,))

# Calculate RMSE and MAE on the validation set
rmse_val = np.sqrt(mean_squared_error(y_val.reshape((-1,)), y_val_pred))
mae_val = mean_absolute_error(y_val.reshape((-1,)), y_val_pred)

print("Validation set metrics:")
print("RMSE:", rmse_val)
print("MAE:", mae_val)

# Predict on the test set
y_test_pred = best_model.predict(X_test).reshape((-1,))

# Calculate RMSE and MAE on the test set
rmse_test = np.sqrt(mean_squared_error(y_test.reshape((-1,)), y_test_pred))
mae_test = mean_absolute_error(y_test.reshape((-1,)), y_test_pred)

print("\nTest set metrics:")
print("RMSE:", rmse_test)
print("MAE:", mae_test)

# Attach the flattened test predictions to the test table as a new column
# (one prediction per original row).
test_data['predictions_lstm_1min_2016_2019'] = y_test_pred

# NOTE(review): the variable name says "15min" while the column and the file
# name say "1min"; the name is kept unchanged so existing references still work.
predictions_test_lstm_15min = test_data.copy()

# Save the test table together with its predictions.
predictions_test_lstm_15min.to_csv('predictions_test_lstm_1min_2016_2019_new.csv', index=False)

Trial 3 Complete [01h 49m 47s]
val_loss: 185.07351684570312

Best val_loss So Far: 185.07351684570312
Total elapsed time: 04h 58m 15s
Best hyperparameters:
{'units': 192, 'dropout_rate': 0.30000000000000004, 'learning_rate': 0.0003}
Validation set metrics:
RMSE: 13.604189
MAE: 7.959712

Test set metrics:
RMSE: 32.77164
MAE: 14.447698


In [None]:
# Send signal 9 to the current kernel process; nothing after this line runs.
# NOTE(review): presumably used to release the Colab runtime's memory once the
# results have been written — confirm this is still wanted.
import os
os.kill(os.getpid(), 9)
