# Deep Learning - Assigment 1

### Group Number: 22

### Teammates:
1. **Mahshid Jafar Tajrishi**
2. **Bar Melinarskiy**
3. **Cis van Aken**
4. **Simon van Klompenburg**

## Imports entire notebook

In [20]:
!pip install optuna



In [21]:
# Importing libraries
import pandas as pd
import numpy as np
from scipy.io import loadmat
from IPython.display import display
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchsummary import summary
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
torch.autograd.set_detect_anomaly(True)
from copy import deepcopy as dc
import random
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
import json
from scipy.interpolate import interp1d
# from google.colab import files

## Global Definitions

In [22]:
# Path to the zip file
data_dir = "./Data"
data_path = f"{data_dir}/Xtrain.mat"

# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set pandas display options to show all columns
pd.set_option('display.max_columns', None)

# Set a professional style for the plots
sns.set_theme(style="whitegrid")

model_name = "RNN"
model_pth_path = f"best_model_{model_name}.pth"

In [23]:
# Check if CUDA is available
print(torch.__version__)
print(torch.version.cuda)  # Should match CUDA 12.5
print(torch.backends.cudnn.version())  # Should return a version, not None
print(torch.cuda.is_available())  # Should return True

2.5.1
None
90100
True


### Utilities

In [24]:
def create_sequences(data, window_size, forecast_steps=1):
    X, y = [], []
    for i in range(len(data) - window_size - forecast_steps + 1):
        X.append(data[i:i + window_size, 0])
        y.append(data[i + window_size:i + window_size + forecast_steps, 0])
    X = np.array(X).reshape(-1, window_size, 1)
    y = np.array(y).reshape(-1, forecast_steps)
    return X, y

def prepare_dataloader(series, window_size, batch_size=16):
    """
    Preprocess the datasets for the transformer model.

    Args:
        series: Array of numbers with the sequence of the laser measurements.
        window_size: Integer, the sliding window size.
        batch_size: The number of samples used in one forward and backward pass through the network.

    Returns:
        DataLoader, X, y
    """
    X, y = create_sequences(series, window_size)
    # Ensure proper shapes: X → (N, seq_len, input_dim), y → (N, input_dim)
    X_tensor = torch.tensor(X, dtype=torch.float32)  # Shape: (N, seq_len, input_dim)
    y_tensor = torch.tensor(y, dtype=torch.float32)  # Shape: (N, input_dim)
    dataset = TensorDataset(X_tensor, y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=False), X_tensor, y_tensor

def train_model(model, train_loader, val_loader, attempt_index, epochs=20, lr=1e-3, teacher_forcing_prob=0.5, patience=25):
    """
    Train the given model with validation and return the final validation L1 loss.
    Includes early stopping based on validation loss.
    Logs all metrics (L1, MSE, MAE) for each epoch into a DataFrame.
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    # Define loss functions
    huberLoss_criterion = nn.HuberLoss()  # Huber Loss
    mse_criterion = nn.MSELoss()  # MSE Loss
    mae_criterion = nn.L1Loss()  # MAE Loss (same as L1 Loss)

    best_val_loss = float('inf')
    best_model_state = model.state_dict()  # Initialize with the current model state
    no_improvement_epochs = 0  # Counter for early stopping

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["trial_id", "epoch", "train_l1", "train_mse", "train_mae", "val_l1", "val_mse", "val_mae"])

    # Progress bar for epochs
    epochs_pbar = tqdm(range(epochs), desc=f"Trial: {attempt_index}", unit="epoch")

    for epoch in epochs_pbar:
        # Training phase
        model.train()
        running_train_huberLoss_loss = 0.0
        running_train_mse_loss = 0.0
        running_train_mae_loss = 0.0

        for batch_index, batch in enumerate(train_loader):
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)

            # Forward pass
            output = model(x_batch)

            # Apply teacher forcing
            # if random.random() < teacher_forcing_prob:
            #     x_batch = x_batch.clone()  # Avoid in-place modification
            #     x_batch[:, -1, 0] = y_batch[:, 0]

            # Calculate loss
            huberLoss_loss = huberLoss_criterion(output, y_batch)
            mse_loss = mse_criterion(output, y_batch)
            mae_loss = mae_criterion(output, y_batch)

            running_train_huberLoss_loss += huberLoss_loss.item()
            running_train_mse_loss += mse_loss.item()
            running_train_mae_loss += mae_loss.item()

            # Backward pass
            optimizer.zero_grad()
            mae_loss.backward()
            optimizer.step()

        avg_train_huberLoss_loss = running_train_huberLoss_loss / len(train_loader)
        avg_train_mse_loss = running_train_mse_loss / len(train_loader)
        avg_train_mae_loss = running_train_mae_loss / len(train_loader)

        # Validation phase
        model.eval()
        running_val_huberLoss_loss = 0.0
        running_val_mse_loss = 0.0
        running_val_mae_loss = 0.0

        with torch.no_grad():
            for batch in val_loader:
                x_batch, y_batch = batch[0].to(device), batch[1].to(device)
                output = model(x_batch)

                huberLoss_loss = huberLoss_criterion(output, y_batch)
                mse_loss = mse_criterion(output, y_batch)
                mae_loss = mae_criterion(output, y_batch)

                running_val_huberLoss_loss += huberLoss_loss.item()
                running_val_mse_loss += mse_loss.item()
                running_val_mae_loss += mae_loss.item()

        avg_val_huberLoss_loss = running_val_huberLoss_loss / len(val_loader)
        avg_val_mse_loss = running_val_mse_loss / len(val_loader)
        avg_val_mae_loss = running_val_mae_loss / len(val_loader)

        # Save metrics to DataFrame
        new_row = pd.DataFrame([{
            "trial_id": attempt_index,
            "epoch": epoch + 1,
            "train_HuberLoss": avg_train_huberLoss_loss,
            "train_MSE": avg_train_mse_loss,
            "train_MAE": avg_train_mae_loss,
            "val_HuberLoss": avg_val_huberLoss_loss,
            "val_MSE": avg_val_mse_loss,
            "val_MAE": avg_val_mae_loss
        }])
        metrics_df = pd.concat([metrics_df, new_row], ignore_index=True)

        # Save the best model
        if avg_val_mse_loss < best_val_loss:
            best_val_loss = avg_val_mse_loss
            best_model_state = model.state_dict()
            no_improvement_epochs = 0  # Reset early stopping counter
        else:
            no_improvement_epochs += 1  # Increment early stopping counter

        # Early stopping
        if no_improvement_epochs >= patience:
            print(f"Early stopping triggered after {epoch + 1} epochs.")
            break

        # Adjust learning rate
        scheduler.step()

        # Update progress bar
        epochs_pbar.set_postfix(
            Train_huberLoss_Loss=avg_train_huberLoss_loss,
            Train_MSE_Loss=avg_train_mse_loss,
            Train_MAE_Loss=avg_train_mae_loss,
            Val_huberLoss_Loss=avg_val_huberLoss_loss,
            Val_MSE_Loss=avg_val_mse_loss,
            Val_MAE_Loss=avg_val_mae_loss,
        )

    # Load the best model
    model.load_state_dict(best_model_state)

    return best_val_loss, metrics_df  # Return the best validation L1 loss and metrics DataFrame

### Long short-term memory (LSTM) RNN Model

In [25]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

'''class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers, dropout=0.0) :
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers,
                            batch_first=True, dropout=dropout if num_stacked_layers > 1 else 0.0)

        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out'''

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers, dropout=0.0) :
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers
        self.dropout = dropout

        self.rnn = nn.RNN(input_size, hidden_size, num_stacked_layers,
                            batch_first=True, dropout=dropout if num_stacked_layers > 1 else 0.0, nonlinearity="tanh")

        self.fc = nn.Linear(hidden_size, 1)
        # Initialize weights based on activation
        self.apply(self.initialize_weights_xavier)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        #c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        return out

    def initialize_weights_xavier(self, m):
        if isinstance(m, nn.RNN):
            for name, param in m.named_parameters():
                if 'weight' in name:
                    nn.init.xavier_uniform_(param)
                elif 'bias' in name:
                    nn.init.zeros_(param)

### Load and Split the dataset into train-validation

In [26]:
# Define augmentation functions
def add_noise(x, std=0.05):
    return x + np.random.normal(0, std, size=x.shape)

def add_drift(x, strength=0.05):
    drift = np.linspace(0, strength, num=len(x)).reshape(-1, 1)
    return x + drift

def time_warp(x, stretch_factor=1.15):
    original_steps = np.arange(len(x))
    new_steps = np.linspace(0, len(x) - 1, int(len(x) * stretch_factor))
    warped = interp1d(original_steps, x.ravel(), kind='linear', fill_value="extrapolate")(new_steps)
    warped = warped[:len(x)]  # clip/pad to original size
    return warped.reshape(-1, 1)

# Define jittering function
def add_jitter(x, std=0.01):
    """
    Add small random perturbations (jitter) to the data.

    Args:
        x (np.ndarray): Input data.
        std (float): Standard deviation of the jitter.

    Returns:
        np.ndarray: Jittered data.
    """
    return x + np.random.normal(0, std, size=x.shape)


data = loadmat(data_path)
measurements = data['Xtrain']
# Check the shape of the data
print("Shape of measurements:", measurements.shape)

# Display the first few rows of the data
print(f"First 5 measurements: {measurements[:5]}")

print(f"measurements range: min={measurements.min()}, max={measurements.max()}")

# Creating sequences
max_window_size = 50  # Maximum window size
# Split data into training and validation sets
size_of_test = 200 + max_window_size  # 200 steps for testing + window size
train_data, val_data = measurements[:-size_of_test], measurements[-size_of_test:]

# Normalize the training data
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data)

# Normalize the validation data using the same scaler
val_data_normalized = scaler.transform(val_data)

# Augment training data
print(f"Train shape before augmention: {train_data.shape}")

augmented = [train_data]  # 1x original

'''for _ in range(2):  # 2x noise
    augmented.append(add_noise(train_data))

# for _ in range(2):  # 2x drift
#     augmented.append(add_drift(train_data))

# for _ in range(2):  # 2x time warp
#     augmented.append(time_warp(train_data))

for _ in range(2):  # 2x jittering
    augmented.append(add_jitter(train_data))

train_augmented = np.vstack(augmented)
print("Augmented shape:", train_augmented.shape)'''

# Creating sequences
train_dataset, val_dataset = train_data, val_data_normalized

print(f"Train shape: {train_dataset.shape}, Val shape: {val_dataset.shape}")
print(f"train_dataset range: min={train_dataset.min()}, max={train_dataset.max()}")
print(f"val_dataset range: min={val_dataset.min()}, max={val_dataset.max()}")

Shape of measurements: (1000, 1)
First 5 measurements: [[ 86]
 [141]
 [ 95]
 [ 41]
 [ 22]]
measurements range: min=2, max=255
Train shape before augmention: (750, 1)
Train shape: (750, 1), Val shape: (250, 1)
train_dataset range: min=2, max=255
val_dataset range: min=-0.9209486166007904, max=0.3992094861660078


### Training the Model

In [27]:
best_model = None

# Initialize a global DataFrame to store metrics for all trials
all_metrics_df = pd.DataFrame(columns=["trial_id", "epoch", "train_HuberLoss", "train_MSE", "train_MAE", "val_HuberLoss", "val_MSE", "val_MAE",
                                       "hidden_size", "num_layers", "learning_rate", "batch_size", "window_size"])

def objective(trial):
    global all_metrics_df  # Use the global DataFrame to store metrics

    # Suggest hyperparameters
    hidden_size = trial.suggest_int('hidden_size', 32, 256, step=32)
    num_layers = trial.suggest_int('num_layers', 1, 2)
    learning_rate = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32])
    window_size = trial.suggest_int('window_size', 5, 105, step=5)
    dropout = trial.suggest_float('dropout', 0.0, 0.3, step=0.1)

    # Update data loaders with the suggested batch size
    train_loader, train_X, train_y = prepare_dataloader(train_dataset, window_size, batch_size=batch_size)
    val_loader, val_X, val_y = prepare_dataloader(val_dataset, window_size, batch_size=batch_size)

    # Define the model
    model = RNN(
        input_size=1,  # Number of features in the input sequence
        hidden_size=hidden_size, # Number of features in the hidden state
        num_stacked_layers=num_layers, # Number of LSTM layers
        dropout=dropout  # Dropout rate
    ).to(device)

    # Train the model and get the final validation loss and metrics DataFrame
    trial_index = trial.number + 1  # Start from 1 for better readability in logs
    val_loss, metrics_df = train_model(
        model, train_loader, val_loader, attempt_index=trial_index, epochs=100, lr=learning_rate,
        teacher_forcing_prob=1, patience=50
    )

    # Add trial parameters to the metrics DataFrame
    metrics_df["hidden_size"] = hidden_size
    metrics_df["num_layers"] = num_layers
    metrics_df["learning_rate"] = learning_rate
    metrics_df["batch_size"] = batch_size
    metrics_df["window_size"] = window_size
    metrics_df["dropout"] = dropout  # Log the dropout value


    # Save the best model
    if trial.number == 0 or val_loss < study.best_value:
        torch.save(model.state_dict(), model_pth_path)
        print(f"Best model saved with trial {trial.number}")
        best_model = model

        # Save the best hyperparameters
        with open(f"best_hyperparameters_{model_name}.json", "w") as f:
            json.dump({
                "hidden_size": hidden_size,
                "num_layers": num_layers,
                "learning_rate": learning_rate,
                "batch_size": batch_size,
                "window_size": window_size,
                "dropout": dropout  
            }, f)

    # Append the metrics for this trial to the global DataFrame
    all_metrics_df = pd.concat([all_metrics_df, metrics_df], ignore_index=True)

    return val_loss  # Optuna will minimize this


# Create a study object
study = optuna.create_study(direction="minimize")  # Minimize validation loss

# Optimize the objective function
study.optimize(objective, n_trials=50)  # Run 100 trials

# Save the combined metrics DataFrame to a CSV file
csv_path = f"all_trials_metrics_{model_name}.csv"
all_metrics_df.to_csv(csv_path, index=False)
print(f"All metrics saved to {csv_path}")

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)

# Print the best validation loss
print("Best validation loss:", study.best_value)

# Load the best model
best_model = RNN(
    input_size=1,
    hidden_size=study.best_params['hidden_size'],
    num_stacked_layers=study.best_params['num_layers']
).to(device)

best_model.load_state_dict(torch.load(model_pth_path))
print("Best model loaded.")

# Update train_loader and val_loader with the best hyperparameters
train_loader, train_X, train_y = prepare_dataloader(train_dataset, study.best_params['window_size'], batch_size=study.best_params['batch_size'])
val_loader, val_X, val_y = prepare_dataloader(val_dataset, study.best_params['window_size'], batch_size=study.best_params['batch_size'])
print("Data loaders updated with best hyperparameters.")

[I 2025-05-07 21:23:42,487] A new study created in memory with name: no-name-3c0fa079-cdc2-49c2-b242-25ec457072e7


Trial: 1:   0%|          | 0/100 [00:00<?, ?epoch/s]

  all_metrics_df = pd.concat([all_metrics_df, metrics_df], ignore_index=True)
[I 2025-05-07 21:24:02,897] Trial 0 finished with value: 0.16178768873214722 and parameters: {'hidden_size': 224, 'num_layers': 1, 'lr': 1.503141663448836e-05, 'batch_size': 32, 'window_size': 35, 'dropout': 0.1}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 64 epochs.
Best model saved with trial 0


Trial: 2:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:24:25,573] Trial 1 finished with value: 1.2913029449326652 and parameters: {'hidden_size': 224, 'num_layers': 2, 'lr': 0.000673328699922382, 'batch_size': 32, 'window_size': 35, 'dropout': 0.2}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 70 epochs.


Trial: 3:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:24:40,285] Trial 2 finished with value: 3.1449190889086043 and parameters: {'hidden_size': 192, 'num_layers': 1, 'lr': 0.0006086599419411262, 'batch_size': 32, 'window_size': 55, 'dropout': 0.2}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 51 epochs.


Trial: 4:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:25:08,031] Trial 3 finished with value: 1.6654823303222657 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.00048422313080904794, 'batch_size': 32, 'window_size': 95, 'dropout': 0.0}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 93 epochs.


Trial: 5:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:25:27,584] Trial 4 finished with value: 0.26830715366772245 and parameters: {'hidden_size': 160, 'num_layers': 1, 'lr': 4.8524511053199635e-05, 'batch_size': 32, 'window_size': 50, 'dropout': 0.1}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 68 epochs.


Trial: 6:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:25:42,848] Trial 5 finished with value: 0.24406066962650844 and parameters: {'hidden_size': 256, 'num_layers': 1, 'lr': 0.00014516700049410695, 'batch_size': 32, 'window_size': 30, 'dropout': 0.2}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 51 epochs.


Trial: 7:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:26:05,462] Trial 6 finished with value: 21.209729433059692 and parameters: {'hidden_size': 128, 'num_layers': 2, 'lr': 0.0065834796475563355, 'batch_size': 32, 'window_size': 10, 'dropout': 0.3}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 68 epochs.


Trial: 8:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:26:21,781] Trial 7 finished with value: 0.5051826238632202 and parameters: {'hidden_size': 160, 'num_layers': 2, 'lr': 0.00017750978945737774, 'batch_size': 32, 'window_size': 15, 'dropout': 0.2}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 51 epochs.


Trial: 9:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:26:53,601] Trial 8 finished with value: 0.17207180708646774 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.0020008853993586312, 'batch_size': 32, 'window_size': 15, 'dropout': 0.0}. Best is trial 0 with value: 0.16178768873214722.


Trial: 10:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:27:14,661] Trial 9 finished with value: 2.222129702568054 and parameters: {'hidden_size': 96, 'num_layers': 1, 'lr': 0.006161872442241921, 'batch_size': 32, 'window_size': 55, 'dropout': 0.3}. Best is trial 0 with value: 0.16178768873214722.


Early stopping triggered after 73 epochs.


Trial: 11:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:27:42,199] Trial 10 finished with value: 0.13515542820096016 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 1.621874838184555e-05, 'batch_size': 32, 'window_size': 85, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Best model saved with trial 10


Trial: 12:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:28:09,636] Trial 11 finished with value: 0.5171603322029114 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 1.0563893621799683e-05, 'batch_size': 32, 'window_size': 90, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Trial: 13:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:28:37,958] Trial 12 finished with value: 0.8506484826405843 and parameters: {'hidden_size': 32, 'num_layers': 1, 'lr': 1.2018211326992388e-05, 'batch_size': 32, 'window_size': 75, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Trial: 14:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:29:02,508] Trial 13 finished with value: 0.2899973839521408 and parameters: {'hidden_size': 96, 'num_layers': 1, 'lr': 3.948097842465562e-05, 'batch_size': 32, 'window_size': 75, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 86 epochs.


Trial: 15:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:29:29,907] Trial 14 finished with value: 0.192704501748085 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 3.7602254550232675e-05, 'batch_size': 32, 'window_size': 105, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Trial: 16:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:29:49,483] Trial 15 finished with value: 0.25235315163930255 and parameters: {'hidden_size': 256, 'num_layers': 1, 'lr': 2.6942979244195464e-05, 'batch_size': 32, 'window_size': 75, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 66 epochs.


Trial: 17:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:30:08,996] Trial 16 finished with value: 0.22704360953399114 and parameters: {'hidden_size': 96, 'num_layers': 1, 'lr': 9.384676779157502e-05, 'batch_size': 32, 'window_size': 35, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 65 epochs.


Trial: 18:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:30:34,209] Trial 17 finished with value: 0.23020906249682108 and parameters: {'hidden_size': 128, 'num_layers': 1, 'lr': 2.0844564196063904e-05, 'batch_size': 32, 'window_size': 70, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 88 epochs.


Trial: 19:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:30:53,051] Trial 18 finished with value: 0.24657232846532548 and parameters: {'hidden_size': 224, 'num_layers': 1, 'lr': 5.9556192119094305e-05, 'batch_size': 32, 'window_size': 45, 'dropout': 0.2}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 63 epochs.


Trial: 20:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:31:22,300] Trial 19 finished with value: 0.1770085021853447 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 1.861222519574912e-05, 'batch_size': 32, 'window_size': 65, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Trial: 21:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:31:53,768] Trial 20 finished with value: 1.019877827167511 and parameters: {'hidden_size': 224, 'num_layers': 2, 'lr': 0.00023554155143835817, 'batch_size': 32, 'window_size': 90, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Trial: 22:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:32:25,991] Trial 21 finished with value: 1.4468780010938644 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.0018616549062610555, 'batch_size': 32, 'window_size': 20, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Trial: 23:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:32:58,179] Trial 22 finished with value: 0.43916260078549385 and parameters: {'hidden_size': 160, 'num_layers': 2, 'lr': 0.001875925027012955, 'batch_size': 32, 'window_size': 25, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Trial: 24:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:33:15,724] Trial 23 finished with value: 127.67779636383057 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.002734267313985903, 'batch_size': 32, 'window_size': 5, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 52 epochs.


Trial: 25:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:33:32,207] Trial 24 finished with value: 0.2817573015178953 and parameters: {'hidden_size': 224, 'num_layers': 2, 'lr': 8.659773070003787e-05, 'batch_size': 32, 'window_size': 40, 'dropout': 0.0}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 51 epochs.


Trial: 26:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:33:48,572] Trial 25 finished with value: 17.764972448349 and parameters: {'hidden_size': 256, 'num_layers': 1, 'lr': 0.001036490754260754, 'batch_size': 32, 'window_size': 20, 'dropout': 0.1}. Best is trial 10 with value: 0.13515542820096016.


Early stopping triggered after 54 epochs.


Trial: 27:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:34:11,719] Trial 26 finished with value: 0.08017907571047544 and parameters: {'hidden_size': 128, 'num_layers': 2, 'lr': 0.0038719158719565418, 'batch_size': 32, 'window_size': 60, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 75 epochs.
Best model saved with trial 26


Trial: 28:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:34:27,909] Trial 27 finished with value: 3.4400795300801597 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 0.008576326998199013, 'batch_size': 32, 'window_size': 60, 'dropout': 0.1}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 56 epochs.


Trial: 29:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:34:43,126] Trial 28 finished with value: 2.926436165968577 and parameters: {'hidden_size': 128, 'num_layers': 2, 'lr': 0.00036501578963535564, 'batch_size': 32, 'window_size': 85, 'dropout': 0.2}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 51 epochs.


Trial: 30:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:35:13,301] Trial 29 finished with value: 0.5105852910450527 and parameters: {'hidden_size': 64, 'num_layers': 1, 'lr': 1.951288006703852e-05, 'batch_size': 32, 'window_size': 45, 'dropout': 0.3}. Best is trial 26 with value: 0.08017907571047544.


Trial: 31:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:35:42,898] Trial 30 finished with value: 2.2334332704544066 and parameters: {'hidden_size': 96, 'num_layers': 2, 'lr': 0.0010598313991393527, 'batch_size': 32, 'window_size': 105, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Trial: 32:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:36:06,745] Trial 31 finished with value: 11.69815022604806 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.003377022924316679, 'batch_size': 32, 'window_size': 30, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 74 epochs.


Trial: 33:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:36:28,172] Trial 32 finished with value: 9.99714708328247 and parameters: {'hidden_size': 160, 'num_layers': 2, 'lr': 0.004096593829802042, 'batch_size': 32, 'window_size': 5, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 64 epochs.


Trial: 34:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:36:57,613] Trial 33 finished with value: 0.21377282163926534 and parameters: {'hidden_size': 224, 'num_layers': 2, 'lr': 0.0014141478792774219, 'batch_size': 32, 'window_size': 35, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 91 epochs.


Trial: 35:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:37:28,968] Trial 34 finished with value: 1.269348382949829 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.0006089503040182965, 'batch_size': 32, 'window_size': 50, 'dropout': 0.1}. Best is trial 26 with value: 0.08017907571047544.


Trial: 36:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:37:50,388] Trial 35 finished with value: 28.19203249613444 and parameters: {'hidden_size': 224, 'num_layers': 2, 'lr': 0.004419067009572234, 'batch_size': 32, 'window_size': 60, 'dropout': 0.2}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 67 epochs.


Trial: 37:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:38:22,820] Trial 36 finished with value: 0.7827609367668629 and parameters: {'hidden_size': 128, 'num_layers': 2, 'lr': 0.0008422256233555674, 'batch_size': 32, 'window_size': 15, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Trial: 38:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:38:42,698] Trial 37 finished with value: 4.033667643864949 and parameters: {'hidden_size': 160, 'num_layers': 1, 'lr': 0.002500427439286157, 'batch_size': 32, 'window_size': 80, 'dropout': 0.1}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 68 epochs.


Trial: 39:   0%|          | 0/100 [00:00<?, ?epoch/s]

[I 2025-05-07 21:39:02,910] Trial 38 finished with value: 1.850998415146023 and parameters: {'hidden_size': 192, 'num_layers': 2, 'lr': 0.0004314696831605192, 'batch_size': 32, 'window_size': 25, 'dropout': 0.0}. Best is trial 26 with value: 0.08017907571047544.


Early stopping triggered after 55 epochs.


Trial: 40:   0%|          | 0/100 [00:00<?, ?epoch/s]

[W 2025-05-07 21:39:08,827] Trial 39 failed with parameters: {'hidden_size': 256, 'num_layers': 1, 'lr': 0.00011987761994124381, 'batch_size': 32, 'window_size': 100, 'dropout': 0.2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\bar24\anaconda3\envs\ExplainableAI\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\bar24\AppData\Local\Temp\ipykernel_39004\641694211.py", line 32, in objective
    val_loss, metrics_df = train_model(
                           ^^^^^^^^^^^^
  File "C:\Users\bar24\AppData\Local\Temp\ipykernel_39004\497752395.py", line 64, in train_model
    output = model(x_batch)
             ^^^^^^^^^^^^^^
  File "c:\Users\bar24\anaconda3\envs\ExplainableAI\Lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 

In [None]:
def plot_metrics_by_parameters(metrics_df, parameters, save_path="metrics_by_parameters.png"):
    """
    Create a single figure with subplots for each parameter, showing average losses.

    Args:
        metrics_df (pd.DataFrame): DataFrame containing trial metrics and parameters.
        parameters (list): List of parameters to plot (e.g., ['hidden_size', 'num_layers', 'dropout']).
    """
    # Mapping loss types to more readable names
    loss_type_mapping = {
        "train_HuberLoss": "Train Huber Loss",
        "train_MSE": "Train MSE",
        "train_MAE": "Train MAE",
        "val_HuberLoss": "Validation Huber Loss",
        "val_MSE": "Validation MSE",
        "val_MAE": "Validation MAE"
    }

    num_params = len(parameters)
    ncols = 3  # Number of columns
    nrows = (num_params + ncols - 1) // ncols  # Dynamically calculate rows based on parameters
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 6 * nrows), sharex=False)

    # Flatten axes for easier iteration
    axes = axes.flatten()

    # Plot each parameter
    for i, (ax, parameter) in enumerate(zip(axes, parameters)):
        grouped = metrics_df.groupby(parameter).mean()  # Group by the parameter and calculate averages
        for loss, readable_name in loss_type_mapping.items():
            if loss in grouped.columns:
                ax.plot(grouped.index, grouped[loss], label=readable_name, marker='o', linestyle='-', linewidth=1.5)

        # Customize each subplot
        ax.set_xlabel(parameter, fontsize=18)
        ax.set_ylabel("Loss", fontsize=18)
        ax.tick_params(axis='both', which='major', labelsize=16)
        ax.set_title(f"Average Loss by {parameter}", fontsize=16, weight='bold', color='darkblue')
        ax.grid(True, linestyle='--', alpha=0.6)

    # Remove unused subplots if parameters < nrows * ncols
    for j in range(len(parameters), len(axes)):
        if j == len(parameters):  # Use the first unused subplot for the legend
            handles, labels = ax.get_legend_handles_labels()
            axes[j].legend(
                handles, labels, loc="center", fontsize=16, title="Loss Type", title_fontsize=20, frameon=False
            )
            axes[j].set_axis_off()  # Turn off the axis for the legend plot
        else:
            fig.delaxes(axes[j])  # Remove any additional unused subplots

    # Adjust layout for better spacing
    plt.tight_layout()
    plt.legend()
    plt.savefig(save_path, dpi=300)
    plt.show()

# Example: Plot average losses for all parameters in one figure
parameters_to_plot = ["hidden_size", "num_layers", "learning_rate", "batch_size", "window_size", "dropout"]
plot_metrics_by_parameters(all_metrics_df, parameters_to_plot)

### Predict the next 200 data points recursively

In [None]:
def predict_and_inverse_transform(model, init_window, scaler, steps=200):
    """
    Perform recursive forecasting using the trained model and convert predictions back to the original range.

    Args:
        model: The trained LSTM model.
        init_window: The initial sequence window (tensor) in normalized form.
        scaler: The MinMaxScaler used for normalization.
        steps: Number of forecasting steps.

    Returns:
        np.array: Forecasted values in the original range.
    """
    model.eval()
    preds = []
    current_seq = init_window.clone().to(device)  # Ensure the initial window is on the correct device

    with torch.no_grad():
        for step in range(steps):
            # Forward pass
            input = current_seq.unsqueeze(0).contiguous()  # Add batch dimension
            pred = model(input)  # shape: (1, seq_len, forecast_steps)

            # Extract the first predicted value
            pred_value = pred.item()  # Extract the first step of the forecast
            preds.append(pred_value)

            # Update the sequence by appending the prediction and removing the oldest value
            next_value = torch.tensor([[pred_value]], dtype=torch.float32, device=device)
            current_seq = torch.cat((current_seq[1:], next_value), dim=0)

    # Convert predictions to numpy array
    preds = np.array(preds).reshape(-1, 1)

    # Inverse transform the predictions to the original range
    preds_original = scaler.inverse_transform(preds)

    return preds_original

def plot_signals(predictions_rescaled, actual_values, save_path="actual_vs_predicted_plot.png"):
    """
    Plot training and validation metrics after training and compare predictions with actual values.
    """
    plt.figure(figsize=(18, 6))

    # Plot Actual vs Predicted Values
    plt.plot(actual_values, label="Actual Values", color="green", linewidth=2)
    plt.plot(predictions_rescaled, label="Predicted Values", color="blue", linestyle="--", linewidth=2)
    plt.xlabel("Time Steps")
    plt.ylabel("Laser Measurement (Rescaled)")
    plt.title("Actual vs. Predicted Laser Measurements")
    plt.legend()

    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.show()
    print(f"Metrics plot saved to {save_path}")

# Perform recursive forecasting to generate predictions
# Use the first sequence from the validation set as the initial window
init_window = torch.tensor(val_X[0], dtype=torch.float32).to(device)

# Predict and convert back to the original range
predicted_signal = predict_and_inverse_transform(best_model, init_window, scaler, steps=200)

# Compare with the actual values (also inverse transformed if needed)
actual_signal = scaler.inverse_transform(val_y[:len(predicted_signal)])

plot_signals(predicted_signal, actual_signal)

In [None]:
import torch.nn as nn

def evaluate_model_on_validation(best_model, val_loader):
    """
    Evaluate the best model on the validation dataset and calculate MSE and MAE using PyTorch loss functions.

    Args:
        best_model: The trained model.
        val_loader: The validation dataset (normalized).

    Returns:
        mse: Mean Squared Error.
        mae: Mean Absolute Error.
    """
    # Switch the model to evaluation mode
    best_model.eval()

    # Define PyTorch loss functions
    mse_criterion = nn.MSELoss()
    mae_criterion = nn.L1Loss()

    # Collect predictions and ground truth
    total_mse = 0.0
    total_mae = 0.0
    num_samples = 0

    with torch.no_grad():
        for batch in val_loader:
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)
            output = best_model(x_batch)

            # Calculate losses
            mse = mse_criterion(output, y_batch).item()
            mae = mae_criterion(output, y_batch).item()

            # Accumulate losses
            total_mse += mse * x_batch.size(0)
            total_mae += mae * x_batch.size(0)
            num_samples += x_batch.size(0)

    # Calculate average losses
    avg_mse = total_mse / num_samples
    avg_mae = total_mae / num_samples

    print(f"Validation MSE: {avg_mse}")
    print(f"Validation MAE: {avg_mae}")

    return avg_mse, avg_mae

# Evaluate the best model
mse, mae = evaluate_model_on_validation(best_model, val_loader)

### Load the trained model

In [None]:
import json

def load_best_model():
    # Load the best hyperparameters
    with open(f"best_hyperparameters_{model_name}.json", "r") as f:
        best_params = json.load(f)

    # Recreate the model with the best hyperparameters
    best_model = RNN(
        input_size=1,
        hidden_size=best_params["hidden_size"],
        num_stacked_layers=best_params["num_layers"]
    ).to(device)

    # Load the saved model weights
    best_model.load_state_dict(torch.load(model_pth_path))
    best_model.eval()

    return best_model, best_params

best_model, best_params = load_best_model()
print("Best hyperparameters are:", best_params)