In [1]:
import torch.nn as nn
import torch.optim as optim
from classes import SpeedEstimatorRNN, VehicleSpeedDataset
from torch.utils.data import DataLoader
import torch

In [4]:
# Report which compute device PyTorch can see before any training starts.
if not torch.cuda.is_available():
    print("CUDA is not available. Training will be performed on the CPU.")
else:
    print("CUDA is available! You can use a GPU for training.")
    print("Number of GPUs available:", torch.cuda.device_count())
    # Look the active device index up once and reuse it for both report lines.
    active_gpu = torch.cuda.current_device()
    print("Current GPU being used:", active_gpu)
    print("GPU Name:", torch.cuda.get_device_name(active_gpu))

CUDA is available! You can use a GPU for training.
Number of GPUs available: 1
Current GPU being used: 0
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU


In [6]:
# ---------------------------------------------------------------------------
# Data locations
# ---------------------------------------------------------------------------
extension = "*.csv"  # glob pattern handed to VehicleSpeedDataset together with a path
training_data_path = "data/i7/it_1/1_training"
test_data_path = "data/i7/it_1/2_testing"

# ---------------------------------------------------------------------------
# Per-model hyperparameters: entry j of every list configures model j
# ---------------------------------------------------------------------------
num_models = 5

hidden_size = [256, 256, 512, 512, 512]
num_layers = [3, 4, 3, 4, 3]
sequence_length = [800, 800, 800, 800, 1000]
learning_rate = [1e-4] * num_models
batch_size = [128] * num_models  # number of sequences in one batch
dropout_rate = [0.2] * num_models

# ---------------------------------------------------------------------------
# Settings shared by every model
# ---------------------------------------------------------------------------
input_size = 20   # number of CAN signals per timestep
output_size = 2
step_size = 10    # controls the overlap between sequences extracted into the dataset
num_epochs = 30

# ---------------------------------------------------------------------------
# Output path prefixes; the training cell appends the model index and a suffix
# ---------------------------------------------------------------------------
location_state = "Simple RNN/trained_models/i7/it_1/state_models/model_"
location_traced = "Simple RNN/trained_models/i7/it_1/traced_models/model_"

In [9]:
# Train `num_models` SpeedEstimatorRNN configurations back to back.
#
# After every epoch the current model is scored on the test set. Whenever that
# score is the best seen so far *for the current model*, a state checkpoint and a
# traced TorchScript copy are written to `location_state` / `location_traced`.
# A model stops training early after `patience` consecutive epochs without
# improvement.
#
# NOTE(review): the test set is also what drives checkpoint selection and early
# stopping here -- confirm a separate hold-out set is used for final reporting.

patience = 5  # epochs without test-loss improvement tolerated per model

# Defined up front so both names exist even if the loop below never executes.
best_test_loss = float('inf')
early_stopping_counter = 0

# Neither the device nor the loss function depends on the model index.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.MSELoss()

# Training loops
for j in range(num_models):

    # Reset the tracking state for every model. Without resetting
    # best_test_loss, model j would only be saved if it beat the best loss of
    # all previously trained models, and could early-stop without ever
    # producing a checkpoint.
    best_test_loss = float('inf')
    early_stopping_counter = 0

    # Load dataset and DataLoader
    train_dataset = VehicleSpeedDataset(training_data_path, extension, seq_length=sequence_length[j], step_size=step_size)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size[j], shuffle=True, num_workers=6, pin_memory=True)

    # Load test dataset and DataLoader
    test_dataset = VehicleSpeedDataset(test_data_path, extension, seq_length=sequence_length[j], step_size=step_size)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)  # Batch size = 1 for test evaluation

    # Initialize model and optimizer
    # NOTE(review): dropout_rate[j] from the config cell is not passed to the
    # model -- confirm whether SpeedEstimatorRNN is supposed to receive it.
    model = SpeedEstimatorRNN(input_size, hidden_size[j], num_layers[j], output_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate[j])

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        total_train_loss = 0

        for batch_idx, (features, speeds) in enumerate(train_dataloader):
            speeds = speeds.squeeze(1)  # Remove extra dimension from speeds if present
            features, speeds = features.to(device), speeds.to(device)

            # Forward pass
            outputs = model(features)

            # Guard against silent broadcasting inside MSELoss.
            assert outputs.shape == speeds.shape, f"Shape mismatch: outputs {outputs.shape} vs speeds {speeds.shape}"

            train_loss = criterion(outputs, speeds)

            # Backward pass
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            total_train_loss += train_loss.item()

        print(f"Model: {j}, Epoch [{epoch+1}/{num_epochs}], Loss: {total_train_loss/len(train_dataloader):.4f}")

        # ---- evaluation pass ----
        model.eval()
        total_test_loss = 0

        with torch.no_grad():  # No need to compute gradients for validation/test
            for features, speeds in test_dataloader:
                speeds = speeds.squeeze(1)
                features, speeds = features.to(device), speeds.to(device)

                # Forward pass
                test_outputs = model(features)
                test_loss = criterion(test_outputs, speeds)

                total_test_loss += test_loss.item()

        avg_test_loss = total_test_loss / len(test_dataloader)

        print(f"Model: {j}, Epoch [{epoch+1}/{num_epochs}], Test Loss: {avg_test_loss:.4f}")

        # Checkpoint: Save model if test loss improves
        if avg_test_loss < best_test_loss:
            print(f"New best model found! Test Loss improved from {best_test_loss:.4f} to {avg_test_loss:.4f}")
            best_test_loss = avg_test_loss
            early_stopping_counter = 0

            # The hyperparameter entries hold the full per-sweep lists (kept
            # unchanged for existing loaders); "model_index" tells a reader
            # which entry belongs to this checkpoint.
            torch.save({
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "sequence_length": sequence_length,
                "input_size": input_size,
                "hidden_size": hidden_size,
                "num_layers": num_layers,
                "output_size": output_size,
                "learning_rate": learning_rate,
                "num_epochs": num_epochs,
                "model_index": j,
                "test_loss": avg_test_loss
            }, location_state + str(j) + ".pt")

            # Save traced model for MATLAB
            # The model is still in eval mode here, so that is the mode being traced.
            example_input = torch.rand(1, sequence_length[j], input_size).to(device)  # Example input matching model dimensions
            traced_model = torch.jit.trace(model, example_input)
            torch.jit.save(traced_model, location_traced + str(j) + "_traced.pt")  # Save as traced TorchScript model

            print(f"model_{j} saved")
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            print("Early stopping triggered!")
            break  # Exit the training loop early




Model: 0, Epoch [1/25], Loss: 11.8025
Model: 0, Epoch [1/25], Test Loss: 6.7434
New best model found! Test Loss improved from inf to 6.7434
model_0 saved
Model: 0, Epoch [2/25], Loss: 3.8498
Model: 0, Epoch [2/25], Test Loss: 3.5973
New best model found! Test Loss improved from 6.7434 to 3.5973
model_0 saved
Model: 0, Epoch [3/25], Loss: 2.1484
Model: 0, Epoch [3/25], Test Loss: 2.1993
New best model found! Test Loss improved from 3.5973 to 2.1993
model_0 saved
Model: 0, Epoch [4/25], Loss: 1.3396
Model: 0, Epoch [4/25], Test Loss: 1.3858
New best model found! Test Loss improved from 2.1993 to 1.3858
model_0 saved
Model: 0, Epoch [5/25], Loss: 0.8428
Model: 0, Epoch [5/25], Test Loss: 0.8555
New best model found! Test Loss improved from 1.3858 to 0.8555
model_0 saved
Model: 0, Epoch [6/25], Loss: 0.5280
Model: 0, Epoch [6/25], Test Loss: 0.5428
New best model found! Test Loss improved from 0.8555 to 0.5428
model_0 saved
Model: 0, Epoch [7/25], Loss: 0.3271
Model: 0, Epoch [7/25], Test 

KeyboardInterrupt: 