In [None]:
from pathlib import Path
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools
import mltrainer
mltrainer.__version__

# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [None]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
# Preprocessor that is applied to every batch produced by the streamers below.
preprocessor = PaddedPreprocessor()

# Build the gestures dataset factory and ask it for batched streamers.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)
# The factory returns the splits keyed by name.
train, valid = streamers["train"], streamers["valid"]

In [None]:
# Lengths of both streamers; the same values are reused further down as
# train_steps / valid_steps in the trainer settings.
len(train), len(valid)

In [None]:
# Start both streams; these same objects are handed to the Trainer later on.
trainstreamer = train.stream()
validstreamer = valid.stream()
# Pull a single batch so its structure can be inspected.
x, y = next(iter(trainstreamer))
# Shapes first, then the raw tensors themselves.
x.shape, y.shape, x, y

Can you make sense of the shape?
What does it mean that the shapes are sometimes (32, 27, 3), but a second time might look like (32, 30, 3)? In other words, the second (or first, if you insist on starting at 0) dimension changes. Why is that? How does the model handle this? Do you think this is already padded, or still has to be padded?


# 2 Exercises
Let's test a base model and try to improve upon it.

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

As a rule of thumb: start lower than you expect to need!

In [None]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

# Metric instance: called by hand on one batch below and also passed to
# TrainerSettings(metrics=[...]) so it is evaluated during training.
accuracy = Accuracy()


In [None]:
# Hyperparameters of the baseline recurrent model, spelled out by name.
base_rnn_kwargs = {
    # Number of features per time step (accelerometer axes: x, y, z)
    "input_size": 3,
    # Number of hidden units in the RNN layer
    "hidden_size": 64,
    # Number of stacked RNN layers
    "num_layers": 1,
    # Number of output classes (gesture types)
    "horizon": 20,
}
model = rnn_models.BaseRNN(**base_rnn_kwargs)

Test the model. What is the output shape you need? Remember, we are doing classification!

In [None]:
# Forward pass of the sample batch through the model (no training has run yet).
yhat = model(x)
yhat.shape

Test the accuracy

In [None]:
# Note the argument order: targets first, model output second.
accuracy(y, yhat)

What do you think of the accuracy? What would you expect from blind guessing?

Check shape of `y` and `yhat`

In [None]:
# Model output shape next to the target shape.
yhat.shape, y.shape

And look at the output of yhat

In [None]:
# Model output for the first item of the batch.
yhat[0]

Does this make sense to you? If it is unclear, go back to the MNIST classification problem, where we had 10 classes.

We have a classification problem, so we need Cross Entropy Loss.
Remember, [this has a softmax built in](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html) 

In [None]:
# The same loss_fn object is passed to the Trainer further down.
loss_fn = torch.nn.CrossEntropyLoss()
# Argument order: model output first, targets second.
loss = loss_fn(yhat, y)
loss

In [None]:
import torch

# Probe the accelerators in order of preference (Apple MPS, then CUDA, then
# CPU) and report which one was found.
mps_backend = torch.backends.mps
if mps_backend.is_available() and mps_backend.is_built():
    device, message = torch.device("mps"), "Using MPS"
elif torch.cuda.is_available():
    device, message = "cuda:0", "using cuda"
else:
    device, message = "cpu", "using cpu"
print(message)

# on my mac, at least for the BaseRNN model, mps does not speed up training
# probably because the overhead of copying the data to the GPU is too high
# so the detected device is deliberately overridden: training runs on cpu
device = "cpu"
# however, it might speed up training for larger models, with more parameters

Set up the settings for the trainer and the different types of logging you want

In [None]:
# Learning rate scheduler settings
scheduler_kwargs = {"factor": 0.5, "patience": 5}

# Early stopping behaviour
earlystop_kwargs = {
    "save": True,      # Whether to save the best model during early stopping
    "verbose": True,
    "patience": 5,     # number of epochs with no improvement after which training will be stopped
    "delta": 0.0,      # minimum change to be considered an improvement
}

settings = TrainerSettings(
    epochs=10,  # increase this to about 100 for training
    # List of metrics to evaluate during training (here, accuracy)
    metrics=[accuracy],
    # Directory to save logs and model checkpoints
    logdir=Path("gestures"),
    # Number of training / validation steps per epoch (one step per batch)
    train_steps=len(train),
    valid_steps=len(valid),
    # Types of reports/logs to generate
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs=scheduler_kwargs,
    earlystop_kwargs=earlystop_kwargs,
)
settings

In [None]:
import torch.nn as nn
import torch
from torch import Tensor
from dataclasses import dataclass

@dataclass
class ModelConfig:
    """Hyperparameters read by the recurrent models defined in this notebook.

    Every model takes one of these in its constructor and uses the fields to
    size its recurrent layer and its final linear layer. The Conv1D variants
    additionally use ``input_size`` as the number of convolution input channels.
    """
    input_size: int      # Number of input features per time step
    hidden_size: int     # Number of hidden units in the RNN
    num_layers: int      # Number of stacked RNN/LSTM layers
    output_size: int     # Number of output classes
    dropout: float = 0.0 # Dropout rate between RNN layers

class GRUmodel(nn.Module):
    """GRU sequence classifier: a (stacked) GRU followed by a linear read-out.

    Args:
        config: object exposing ``input_size``, ``hidden_size``, ``num_layers``,
            ``output_size`` and ``dropout`` (e.g. a ``ModelConfig``).
    """

    def __init__(
        self,
        config,
    ) -> None:
        super().__init__()
        self.config = config
        # nn.GRU applies dropout only *between* stacked layers and emits a
        # UserWarning when a non-zero rate is combined with a single layer.
        # Passing 0.0 in that case computes exactly the same thing, minus
        # the warning.
        inter_layer_dropout = config.dropout if config.num_layers > 1 else 0.0
        # GRU layer for sequence modeling
        self.rnn = nn.GRU(
            input_size=config.input_size,
            hidden_size=config.hidden_size,
            dropout=inter_layer_dropout,
            batch_first=True,
            num_layers=config.num_layers,
        )
        # Linear layer to map hidden state to output classes
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        """Classify a batch of sequences.

        Args:
            x: input of shape (batch, seq_len, input_size).

        Returns:
            Unnormalised class scores of shape (batch, output_size).
        """
        x, _ = self.rnn(x)            # x: (batch, seq_len, hidden_size)
        # NOTE(review): if batches are padded at the end, this last step may be
        # padding for the shorter sequences -- confirm that this is intended.
        last_step = x[:, -1, :]       # Take the last time step's hidden state
        yhat = self.linear(last_step) # Map to output classes
        return yhat
    
class LSTMmodel(nn.Module):
    """LSTM sequence classifier: a (stacked) LSTM followed by a linear read-out.

    Args:
        config: object exposing ``input_size``, ``hidden_size``, ``num_layers``,
            ``output_size`` and ``dropout`` (e.g. a ``ModelConfig``).
    """

    def __init__(
        self,
        config,
    ) -> None:
        super().__init__()
        self.config = config
        # nn.LSTM applies dropout only *between* stacked layers and emits a
        # UserWarning when a non-zero rate is combined with a single layer.
        # Passing 0.0 in that case computes exactly the same thing, minus
        # the warning.
        inter_layer_dropout = config.dropout if config.num_layers > 1 else 0.0
        # LSTM layer for sequence modeling
        self.rnn = nn.LSTM(
            input_size=config.input_size,
            hidden_size=config.hidden_size,
            dropout=inter_layer_dropout,
            batch_first=True,
            num_layers=config.num_layers,
        )
        # Linear layer to map hidden state to output classes
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        """Classify a batch of sequences.

        Args:
            x: input of shape (batch, seq_len, input_size).

        Returns:
            Unnormalised class scores of shape (batch, output_size).
        """
        x, _ = self.rnn(x)            # x: (batch, seq_len, hidden_size)
        # NOTE(review): if batches are padded at the end, this last step may be
        # padding for the shorter sequences -- confirm that this is intended.
        last_step = x[:, -1, :]       # Take the last time step's hidden state
        yhat = self.linear(last_step) # Map to output classes
        return yhat
    

class GRUConv1DModel(nn.Module):
    """Conv1d feature extractor followed by a (stacked) GRU and a linear read-out.

    Args:
        config: object exposing ``input_size``, ``hidden_size``, ``num_layers``,
            ``output_size`` and ``dropout`` (e.g. a ``ModelConfig``).
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Number of convolution filters; also the feature size fed to the GRU,
        # so it is defined once to keep the two layers in sync.
        conv_channels = 16
        # Conv1d expects (batch, channels, seq_len)
        # kernel_size=3 with padding=1 keeps seq_len unchanged.
        self.conv1 = nn.Conv1d(
            in_channels=config.input_size,
            out_channels=conv_channels,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        # nn.GRU applies dropout only *between* stacked layers and emits a
        # UserWarning when a non-zero rate is combined with a single layer.
        # Passing 0.0 in that case computes exactly the same thing, minus
        # the warning.
        inter_layer_dropout = config.dropout if config.num_layers > 1 else 0.0
        self.rnn = nn.GRU(
            input_size=conv_channels,
            hidden_size=config.hidden_size,
            dropout=inter_layer_dropout,
            batch_first=True,
            num_layers=config.num_layers,
        )
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Classify a batch of sequences.

        Args:
            x: input of shape (batch, seq_len, features).

        Returns:
            Unnormalised class scores of shape (batch, output_size).
        """
        x = x.permute(0, 2, 1)  # (batch, features, seq_len)
        x = self.relu(self.conv1(x))
        x = x.permute(0, 2, 1)  # (batch, seq_len, channels)
        x, _ = self.rnn(x)
        # NOTE(review): if batches are padded at the end, this last step may be
        # padding for the shorter sequences -- confirm that this is intended.
        last_step = x[:, -1, :]
        yhat = self.linear(last_step)
        return yhat


class LSTMConv1DModel(nn.Module):
    """Conv1d feature extractor followed by a (stacked) LSTM and a linear read-out.

    Args:
        config: object exposing ``input_size``, ``hidden_size``, ``num_layers``,
            ``output_size`` and ``dropout`` (e.g. a ``ModelConfig``).
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Number of convolution filters; also the feature size fed to the LSTM,
        # so it is defined once to keep the two layers in sync.
        conv_channels = 16
        # Conv1d expects (batch, channels, seq_len)
        # kernel_size=3 with padding=1 keeps seq_len unchanged.
        self.conv1 = nn.Conv1d(
            in_channels=config.input_size,
            out_channels=conv_channels,
            kernel_size=3,
            padding=1,
        )
        self.relu = nn.ReLU()
        # nn.LSTM applies dropout only *between* stacked layers and emits a
        # UserWarning when a non-zero rate is combined with a single layer.
        # Passing 0.0 in that case computes exactly the same thing, minus
        # the warning.
        inter_layer_dropout = config.dropout if config.num_layers > 1 else 0.0
        self.rnn = nn.LSTM(
            input_size=conv_channels,
            hidden_size=config.hidden_size,
            dropout=inter_layer_dropout,
            batch_first=True,
            num_layers=config.num_layers,
        )
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Classify a batch of sequences.

        Args:
            x: input of shape (batch, seq_len, features).

        Returns:
            Unnormalised class scores of shape (batch, output_size).
        """
        x = x.permute(0, 2, 1)  # (batch, features, seq_len)
        x = self.relu(self.conv1(x))
        x = x.permute(0, 2, 1)  # (batch, seq_len, channels)
        x, _ = self.rnn(x)
        # NOTE(review): if batches are padded at the end, this last step may be
        # padding for the shorter sequences -- confirm that this is intended.
        last_step = x[:, -1, :]
        yhat = self.linear(last_step)
        return yhat

In [None]:
import mlflow
from dataclasses import asdict
from datetime import datetime

mlflow.set_tracking_uri("sqlite:///mlflow.db")
experiment_name = "gestures_10epochs"
# set_experiment() creates the experiment on first use, so no separate
# existence check / create_experiment() call is needed.
mlflow.set_experiment(experiment_name)
modeldir = Path(experiment_name).resolve()
modeldir.mkdir(parents=True, exist_ok=True)

# Available architectures, keyed by the prefix written into the `run_name`
# tag. The plotting cell groups runs by these prefixes, so choosing the model
# through this table keeps the tag and the trained model from drifting apart.
architectures = {
    "GRU_run": GRUmodel,
    "LSTM_run": LSTMmodel,
    "GRU_Conv1_run": GRUConv1DModel,
    "LSTM_Conv1D_run": LSTMConv1DModel,
}
# Change this single line to train a different architecture.
run_prefix = "GRU_Conv1_run"

with mlflow.start_run():
    mlflow.set_tag("model", "notebook_epochs_10")
    mlflow.set_tag("dev", "kim")
    mlflow.set_active_model(name="GRU_epochs_10")
    mlflow.set_tag("run_name", run_prefix + datetime.now().strftime("%Y%m%d-%H%M"))
    config = ModelConfig(
        input_size=3,   # Number of input features per time step
        hidden_size=64, # Number of hidden units in the RNN
        num_layers=2,   # Number of stacked RNN/LSTM layers
        output_size=20, # Number of output classes
        dropout=0.1,    # Dropout rate between RNN layers
    )
    # Log every hyperparameter of the config as an MLflow param.
    mlflow.log_params(asdict(config))

    model = architectures[run_prefix](config=config)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    # Only save by hand when early stopping is not configured to save.
    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        # NOTE(review): this pickles the whole module object; saving
        # model.state_dict() is usually more portable -- left unchanged so the
        # saved artifact format stays the same.
        torch.save(model, modelpath)

In [None]:
# Plot validation accuracy against the number of recurrent layers,
# one line per architecture (GRU / LSTM, with and without Conv1D).

import matplotlib.pyplot as plt
import mlflow
import pandas as pd

LAYERS_COL = "params.num_layers"
ACCURACY_COL = "metrics.metric/Accuracy"
RUN_NAME_COL = "tags.run_name"

# Look the runs up by experiment *name*: numeric experiment ids depend on the
# creation order inside the local tracking database and are not reproducible.
runs = mlflow.search_runs(experiment_names=[experiment_name])

required_cols = [LAYERS_COL, ACCURACY_COL, RUN_NAME_COL]
missing_cols = [col for col in required_cols if col not in runs.columns]
if missing_cols:
    raise KeyError(
        f"No plottable runs in experiment {experiment_name!r}: "
        f"missing columns {missing_cols}"
    )

# Drop runs that lack the tag, the parameter or the metric (e.g. interrupted
# runs); MLflow returns params as strings, so cast them before sorting.
runs_df = (
    runs.dropna(subset=required_cols)
    .astype({LAYERS_COL: int, ACCURACY_COL: float})
    .sort_values(by=LAYERS_COL)
)

# Legend label -> substring of the `run_name` tag identifying the architecture.
series_patterns = {
    "GRU": "GRU_run",
    "LSTM": "LSTM_run",
    "GRU Conv1D": "GRU_Conv1",
    "LSTM Conv1D": "LSTM_Conv1",
}

fig, ax = plt.subplots()
for label, pattern in series_patterns.items():
    is_match = runs_df[RUN_NAME_COL].str.contains(pattern, regex=False, na=False)
    subset = runs_df[is_match]
    ax.plot(subset[LAYERS_COL], subset[ACCURACY_COL], marker='o', label=label)

ax.set_xlabel('Number of Layers')
ax.set_ylabel('Validation Accuracy')
ax.set_xlim(0.8, 3.2)
ax.set_title('GRU vs LSTM Validation Accuracy by Number of Layers')
ax.legend()
ax.grid()
plt.show()


Try to update the code above by changing the hyperparameters.
    
To discern between the changes, also modify the tag mlflow.set_tag("model", "new-tag-here") where you add
a new tag of your choice. This way you can keep the models apart.

In [None]:
# trainer.loop() # if you want to pick up training, loop will continue from the last epoch

In [None]:
# Make sure no MLflow run is left active in this kernel
# (for example after a training cell was interrupted).
mlflow.end_run()