In [1]:
from pathlib import Path
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools
import mltrainer
mltrainer.__version__

'0.2.5'

# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
preprocessor = PaddedPreprocessor()

gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

[32m2025-09-28 15:45:13.319[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\pikob\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:01<00:00, 1559.46it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:00<00:00, 1793.27it/s]


In [3]:
len(train), len(valid)

(81, 20)

In [4]:
trainstreamer = train.stream()
validstreamer = valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y

(torch.Size([32, 29, 3]),
 tensor([ 1, 18,  3,  1,  1,  9, 12,  0, 13,  6, 18, 12,  8, 14, 18, 18, 10,  8,
         17,  0,  2,  7, 16,  1,  1, 10, 17, 18,  4, 15, 16, 19]))

Can you make sense of the shape?
What does it mean that the shapes are sometimes (32, 27, 3), but a second time might look like (32, 30, 3)? In other words, the second (or first, if you insist on starting at 0) dimension changes. Why is that? How does the model handle this? Do you think this is already padded, or still has to be padded?

The shape of x is (batch size, sequence length, feature dim).
The situation:
- The data streamer batches sequences of variable lengths.
- Each batch is padded to the max length in that batch only.
- Different batches can have different sequence length

# 2 Excercises
Lets test a basemodel, and try to improve upon that.

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

As a rule of thumbs: start lower than you expect to need!

In [8]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

accuracy = Accuracy()


In [6]:
model = rnn_models.BaseRNN(
    input_size=3,
    hidden_size=64,
    num_layers=1,
    horizon=20,
)

Test the model. What is the output shape you need? Remember, we are doing classification!

In [7]:
yhat = model(x)
yhat.shape

torch.Size([32, 20])

Test the accuracy

In [8]:
accuracy(y, yhat)

0.03125

What do you think of the accuracy? What would you expect from blind guessing? Expected accuracy from blind guessing ≈ 5%

Check shape of `y` and `yhat`

In [9]:
yhat.shape, y.shape

(torch.Size([32, 20]), torch.Size([32]))

And look at the output of yhat

In [10]:
yhat[0]

tensor([-0.0709, -0.0487, -0.1393, -0.0408,  0.0892, -0.1182, -0.1338,  0.2251,
         0.0712,  0.0093,  0.0798, -0.0513,  0.0032, -0.0455, -0.1119,  0.1861,
        -0.0582,  0.1045,  0.1285, -0.0962], grad_fn=<SelectBackward0>)

Does this make sense to you? If you are unclear, go back to the classification problem with the MNIST, where we had 10 classes.

We have a classification problem, so we need Cross Entropy Loss.
Remember, [this has a softmax built in](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html) 

In [11]:
loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(yhat, y)
loss

tensor(3.0248, grad_fn=<NllLossBackward0>)

In [5]:
import torch
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
elif torch.cuda.is_available():
    device = "cuda:0"
    print("using cuda")
else:
    device = "cpu"
    print("using cpu")

# on my mac, at least for the BaseRNN model, mps does not speed up training
# probably because the overhead of copying the data to the GPU is too high
# so i override the device to cpu
device = "cpu"
# however, it might speed up training for larger models, with more parameters

using cpu


Set up the settings for the trainer and the different types of logging you want

In [23]:
settings = TrainerSettings(
    epochs=10, # increase this to about 100 for training
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs = {
        "save": False, # save every best model, and restore the best one
        "verbose": True,
        "patience": 5, # number of epochs with no improvement after which training will be stopped
        "delta": 0.0, # minimum change to be considered an improvement
    }
)
settings

epochs: 10
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.TOML: 'TOML'>, <ReportTypes.TENSORBOARD: 'TENSORBOARD'>, <ReportTypes.MLFLOW: 'MLFLOW'>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: {'save': False, 'verbose': True, 'patience': 5, 'delta': 0.0}

In [18]:
import torch.nn as nn
import torch
from torch import Tensor
from dataclasses import dataclass

@dataclass
class ModelConfig:
    input_size: int
    hidden_size: int
    num_layers: int
    output_size: int
    dropout: float = 0.0

class GRUmodel(nn.Module):
    def __init__(
        self,
        config,
    ) -> None:
        super().__init__()
        self.config = config
        self.rnn = nn.GRU(
            input_size=config.input_size,
            hidden_size=config.hidden_size,
            dropout=config.dropout,
            batch_first=True,
            num_layers=config.num_layers,
        )
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        x, _ = self.rnn(x)
        last_step = x[:, -1, :]
        yhat = self.linear(last_step)
        return yhat

In [15]:
config = ModelConfig(
    input_size=3,
    hidden_size=64,
    num_layers=1,
    output_size=20,
    dropout=0.0,
)


In [25]:
import mlflow
from datetime import datetime

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("gestures").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

with mlflow.start_run():
    mlflow.set_tag("model", "GRU_h64_d01")
    mlflow.set_tag("dev", "Eline")
    config = ModelConfig(
        input_size=3,
        hidden_size=64,
        num_layers=1,
        output_size=20,
        dropout=0.1,
    )
    mlflow.log_param("hidden_size", f"{config.hidden_size}")
    mlflow.log_param("dropout", f"{config.dropout}")
    mlflow.log_param("epochs", f"{settings.epochs}")
    mlflow.log_param("patience", f"{settings.earlystop_kwargs['patience']}")
    mlflow.log_param("delta", f"{settings.earlystop_kwargs['delta']}")

    model = GRUmodel(
        config=config,
    )

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

[32m2025-09-27 17:38:59.387[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures\20250927-173859[0m
[32m2025-09-27 17:38:59.387[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 93.47it/s]
[32m2025-09-27 17:39:00.392[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.9042 test 2.5898 metric ['0.1203'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 80.75it/s]
[32m2025-09-27 17:39:01.527[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 2.3477 test 2.2187 metric ['0.2031'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 93.63it/s]
[32m2025-09-27 17:39:02.517[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor

Try to update the code above by changing the hyperparameters.
    
To discern between the changes, also modify the tag mlflow.set_tag("model", "new-tag-here") where you add
a new tag of your choice. This way you can keep the models apart.

In [26]:
trainer.loop() # if you want to pick up training, loop will continue from the last epoch

100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 86.49it/s]
[32m2025-09-27 17:39:37.101[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m175[0m - [1mResuming epochs from previous training at 10[0m
[32m2025-09-27 17:39:37.189[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 10 train 0.6217 test 0.5929 metric ['0.8266'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 91.43it/s]
[32m2025-09-27 17:39:38.208[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 11 train 0.5010 test 0.4947 metric ['0.8719'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 88.91it/s]
[32m2025-09-27 17:39:39.240[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 12 train 0.4070 test 0.4354 metric ['0.9031'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 91.71it/s]
[32m2025-09-27 17:39:40.254[0m | [1mINFO    [0m

In [27]:
mlflow.end_run()

Different test with GRU

In [23]:
import mlflow
from datetime import datetime

settings = TrainerSettings(
    epochs=20, #30,20,10 # increase this to about 100 for training
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs = {
        "save": False, # save every best model, and restore the best one
        "verbose": True,
        "patience": 5, #4,3, 2,5 # number of epochs with no improvement after which training will be stopped
        "delta": 0.0, # minimum change to be considered an improvement
    }
)

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("gestures").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

with mlflow.start_run():
    mlflow.set_tag("model", "GRU_h128_d01")
    mlflow.set_tag("dev", "Eline")
    config = ModelConfig(
        input_size=3,
        hidden_size=64, #128,32,64
        num_layers=3,
        output_size=20,
        dropout=0.1,
    )
    mlflow.log_param("hidden_size", f"{config.hidden_size}")
    mlflow.log_param("dropout", f"{config.dropout}")
    mlflow.log_param("epochs", f"{settings.epochs}")
    mlflow.log_param("num_layers", f"{config.num_layers}")
    mlflow.log_param("patience", f"{settings.earlystop_kwargs['patience']}")
    mlflow.log_param("delta", f"{settings.earlystop_kwargs['delta']}")

    model = GRUmodel(
        config=config,
    )

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

[32m2025-09-28 17:22:52.922[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures\20250928-172252[0m
[32m2025-09-28 17:22:52.922[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 28.23it/s]
[32m2025-09-28 17:22:56.022[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.6362 test 2.2683 metric ['0.1828'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 29.81it/s]
[32m2025-09-28 17:22:58.937[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 2.1358 test 1.9925 metric ['0.3094'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 29.64it/s]
[32m2025-09-28 17:23:01.891[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor

Different tests with LSTM

In [15]:
import torch
from torch import nn, Tensor
from dataclasses import dataclass
loss_fn = torch.nn.CrossEntropyLoss()

@dataclass
class ModelConfig:
    input_size: int
    hidden_size: int
    output_size: int
    dropout: float
    num_layers: int

class LSTMModel(nn.Module):
    def __init__(
        self,
        config: ModelConfig,
    ) -> None:
        super().__init__()
        self.config = config
        self.rnn = nn.LSTM(
            input_size=config.input_size,
            hidden_size=config.hidden_size,
            dropout=config.dropout if config.num_layers > 1 else 0.0,
            batch_first=True,
            num_layers=config.num_layers,
        )
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        x, _ = self.rnn(x)           # x shape: (batch, seq_len, hidden_size)
        last_step = x[:, -1, :]      # Take output from last time step
        yhat = self.linear(last_step)
        return yhat

In [17]:
import mlflow
from datetime import datetime

settings = TrainerSettings(
    epochs=35, #30,20,10 # increase this to about 100 for training
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs = {
        "save": False, # save every best model, and restore the best one
        "verbose": True,
        "patience": 5, #4,3, 2,5 # number of epochs with no improvement after which training will be stopped
        "delta": 0.0, # minimum change to be considered an improvement
    }
)

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("gestures").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

with mlflow.start_run():
    mlflow.set_tag("model", "LSTM_h64_d01")
    mlflow.set_tag("dev", "Eline")
    config = ModelConfig(
        input_size=3,
        hidden_size=64,
        num_layers=4, #1 indien niets gelogt dan 1
        output_size=20,
        dropout=0.1,
    )
    mlflow.log_param("hidden_size", f"{config.hidden_size}")
    mlflow.log_param("dropout", f"{config.dropout}")
    mlflow.log_param("num_layers", f"{config.num_layers}")
    mlflow.log_param("epochs", f"{settings.epochs}")
    mlflow.log_param("patience", f"{settings.earlystop_kwargs['patience']}")
    mlflow.log_param("delta", f"{settings.earlystop_kwargs['delta']}")

    model = LSTMModel(
        config=config,
    )

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)
mlflow.end_run()

[32m2025-09-28 16:01:50.021[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures\20250928-160150[0m
[32m2025-09-28 16:01:50.021[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 72.69it/s]
[32m2025-09-28 16:01:51.293[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.8112 test 2.4610 metric ['0.1156'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 72.90it/s]
[32m2025-09-28 16:01:52.545[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 2.3338 test 2.0678 metric ['0.2375'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 67.95it/s]
[32m2025-09-28 16:01:53.887[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor

Adding conv1d layer

In [24]:
import torch
from torch import nn, Tensor
from dataclasses import dataclass

@dataclass
class ModelConfig:
    input_size: int          # Features per timestep (for Conv1d, becomes in_channels)
    hidden_size: int
    output_size: int
    dropout: float
    num_layers: int
    conv_out_channels: int   # Number of filters in Conv1d
    conv_kernel_size: int    # Width of each filter

class LSTMWithConv1D(nn.Module):
    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config

        # Conv1d: (batch, in_channels=input_size, seq_len) -> (batch, out_channels, new_seq_len)
        self.conv1 = nn.Conv1d(
            in_channels=config.input_size,
            out_channels=config.conv_out_channels,
            kernel_size=config.conv_kernel_size,
            padding=config.conv_kernel_size // 2  # to keep same length
        )

        # LSTM will take conv_out_channels as input_size
        self.rnn = nn.LSTM(
            input_size=config.conv_out_channels,
            hidden_size=config.hidden_size,
            num_layers=config.num_layers,
            dropout=config.dropout if config.num_layers > 1 else 0.0,
            batch_first=True
        )

        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        # x shape: (batch_size, seq_len, input_size)
        x = x.transpose(1, 2)                          # -> (batch, input_size, seq_len)
        x = self.conv1(x)                              # -> (batch, conv_out_channels, seq_len)
        x = x.transpose(1, 2)                          # -> (batch, seq_len, conv_out_channels)
        x, _ = self.rnn(x)                             # -> (batch, seq_len, hidden_size)
        last_step = x[:, -1, :]                        # -> (batch, hidden_size)
        yhat = self.linear(last_step)                  # -> (batch, output_size)
        return yhat

In [32]:
import mlflow
from datetime import datetime

settings = TrainerSettings(
    epochs=35, # increase this to about 100 for training
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs = {
        "save": False, # save every best model, and restore the best one
        "verbose": True,
        "patience": 2, #5 # number of epochs with no improvement after which training will be stopped
        "delta": 0.0, # minimum change to be considered an improvement
    }
)

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("gestures").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

with mlflow.start_run():
    mlflow.set_tag("model", "Conv_LSTM_h64_d01")
    mlflow.set_tag("dev", "Eline")
    config = ModelConfig(
        input_size=3,           # original feature size per timestep
        hidden_size=64,
        output_size=20,
        dropout=0.1,
        num_layers=3,#1,2,3
        conv_out_channels=32, #16,32,64   # number of filters in Conv1d
        conv_kernel_size=3       # size of filters
    )
    mlflow.log_param("hidden_size", f"{config.hidden_size}")
    mlflow.log_param("dropout", f"{config.dropout}")
    mlflow.log_param("num_layers", f"{config.num_layers}")
    mlflow.log_param("epochs", f"{settings.epochs}")
    mlflow.log_param("conv_out_channels", f"{config.conv_out_channels}")
    mlflow.log_param("conv_kernel_size", f"{config.conv_kernel_size}")
    mlflow.log_param("patience", f"{settings.earlystop_kwargs['patience']}")
    mlflow.log_param("delta", f"{settings.earlystop_kwargs['delta']}")

    model = LSTMWithConv1D(config)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)
mlflow.end_run()

[32m2025-09-28 17:36:24.451[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures\20250928-173624[0m
[32m2025-09-28 17:36:24.451[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 89.47it/s]
[32m2025-09-28 17:36:25.492[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.7289 test 2.4243 metric ['0.1531'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 95.32it/s]
[32m2025-09-28 17:36:26.464[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 2.3065 test 2.1722 metric ['0.2219'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 94.94it/s]
[32m2025-09-28 17:36:27.449[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor

In [33]:
import torch
from torch import nn, Tensor
from dataclasses import dataclass

@dataclass
class ModelConfig:
    input_size: int             # Number of input features per timestep (for Conv1d: in_channels)
    hidden_size: int
    output_size: int
    dropout: float
    num_layers: int
    conv_out_channels: int      # Number of filters in Conv1d
    conv_kernel_size: int       # Width of Conv1d filters

class GRUWithConv1D(nn.Module):
    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config

        # 1D Convolution to extract local temporal features
        self.conv1 = nn.Conv1d(
            in_channels=config.input_size,
            out_channels=config.conv_out_channels,
            kernel_size=config.conv_kernel_size,
            padding=config.conv_kernel_size // 2  # preserve sequence length
        )

        # GRU to model temporal dependencies on extracted features
        self.rnn = nn.GRU(
            input_size=config.conv_out_channels,
            hidden_size=config.hidden_size,
            num_layers=config.num_layers,
            dropout=config.dropout if config.num_layers > 1 else 0.0,
            batch_first=True
        )

        # Linear output layer
        self.linear = nn.Linear(config.hidden_size, config.output_size)

    def forward(self, x: Tensor) -> Tensor:
        # x shape: (batch_size, seq_len, input_size)
        x = x.transpose(1, 2)              # -> (batch, input_size, seq_len)
        x = self.conv1(x)                  # -> (batch, conv_out_channels, seq_len)
        x = x.transpose(1, 2)              # -> (batch, seq_len, conv_out_channels)
        x, _ = self.rnn(x)                 # -> (batch, seq_len, hidden_size)
        last_step = x[:, -1, :]            # -> (batch, hidden_size)
        yhat = self.linear(last_step)      # -> (batch, output_size)
        return yhat

In [42]:
import mlflow
from datetime import datetime

settings = TrainerSettings(
    epochs=35, # increase this to about 100 for training
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs = {
        "save": False, # save every best model, and restore the best one
        "verbose": True,
        "patience": 2, #2,5 # number of epochs with no improvement after which training will be stopped
        "delta": 0.0, # minimum change to be considered an improvement
    }
)

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("gestures").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

with mlflow.start_run():
    mlflow.set_tag("model", "Conv_GRU_h64_d01")
    mlflow.set_tag("dev", "Eline")
    config = ModelConfig(
        input_size=3,           # original feature size per timestep
        hidden_size=64,
        output_size=20,
        dropout=0.1,
        num_layers=3,#1,2
        conv_out_channels=16,  #16,32,64  # number of filters in Conv1d
        conv_kernel_size=3       # size of filters
    )
    mlflow.log_param("hidden_size", f"{config.hidden_size}")
    mlflow.log_param("dropout", f"{config.dropout}")
    mlflow.log_param("num_layers", f"{config.num_layers}")
    mlflow.log_param("epochs", f"{settings.epochs}")
    mlflow.log_param("conv_out_channels", f"{config.conv_out_channels}")
    mlflow.log_param("conv_kernel_size", f"{config.conv_kernel_size}")
    mlflow.log_param("patience", f"{settings.earlystop_kwargs['patience']}")
    mlflow.log_param("delta", f"{settings.earlystop_kwargs['delta']}")

    model = LSTMWithConv1D(config)

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    if not settings.earlystop_kwargs["save"]:
        tag = datetime.now().strftime("%Y%m%d-%H%M-")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)
mlflow.end_run()

[32m2025-09-28 17:43:39.281[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures\20250928-174339[0m
[32m2025-09-28 17:43:39.282[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 90.74it/s]
[32m2025-09-28 17:43:40.305[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.7152 test 2.4743 metric ['0.1422'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 94.84it/s]
[32m2025-09-28 17:43:41.280[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 2.2197 test 1.9629 metric ['0.2953'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:00<00:00, 90.18it/s]
[32m2025-09-28 17:43:42.298[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrepor

In [None]:
# Testing if there are results in the .db file
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("gestures")
runs = client.search_runs(experiment_ids=[experiment.experiment_id])

for run in runs:
    print(f"Run ID: {run.info.run_id}")
    print("Params:", run.data.params)
    print("Metrics:", run.data.metrics)
    print("Artifacts:", client.list_artifacts(run.info.run_id))


Run ID: 9ceaf96308b34d43ace52a1734092dcd
Params: {}
Metrics: {'Loss/train': 1.4433135986328125, 'Loss/test': 1.3747017323970794, 'metric/Accuracy': 0.5390625, 'learning_rate': 0.001}
Artifacts: []
Run ID: da3fccc21ee941fe9a8bddc9f5ecda52
Params: {'hidden_size': '64', 'dropout': '0.1', 'epochs': '3', 'patience': '5', 'delta': '0.0'}
Metrics: {'Loss/train': 2.1556239128112793, 'Loss/test': 2.0602210998535155, 'metric/Accuracy': 0.23125, 'learning_rate': 0.001}
Artifacts: []
Run ID: 01e4bedce8ec464e952a6250b6958edb
Params: {'hidden_size': '64', 'dropout': '0.1', 'epochs': '3', 'patience': '5', 'delta': '0.0'}
Metrics: {'Loss/train': 2.1349799633026123, 'Loss/test': 1.9992321670055389, 'metric/Accuracy': 0.29375, 'learning_rate': 0.001}
Artifacts: []
