In [1]:
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools

# 1 Iterators
We will be using an interesting dataset. [link](https://tev.fbk.eu/resources/smartwatch)

From the site:
> The SmartWatch Gestures Dataset has been collected to evaluate several gesture recognition algorithms for interacting with mobile applications using arm gestures. Eight different users performed twenty repetitions of twenty different gestures, for a total of 3200 sequences. Each sequence contains acceleration data from the 3-axis accelerometer of a first generation Sony SmartWatch™, as well as timestamps from the different clock sources available on an Android device. The smartwatch was worn on the user's right wrist. 


In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
preprocessor = PaddedPreprocessor()

gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(batchsize=32, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

[32m2024-11-17 20:55:27.900[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\dilek\.cache\mads_datasets\gestures[0m
100%|[38;2;30;71;6m█████████████████████████████████████████████████████████████████████████████[0m| 2600/2600 [00:12<00:00, 205.36it/s][0m
100%|[38;2;30;71;6m███████████████████████████████████████████████████████████████████████████████[0m| 651/651 [00:03<00:00, 206.59it/s][0m


In [3]:
len(train), len(valid)

(81, 20)

In [4]:
trainstreamer = train.stream()
validstreamer = valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y

(torch.Size([32, 30, 3]),
 tensor([13,  7,  6, 18,  8, 13,  6,  5,  5, 19, 19, 15, 14, 16, 11,  8, 12,  1,
         12, 16,  9,  3, 11, 11,  3,  1,  8,  2, 10,  2, 12,  9]))

Depending on how the data was split, the sequence length can vary between batches. This can especially happen when the last few observations in the dataset don't fit into a window of fixed size. PaddedPreprocessor() has already added to standardize the length of sequences by adding padding.

Can you make sense of the shape?
What does it mean that the shapes are sometimes (32, 27, 3), but a second time might look like (32, 30, 3)? In other words, the second (or first, if you insist on starting at 0) dimension changes. Why is that? How does the model handle this? Do you think this is already padded, or still has to be padded?


# 2 Excercises
Lets test a basemodel, and try to improve upon that.

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

As a rule of thumbs: start lower than you expect to need!

In [5]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

accuracy = Accuracy()

settings = TrainerSettings(
    epochs=50,
    metrics=[accuracy],
    logdir=Path("gestures"),
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.GIN, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None
)
settings

[32m2024-11-17 20:56:45.411[0m | [1mINFO    [0m | [36mmltrainer.settings[0m:[36mcheck_path[0m:[36m61[0m - [1mCreated logdir C:\Users\dilek\desktop\Advanced_AI_Applications_WS24-25_MADS_HSRW\notebooks\3_recurrent_networks\gestures[0m


epochs: 50
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [6]:
gin.parse_config_file("gestures.gin")
model = rnn_models.BaseRNN()

In [7]:
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 128, 'num_layers': 3, 'horizon': 20}

Test the model. What is the output shape you need? Remember, we are doing classification!

Since we are trying to classifyy 20 different gestures, expected output shape is 20.

In [15]:
yhat = model(x)
yhat.shape

torch.Size([32, 20])

Test the accuracy

In [9]:
accuracy(y, yhat)

tensor(0.0625)

What do you think of the accuracy? What would you expect from blind guessing?

Check shape of `y` and `yhat`

In [10]:
yhat.shape, y.shape

(torch.Size([32, 20]), torch.Size([32]))

And look at the output of yhat

In [11]:
yhat[0]

tensor([ 0.0776,  0.0487, -0.0732,  0.0234,  0.0681,  0.1330, -0.0249, -0.0859,
         0.0041,  0.0750,  0.0301,  0.1152, -0.0414, -0.0012, -0.0449,  0.0471,
         0.0242,  0.0269, -0.1087, -0.0442], grad_fn=<SelectBackward0>)

Does this make sense to you? If you are unclear, go back to the classification problem with the MNIST, where we had 10 classes.

We have a classification problem, so we need Cross Entropy Loss.
Remember, [this has a softmax built in](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html) 

In [12]:
loss_fn = torch.nn.CrossEntropyLoss()
loss = loss_fn(yhat, y)
loss

tensor(2.9927, grad_fn=<NllLossBackward0>)

In [13]:
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 128, 'num_layers': 3, 'horizon': 20}

In [14]:
import torch
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
elif torch.cuda.is_available():
    device = "cuda:0"
    print("using cuda")
else:
    device = "cpu"
    print("using cpu")

# on my mac, at least for the BaseRNN model, mps does not speed up training
# probably because the overhead of copying the data to the GPU is too high
# however, it might speed up training for larger models, with more parameters
device = "cpu"

using cpu


In [18]:
import mlflow
from datetime import datetime
from mltrainer import rnn_models, Trainer, TrainerSettings
import gin
from pathlib import Path
import torch.optim as optim
import torch.nn as nn

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

# Load GRU configuration file
gin.parse_config_file('gestures_gru.gin')

with mlflow.start_run():
    mlflow.set_tag("model", "GRUmodel")
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Create and initialize GRU model
    model = rnn_models.GRUmodel()

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=nn.CrossEntropyLoss(),
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)
mlflow.end_run()    

[32m2024-11-17 21:39:20.634[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241117-213920[0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/50 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/81 [00:00<?, ?it/s][0m[A
  6%|[38;2;30;71;6m█████                                                                              [0m| 5/81 [00:00<00:01, 43.07it/s][0m[A
 14%|[38;2;30;71;6m███████████▏                                                                      [0m| 11/81 [00:00<00:01, 48.10it/s][0m[A
 21%|[38;2;30;71;6m█████████████████▏                                                                [0m| 17/81 [00:00<00:01, 52.32it/s][0m[A
 28%|[38;2;30;71;6m███████████████████████▎                                                          [0m|

Try to update the code above with the following two commands.
    
```python
gin.parse_config_file('gestures_gru.gin')
model = rnn_model.GRUmodel()
```

To discern between the changes, also modify the tag mlflow.set_tag("model", "new-tag-here") where you add
a new tag of your choice. This way you can keep the models apart.

In [19]:
mlflow.end_run()

In addition to above commands, Loss function has also updated before training. (loss_fn=nn.CrossEntropyLoss())

After training, our accuracy metric is around 83%. To improve the accuracy we can change the current hidden size 16 to 32 or 64 this will allow our model to learn more complex patterns. 

In [22]:
import mlflow
from datetime import datetime
from mltrainer import rnn_models, Trainer, TrainerSettings
import gin
from pathlib import Path
import torch.optim as optim
import torch.nn as nn

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

# Load GRU configuration file
gin.parse_config_file('gestures_gru_tuning.gin')

with mlflow.start_run():
    mlflow.set_tag("model", "GRUmodel")
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Create and initialize GRU model
    model = rnn_models.GRUmodel()

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=nn.CrossEntropyLoss(),
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)
mlflow.end_run()    

[32m2024-11-17 22:28:47.016[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241117-222847[0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/50 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/81 [00:00<?, ?it/s][0m[A
  2%|[38;2;30;71;6m██                                                                                 [0m| 2/81 [00:00<00:04, 17.68it/s][0m[A
  6%|[38;2;30;71;6m█████                                                                              [0m| 5/81 [00:00<00:03, 21.81it/s][0m[A
 10%|[38;2;30;71;6m████████▏                                                                          [0m| 8/81 [00:00<00:03, 20.48it/s][0m[A
 14%|[38;2;30;71;6m███████████▏                                                                      [0m|

Excercises:

- improve the RNN model
- test different things. What works? What does not?
- experiment with either GRU or LSTM layers, create your own models + ginfiles. 
- experiment with adding Conv1D layers.

You should be able to get above 90% accuracy with the dataset.

# Increasing the hidden_size from 16 to 64 significantly improved the accuracy metric (above the 90%). 

If we tried to add Conv1D layer:

In [23]:
class GRUmodel(nn.Module):
    def __init__(self, config):
        super(GRUmodel, self).__init__()
        self.config = config
        self.conv1d = nn.Conv1d(
            in_channels=config["input_size"],  
            out_channels=16, 
            kernel_size=3,  
            padding=1  
        )
        self.gru = nn.GRU(
            input_size=16,  
            hidden_size=config["hidden_size"],
            num_layers=config["num_layers"],
            dropout=config["dropout"],
            batch_first=True
        )
        self.fc = nn.Linear(config["hidden_size"], config["output_size"])

    def forward(self, x):
        x = x.permute(0, 2, 1)  
        x = self.conv1d(x)
        x = x.permute(0, 2, 1)  
        x, _ = self.gru(x)
        x = x[:, -1, :] 
        x = self.fc(x)
        return x


In [24]:
import mlflow
from datetime import datetime
from mltrainer import rnn_models, Trainer, TrainerSettings
import gin
from pathlib import Path
import torch.optim as optim
import torch.nn as nn

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

# Load GRU configuration file
gin.parse_config_file('gestures_gru.gin')

with mlflow.start_run():
    mlflow.set_tag("model", "GRUmodel_with_Conv1D")
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Create and initialize GRU model with Conv1D
    model = rnn_models.GRUmodel()

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=nn.CrossEntropyLoss(),
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)
mlflow.end_run()

[32m2024-11-17 22:51:15.975[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241117-225115[0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/50 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/81 [00:00<?, ?it/s][0m[A
  6%|[38;2;30;71;6m█████                                                                              [0m| 5/81 [00:00<00:01, 45.93it/s][0m[A
 12%|[38;2;30;71;6m██████████                                                                        [0m| 10/81 [00:00<00:01, 43.20it/s][0m[A
 20%|[38;2;30;71;6m████████████████▏                                                                 [0m| 16/81 [00:00<00:01, 47.33it/s][0m[A
 26%|[38;2;30;71;6m█████████████████████▎                                                            [0m|

Our accuracy reduced drastically, when we add Conv1D layer to the model. If we tried to change some parameters: 

In [30]:
class GRUmodel(nn.Module):
    def __init__(self, config):
        super(GRUmodel, self).__init__()
        self.config = config
        self.conv1d = nn.Conv1d(
            in_channels=config["input_size"],
            out_channels=32,  # Increased output channels
            kernel_size=3,
            padding=1
        )
        self.gru = nn.GRU(
            input_size=32,
            hidden_size=64,  # Increased hidden size
            num_layers=config["num_layers"],
            dropout=0.3,  # Adjusted dropout
            batch_first=True
        )
        self.fc = nn.Linear(64, config["output_size"]) 

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.conv1d(x)
        x = nn.ReLU()(x)  # Added Non-Linear Activation
        x = x.permute(0, 2, 1)
        x, _ = self.gru(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return x


In [31]:
import mlflow
from datetime import datetime
from mltrainer import rnn_models, Trainer, TrainerSettings
import gin
from pathlib import Path
import torch.optim as optim
import torch.nn as nn

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")
modeldir = Path("../../models/gestures/").resolve()
if not modeldir.exists():
    modeldir.mkdir(parents=True)

# Load GRU configuration file
gin.parse_config_file('gestures_gru.gin')

with mlflow.start_run():
    mlflow.set_tag("model", "GRUmodel_with_Conv1D")
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Create and initialize GRU model with Conv1D
    model = rnn_models.GRUmodel()

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=nn.CrossEntropyLoss(),
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)
mlflow.end_run()

[32m2024-11-17 23:05:52.780[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures\20241117-230552[0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/50 [00:00<?, ?it/s][0m
  0%|[38;2;30;71;6m                                                                                           [0m| 0/81 [00:00<?, ?it/s][0m[A
  6%|[38;2;30;71;6m█████                                                                              [0m| 5/81 [00:00<00:01, 48.45it/s][0m[A
 12%|[38;2;30;71;6m██████████                                                                        [0m| 10/81 [00:00<00:01, 48.59it/s][0m[A
 21%|[38;2;30;71;6m█████████████████▏                                                                [0m| 17/81 [00:00<00:01, 54.02it/s][0m[A
 28%|[38;2;30;71;6m███████████████████████▎                                                          [0m|

Increasing the number of output channels, hidden size of the GRU layer, adjusting the dropout rate, and adding a ReLU activation after the Conv1D layer to introduce non-linearity improved the model's accuracy. However,  original model was performing better without the Conv1D layers. This can indicate that the added complexity may not have been necessary and that the simpler architecture was more suited to the data patterns.