Rebuilding the BaseRNN model for the Gestures dataset

Download data
Preprocess data

In [1]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
# Preprocessor handed to the streamers when they are created.
preprocessor = PaddedPreprocessor()

# Factory for the Gestures dataset; it builds one streamer per split.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)

# Keep separate handles to the train and validation streamers for later cells.
train, valid = streamers["train"], streamers["valid"]

[32m2024-12-05 15:48:12.757[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:15<00:00, 163.01it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:03<00:00, 181.23it/s]


In [2]:
from pathlib import Path
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

# Metric instance; also called directly on a single batch further down.
accuracy = Accuracy()

settings = TrainerSettings(
    # Training length: 20 epochs, each covering the full length of both streamers.
    epochs=20,
    train_steps=len(train),
    valid_steps=len(valid),
    # What to measure and where to report it.
    metrics=[accuracy],
    logdir=Path("gestures"),
    reporttypes=[
        ReportTypes.GIN,
        ReportTypes.TENSORBOARD,
        ReportTypes.MLFLOW,
    ],
    # Scheduler arguments; no early-stopping arguments are supplied.
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs=None,
)
settings

[32m2024-12-05 15:50:26.045[0m | [1mINFO    [0m | [36mmltrainer.settings[0m:[36mcheck_path[0m:[36m61[0m - [1mCreated logdir /home/azureuser/MachineLearning/notebooks/3_recurrent_networks/gestures[0m


epochs: 20
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [3]:
# Lengths of the two streamers: the same values that were passed to
# TrainerSettings as train_steps and valid_steps.
len(train), len(valid)

(81, 20)

In [13]:
import gin
from mltrainer import rnn_models, Trainer

# Load the gin configuration
gin.parse_config_file("gestures.gin")

# Example of using the configuration: BaseRNN() is called without explicit
# arguments, so its constructor arguments come from the gin bindings
model = rnn_models.BaseRNN()

# Example of using the train and valid streamers: open both streams and
# pull a single batch to inspect its shape and labels
trainstreamer, validstreamer = train.stream(), valid.stream()
x, y = next(iter(trainstreamer))
x.shape, y

(torch.Size([32, 39, 3]),
 tensor([10, 14, 12, 18,  7,  2, 18,  0, 17, 17, 14,  5,  6,  5, 12,  5,  5, 13,
          6,  0,  0,  2, 10, 14,  2, 17,  4,  3,  6,  2,  6,  6]))

Fill the gestures.gin file with relevant settings for `input_size`, `hidden_size`, `num_layers` and `horizon` (which, in our case, will be the number of classes...)

In [14]:
# Show the bindings gin currently holds for BaseRNN, to verify what
# gestures.gin configured.
gin.get_bindings("BaseRNN")

{'input_size': 3, 'hidden_size': 64, 'num_layers': 5, 'horizon': 20}

Check the output size of the model

In [15]:
# Forward the sample batch through the untrained model and inspect the
# output shape; the last dimension should match the configured `horizon`.
yhat = model(x)
yhat.shape

torch.Size([32, 20])

Test the accuracy metric on a single batch

In [16]:
# Sanity check: apply the metric to the labels and the untrained model's
# output for the sample batch.
accuracy(y, yhat)

tensor(0.0312)

import torch

In [18]:
import torch
# Probe for an accelerator: Apple MPS first, then CUDA, otherwise fall back to CPU.
if not (torch.backends.mps.is_available() and torch.backends.mps.is_built()):
    if torch.cuda.is_available():
        device = "cuda:0"
        print("using cuda")
    else:
        device = "cpu"
        print("using cpu")
else:
    device = torch.device("mps")
    print("Using MPS")

# The probe result is deliberately overridden: on the author's Mac, MPS did not
# speed up training of the BaseRNN model, probably because the overhead of
# copying data to the GPU is too high. Larger models with more parameters
# might still benefit from it.
device = "cpu"

using cpu


BaseRNN model:

In [20]:
import mlflow
from datetime import datetime
from torch import optim

# Track runs in a local SQLite-backed MLflow store, under the "gestures" experiment.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures")

# Directory for saved models; exist_ok=True makes re-running this cell safe.
modeldir = Path("../../models/gestures/").resolve()
modeldir.mkdir(parents=True, exist_ok=True)

# Re-parse the config so edits to gestures.gin are picked up before the model is built.
gin.parse_config_file("gestures.gin")

with mlflow.start_run():
    model = rnn_models.BaseRNN()
    loss_fn = torch.nn.CrossEntropyLoss()

    # Tag the run with the class that is actually trained. The tag used to be
    # the hard-coded string "GRUmodel", which mislabelled BaseRNN runs in MLflow.
    mlflow.set_tag("model", type(model).__name__)
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("BaseRNN"))

    # Optimizer and scheduler are passed as classes, not instances; `settings`
    # carries scheduler_kwargs for the scheduler.
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    # Timestamped filename so successive runs do not overwrite each other
    # (unless they finish within the same minute).
    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    # NOTE: this pickles the whole module object, so loading it later requires
    # the BaseRNN class to be importable under the same path.
    torch.save(model, modelpath)

[32m2024-12-05 16:25:10.564[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20241205-162510[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03<00:00, 26.92it/s]
[32m2024-12-05 16:25:13.895[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.6961 test 2.5570 metric ['0.0875'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 27.22it/s]
[32m2024-12-05 16:25:17.166[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.5052 test 2.5581 metric ['0.0906'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 29.26it/s]
[32m2024-12-05 16:25:20.219[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 2.4980 test 2.4794 metric ['0.1047'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:02<00:00, 27.29it/s]
[32m2024-12-05 16:25:23.518[0m | [1mINFO    [0m |

In [21]:
# Display the trainer settings again after the training run.
settings

epochs: 20
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [22]:
# Show the module structure of the model that was just trained.
model

BaseRNN(
  (rnn): RNN(3, 64, num_layers=5, batch_first=True)
  (linear): Linear(in_features=64, out_features=20, bias=True)
)