In [1]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
# Preprocessor instance that is handed to the datastreamers below.
preprocessor = PaddedPreprocessor()

# Build the factory for the gestures dataset and ask it for the datastreamers,
# using a batch size of 32.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(
    DatasetType.GESTURES
)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)

# Pull the train and validation streamers out by their split name.
train, valid = (streamers[split] for split in ("train", "valid"))

[32m2024-12-05 18:23:08.674[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:16<00:00, 157.07it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:05<00:00, 130.15it/s]


In [2]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy
from pathlib import Path

# Metric instance that is passed to the trainer settings below.
accuracy = Accuracy()

# Collect the keyword arguments in one place, then build the settings object
# from them.
settings_kwargs = {
    "epochs": 20,
    "metrics": [accuracy],
    "logdir": Path("gestures_gru"),
    # len() of each streamer is used as the number of steps.
    "train_steps": len(train),
    "valid_steps": len(valid),
    "reporttypes": [
        ReportTypes.GIN,
        ReportTypes.TENSORBOARD,
        ReportTypes.MLFLOW,
    ],
    "scheduler_kwargs": {"factor": 0.5, "patience": 5},
    "earlystop_kwargs": None,
}
settings = TrainerSettings(**settings_kwargs)

# Last expression of the cell: display the resulting settings.
settings

[32m2024-12-05 18:23:40.787[0m | [1mINFO    [0m | [36mmltrainer.settings[0m:[36mcheck_path[0m:[36m61[0m - [1mCreated logdir /home/azureuser/MachineLearning/notebooks/3_recurrent_networks/gestures_gru[0m


epochs: 20
metrics: [Accuracy]
logdir: gestures_gru
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [4]:
# Size of each streamer as reported by len(); the same two values are passed
# as train_steps / valid_steps in the trainer settings.
len(train), len(valid)

(81, 20)

In [5]:
import gin
from mltrainer import rnn_models, Trainer

# Load the gin configuration file.
gin.parse_config_file("gestures_gru.gin")

# Example of using the configuration: the model is constructed without any
# explicit arguments.
model = rnn_models.GRUmodel()

# Example of using the train and validation streams: create both, then take a
# single batch from the training stream to inspect it.
# Note: this consumes one batch from `trainstreamer`.
trainstreamer, validstreamer = train.stream(), valid.stream()
first_batch = next(iter(trainstreamer))
x, y = first_batch

# Display the shape of the inputs together with the labels of that batch.
x.shape, y

  decorated_class = decorating_meta(cls.__name__, (cls,), overrides)


(torch.Size([32, 32, 3]),
 tensor([ 8,  4,  1,  4, 14, 18,  7, 16, 19,  2,  6, 13, 19, 15, 13,  5,  0,  6,
          9, 18, 19, 12, 11,  3,  6, 11, 13,  6, 16, 16, 12, 19]))

In [6]:
# Display the "config" entry of the gin bindings registered for GRUmodel.
gin.get_bindings("GRUmodel")["config"]

{'input_size': 3,
 'hidden_size': 16,
 'dropout': 0.5,
 'num_layers': 2,
 'output_size': 20}

Device:

In [7]:
import torch
# Probe the available backends (MPS first, then CUDA, then CPU) and report
# which one was found.
mps_usable = torch.backends.mps.is_available() and torch.backends.mps.is_built()
if mps_usable:
    device, backend_msg = torch.device("mps"), "Using MPS"
elif torch.cuda.is_available():
    device, backend_msg = "cuda:0", "using cuda"
else:
    device, backend_msg = "cpu", "using cpu"
print(backend_msg)

# The detection above is informational only: training is pinned to the CPU.
# On the author's Mac, at least for the BaseRNN model, MPS did not speed up
# training, probably because the overhead of copying the data to the GPU is
# too high. It might still pay off for larger models with more parameters.
device = "cpu"

using cpu


In [11]:
import mlflow
from datetime import datetime
import torch.optim as optim

# Loss function handed to the trainer.
loss_fn = torch.nn.CrossEntropyLoss()

# Store MLflow runs in a local SQLite database, under one experiment name.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("gestures_gru")

# Directory for the saved model. `exist_ok=True` makes the call idempotent, so
# no separate exists() check is needed and re-running the cell is safe.
modeldir = Path("../../models/gestures_gru/").resolve()
modeldir.mkdir(parents=True, exist_ok=True)

# Parse the gin file again so this cell does not rely on an earlier cell
# having loaded it.
gin.parse_config_file("gestures_gru.gin")

with mlflow.start_run():
    # Tag the run and log the model configuration taken from the gin bindings.
    mlflow.set_tag("model", "GRUmodel")
    mlflow.set_tag("dev", "raoul")
    mlflow.log_params(gin.get_bindings("GRUmodel")["config"])

    # Fresh model instance for this run.
    model = rnn_models.GRUmodel()

    # The optimizer and scheduler are passed as classes, not instances.
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        device=device,
    )
    trainer.loop()

    # Save the trained model under a minute-resolution timestamp, giving a
    # file name of the form "YYYYMMDD-HHMMmodel.pt".
    # NOTE(review): this saves the whole module object rather than a
    # state_dict; presumably loading it later requires the GRUmodel class to
    # be importable -- confirm before relying on these files long-term.
    tag = datetime.now().strftime("%Y%m%d-%H%M")
    modelpath = modeldir / (tag + "model.pt")
    torch.save(model, modelpath)

[32m2024-12-05 18:52:17.489[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures_gru/20241205-185217[0m
  0%|[38;2;30;71;6m          [0m| 0/20 [00:00<?, ?it/s]

100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 60.86it/s]
[32m2024-12-05 18:52:19.007[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 2.9840 test 2.9427 metric ['0.1000'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 61.43it/s]
[32m2024-12-05 18:52:20.502[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 2.8442 test 2.5842 metric ['0.1125'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 62.50it/s]
[32m2024-12-05 18:52:21.982[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 2.4654 test 2.3747 metric ['0.1516'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01<00:00, 60.58it/s]
[32m2024-12-05 18:52:23.497[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 3 train 2.3279 test 2.2897 metric ['0.1953'][0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:01

In [12]:
# Display the model's repr to inspect its layers.
model

GRUmodel(
  (rnn): GRU(3, 16, num_layers=2, batch_first=True, dropout=0.5)
  (linear): Linear(in_features=16, out_features=20, bias=True)
)

In [13]:
# Display the trainer settings once more for reference.
settings

epochs: 20
metrics: [Accuracy]
logdir: gestures_gru
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: None

In [None]:
# Explicitly end the active MLflow run, if there is one.
# NOTE(review): the training cell already wraps its run in
# `with mlflow.start_run():`, so this is presumably only a cleanup step after
# an interrupted run -- confirm whether it is still needed.
mlflow.end_run()