In [8]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor

import sys 
import os
# Make the project-local source directories importable from this notebook
# (the `RNN` module imported further down is presumably found via one of
# these — confirm). The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate entries on sys.path.
for _extra_dir in ("../networks", "../dev"):
    _extra_path = os.path.abspath(_extra_dir)
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

# Batch preprocessor handed to the datastreamers below
# (presumably pads variable-length sequences to a common length — confirm in mltrainer).
preprocessor = PaddedPreprocessor()

# Build the factory for the gestures dataset and ask it for datastreamers
# that yield batches of 32 items.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=32,
    preprocessor=preprocessor,
)

# Keep the streamer objects themselves: their len() is used later as the
# number of train/valid steps per epoch.
train, valid = streamers["train"], streamers["valid"]

# The streams are what the Trainer receives as its dataloaders.
trainstreamer, validstreamer = train.stream(), valid.stream()

[32m2025-06-04 17:32:45.325[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:00<00:00, 3292.10it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:00<00:00, 3323.68it/s]


In [None]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy
import torch

# Loss function handed to the Trainer.
loss_fn = torch.nn.CrossEntropyLoss()
# Metric passed to TrainerSettings(metrics=[...]) and reported next to the loss.
accuracy = Accuracy()

In [None]:
from pathlib import Path

# Training configuration shared by the Trainer built further down.
settings = TrainerSettings(
    epochs=10,
    metrics=[accuracy],
    logdir=Path("gestures"),
    # Steps per epoch are taken from the length of each datastreamer.
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TOML, ReportTypes.TENSORBOARD, ReportTypes.MLFLOW],
    # Keyword arguments for the learning-rate scheduler given to the Trainer.
    scheduler_kwargs={"factor": 0.5, "patience": 5},
    earlystop_kwargs={
        # False: early stopping does not checkpoint the best model; the
        # notebook saves the final model itself after training instead.
        # (True would save every best model and restore the best one.)
        "save": False,
        "verbose": True,
        "patience": 5,  # number of epochs with no improvement after which training will be stopped
    },
)
# Bare expression: display the resolved settings (including defaults) as cell output.
settings

epochs: 10
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.TOML: 'TOML'>, <ReportTypes.TENSORBOARD: 'TENSORBOARD'>, <ReportTypes.MLFLOW: 'MLFLOW'>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.5, 'patience': 5}
earlystop_kwargs: {'save': True, 'verbose': True, 'patience': 5}

In [7]:
from datetime import datetime

from mltrainer import Trainer
from torch import optim

from RNN import GRUmodel, ModelConfig

# Absolute path of the directory in which the trained model is stored.
modeldir = Path("gestures").resolve()
# exist_ok=True makes directory creation a no-op when it already exists,
# removing the separate exists() check (and the race between check and mkdir).
modeldir.mkdir(parents=True, exist_ok=True)

# Hyperparameters of the GRU network.
config = ModelConfig(
    input_size=3,  # fixed (presumably the number of features per timestep in the dataset — confirm)
    hidden_size=128,
    num_layers=2,
    output_size=20,  # fixed (presumably the number of gesture classes — confirm)
    dropout=0.2,
)

# Instantiate the network from the configuration above.
model = GRUmodel(config=config)

# Wire model, data, loss and settings together.
trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_fn,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    # Optimizer and scheduler are passed as classes, not instances
    # (presumably the Trainer instantiates them using the kwargs in `settings` — confirm).
    optimizer=optim.Adam,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)

# Run the full training/validation loop for the configured number of epochs.
trainer.loop()

# When early stopping is not configured to checkpoint the model ("save" is
# falsy), persist the trained model here under a timestamped file name.
# `earlystop_kwargs` may be None (early stopping disabled) or lack a "save"
# key; both are treated as "not saved" rather than raising an error after
# training has already finished.
if not (settings.earlystop_kwargs or {}).get("save", False):
    tag = datetime.now().strftime("%Y%m%d-%H%M-")
    modelpath = modeldir / (tag + "model.pt")
    # NOTE: this pickles the whole model object (not just a state_dict), so
    # loading it later requires the model class to be importable.
    torch.save(model, modelpath)

[32m2025-06-04 17:13:20.421[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to gestures/20250604-171320[0m
[32m2025-06-04 17:13:20.422[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03<00:00, 26.60it/s]
[32m2025-06-04 17:13:23.656[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 2.4774 test 2.1521 metric ['0.2281'][0m
[32m2025-06-04 17:13:23.657[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36msave_checkpoint[0m:[36m268[0m - [1mValidation loss (2.1521 --> 2.1521).Saving gestures/20250604-171320/checkpoint.pt ...[0m
100%|[38;2;30;71;6m██████████[0m| 81/81 [00:03<00:00, 26.94it/s]
[32m2025-06-04 17:13:26.873[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [