In [5]:
from mltrainer import Trainer, TrainerSettings, ReportTypes, metrics
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor
import torch

import torch.optim as optim
optimizer = optim.Adam

import sys 
import os
sys.path.append(os.path.abspath('../models'))
sys.path.append(os.path.abspath('../dev'))

In [2]:
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
preprocessor = BasePreprocessor()

batchsize = 32

streamers = fashionfactory.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]

trainstreamer = train.stream()
validstreamer = valid.stream()


[32m2025-05-15 19:57:40.146[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/fashionmnist[0m
[32m2025-05-15 19:57:40.147[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /home/azureuser/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


In [None]:
from pathlib import Path
from CNN import CNN

log_dir = Path("modellog").resolve()
if not log_dir.exists():
    log_dir.mkdir(parents=True)

accuracy = metrics.Accuracy()
loss_fn = torch.nn.CrossEntropyLoss()

model = CNN(units = 128)

settings = TrainerSettings(
    epochs=100,
    metrics=[accuracy],
    logdir=log_dir,
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
    earlystop_kwargs={
        "save": False,
        "verbose": True,
        "patience": 100,
    },
    scheduler_kwargs={
        "factor": 0.5,
        "patience": 10
    }
)
adam = optim.Adam(model.parameters(), lr=0.01)

trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_fn,
    optimizer=optimizer,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    )

[32m2025-05-15 19:57:43.797[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to /home/azureuser/machinelearning-melissa/notebooks/modellog/20250515-195743[0m
[32m2025-05-15 19:57:43.798[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m


In [4]:
trainer.loop()

100%|[38;2;30;71;6m██████████[0m| 1875/1875 [00:18<00:00, 103.37it/s]
[32m2025-05-15 19:58:05.381[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 0.4057 test 0.3302 metric ['0.8777'][0m
100%|[38;2;30;71;6m██████████[0m| 1875/1875 [00:18<00:00, 101.23it/s]
[32m2025-05-15 19:58:25.028[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.2933 test 0.3051 metric ['0.8888'][0m
100%|[38;2;30;71;6m██████████[0m| 1875/1875 [00:18<00:00, 102.72it/s]
[32m2025-05-15 19:58:44.331[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 4 train 0.2579 test 0.2958 metric ['0.8923'][0m
100%|[38;2;30;71;6m██████████[0m| 1875/1875 [00:18<00:00, 101.44it/s]
[32m2025-05-15 19:59:03.906[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 6 train 0.2365 test 0.2831 metric ['0.8996'][0m
100%|[38;2;30;71;6m████████