In [87]:
import sys 
import os
sys.path.append(os.path.abspath('../models'))

from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor
from mltrainer import Trainer, TrainerSettings, ReportTypes, metrics
from neural_network import NeuralNetwork
from neural_network import DeepNeuralNetwork

import torch.optim as optim
import torch

import os

In [90]:
# Shared setup for every experiment cell below: dataset streams, the metric
# and the loss function. The names defined here are read by the later cells.
batchsize = 64  # also embedded in some of the log-folder names further down

fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
preprocessor = BasePreprocessor()
streamers = fashionfactory.create_datastreamer(
    batchsize=batchsize,
    preprocessor=preprocessor,
)

# Pull both splits out of the streamer mapping and open one stream per split.
train, valid = streamers["train"], streamers["valid"]
trainstreamer, validstreamer = train.stream(), valid.stream()

# Objective and metric handed to every Trainer in this notebook.
loss_func = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

[32m2025-05-02 12:15:32.349[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/fashionmnist[0m
[32m2025-05-02 12:15:32.350[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /home/azureuser/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


In [None]:
# Single run logged under modellogs/BATCHTEST; the folder name encodes the two
# size arguments given to NeuralNetwork, the epoch count and the batch size.
units1 = 512
units2 = 512
epochs = 20
results = []

# Settings are created with the root log folder and then pointed at the
# run-specific folder.
settings = TrainerSettings(
    epochs=epochs,
    metrics=[accuracy],
    logdir="modellogs",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)
settings.logdir = f"modellogs/BATCHTEST/u{units1}_u{units2}_e{epochs}_b{batchsize}"

model = NeuralNetwork(num_classes=10, units1=units1, units2=units2)

# Optimizer and scheduler are passed as classes, not instances.
trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)
trainer.loop()

[32m2025-05-02 11:35:21.651[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/BATCHTEST/u512_u512_e20_b64/20250502-113521[0m
[32m2025-05-02 11:35:21.652[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 186.68it/s]
[32m2025-05-02 11:35:22.456[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 0.8585 test 0.6283 metric ['0.7692'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 182.53it/s]
[32m2025-05-02 11:35:23.269[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.5384 test 0.5215 metric ['0.8153'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 188.49it/s]
[32m2025-05-02 11:35:24.063[0m | [1mINFO    [0m |

KeyboardInterrupt: 

In [75]:
# Single run logged under modellogs/UNITS with both NeuralNetwork size
# arguments set to 128.
units1 = units2 = 128
epochs = 10
results = []

# Start from the root log folder, then switch to the run-specific one whose
# name records sizes, epochs and batch size.
settings = TrainerSettings(
    epochs=epochs,
    metrics=[accuracy],
    logdir="modellogs",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)
settings.logdir = f"modellogs/UNITS/u{units1}_u{units2}_e{epochs}_b{batchsize}"

model = NeuralNetwork(num_classes=10, units1=units1, units2=units2)

# Adam plus ReduceLROnPlateau, both handed over as classes.
trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)
trainer.loop()

[32m2025-05-02 12:05:08.054[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/UNITS/u128_u128_e10_b64/20250502-120508[0m
[32m2025-05-02 12:05:08.055[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 292.42it/s]
[32m2025-05-02 12:05:08.624[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 1.0343 test 0.6744 metric ['0.7517'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 296.72it/s]
[32m2025-05-02 12:05:09.188[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.6143 test 0.5887 metric ['0.7792'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 293.03it/s]
[32m2025-05-02 12:05:09.758[0m | [1mINFO    [0m | [3

In [79]:
# Single run logged under modellogs/EPOCHS: size arguments 256 and 64,
# trained for 20 epochs.
units1 = 256
units2 = 64
epochs = 20
results = []

# The settings object first receives the root log folder; the folder for this
# specific run is assigned immediately afterwards.
settings = TrainerSettings(
    epochs=epochs,
    metrics=[accuracy],
    logdir="modellogs",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)
settings.logdir = f"modellogs/EPOCHS/u{units1}_u{units2}_e{epochs}_b{batchsize}"

model = NeuralNetwork(num_classes=10, units1=units1, units2=units2)

# The Trainer receives the optimizer and scheduler classes themselves.
trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)
trainer.loop()

[32m2025-05-02 12:07:24.565[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/EPOCHS/u256_u64_e20_b64/20250502-120724[0m
[32m2025-05-02 12:07:24.567[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 270.25it/s]
[32m2025-05-02 12:07:25.172[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 0.9935 test 0.6735 metric ['0.7520'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 271.45it/s]
[32m2025-05-02 12:07:25.784[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.6043 test 0.5997 metric ['0.7825'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 260.45it/s]
[32m2025-05-02 12:07:26.404[0m | [1mINFO    [0m | [3

In [None]:
# Grid search with the Adam optimizer over every (units1, units2) pair drawn
# from `units`. The candidates must be distinct: the previous list repeated
# 512, so the 3x3 grid trained the same configuration several times and never
# tried 256. The values now match the SGD grid so both sweeps are comparable.
units = [512, 256, 128]
epochs = 10
results = []  # one record per finished run

for units1 in units:
    for units2 in units:

        # Fresh settings per run; created with the root log folder and then
        # pointed at a folder named after this configuration.
        settings = TrainerSettings(
            epochs=epochs,
            metrics=[accuracy],
            logdir="modellogs",
            train_steps=100,
            valid_steps=100,
            reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML]
        )

        # A new model per configuration so no weights carry over between runs.
        model = NeuralNetwork(num_classes=10, units1=units1, units2=units2)
        settings.logdir = f"modellogs/ADAM/u{units1}_u{units2}_e{epochs}"

        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_func,
            optimizer=optim.Adam,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau
        )

        trainer.loop()

        # Remember which configuration was written to which log folder.
        results.append({
            "units1": units1,
            "units2": units2,
            "run_dir": settings.logdir,
            "epoch": epochs,
        })

[32m2025-05-02 10:53:05.074[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/ADAM/u512_u512_e10/20250502-105305[0m
[32m2025-05-02 10:53:05.075[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 132.13it/s]
[32m2025-05-02 10:53:06.135[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 0.8427 test 0.5873 metric ['0.7895'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 118.80it/s]
[32m2025-05-02 10:53:07.264[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.5502 test 0.5200 metric ['0.8197'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 117.81it/s]
[32m2025-05-02 10:53:08.401[0m | [1mINFO    [0m | [36mmlt

In [18]:
# Grid search with the SGD optimizer: one training run for every ordered
# (units1, units2) pair taken from `units`.
units = [512, 256, 128]
epochs = 10
results = []  # one record per finished run

# The pair list is generated in the same order a nested loop would visit it
# (first value outer, second value inner).
for units1, units2 in [(first, second) for first in units for second in units]:
    # Settings start at the root log folder and are then redirected to a
    # folder named after the current configuration.
    settings = TrainerSettings(
        epochs=epochs,
        metrics=[accuracy],
        logdir="modellogs",
        train_steps=100,
        valid_steps=100,
        reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
    )

    # A new model is built for each configuration.
    model = NeuralNetwork(num_classes=10, units1=units1, units2=units2)
    settings.logdir = f"modellogs/SGD/u{units1}_u{units2}_e{epochs}"

    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_func,
        optimizer=optim.SGD,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
    )
    trainer.loop()

    # Keep track of which configuration went to which log folder.
    results.append(
        {"units1": units1, "units2": units2, "run_dir": settings.logdir, "epoch": epochs}
    )

[32m2025-05-02 10:46:33.907[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/SGD/u512_u512_e10/20250502-104633[0m
[32m2025-05-02 10:46:33.908[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 231.57it/s]
[32m2025-05-02 10:46:34.613[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 2.2966 test 2.2905 metric ['0.1630'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 230.44it/s]
[32m2025-05-02 10:46:35.396[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 2.2832 test 2.2775 metric ['0.1956'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 202.42it/s]
[32m2025-05-02 10:46:36.153[0m | [1mINFO    [0m | [36mmltr

In [None]:
# Single run of DeepNeuralNetwork, which takes three size arguments
# (units1, units2, units3). Logged under modellogs/DEEP.
units1 = 512
units2 = 256
units3 = 128
epochs = 20

# Settings are created with the root log folder and then pointed at the
# run-specific folder, whose name encodes all three sizes and the epoch count.
settings = TrainerSettings(
    epochs=epochs,
    metrics=[accuracy],
    logdir="modellogs",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)

model = DeepNeuralNetwork(num_classes=10, units1=units1, units2=units2, units3=units3)
settings.logdir = f"modellogs/DEEP/u{units1}_u{units2}_u{units3}_e{epochs}"

trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)

trainer.loop()

# Record the full configuration of the run. "units3" was previously missing,
# so the record did not identify the model that was actually trained.
resultsdeep = [
    {
        "units1": units1,
        "units2": units2,
        "units3": units3,
        "run_dir": settings.logdir,
        "epoch": epochs,
    }
]

[32m2025-05-02 11:37:34.651[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to modellogs/DEEP/u512_u256_u128_e20/20250502-113734[0m
[32m2025-05-02 11:37:34.653[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 175.84it/s]
[32m2025-05-02 11:37:35.505[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 0 train 0.9566 test 0.6424 metric ['0.7641'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 186.47it/s]
[32m2025-05-02 11:37:36.309[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m205[0m - [1mEpoch 2 train 0.5678 test 0.5520 metric ['0.8002'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:00<00:00, 181.61it/s]
[32m2025-05-02 11:37:37.135[0m | [1mINFO    [0m | [3