In [1]:
from pathlib import Path
import torch
import torch.nn as nn
from loguru import logger
import warnings
warnings.simplefilter("ignore", UserWarning)

In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import BasePreprocessor

for dataset in DatasetType:
    print(dataset)

DatasetType.FLOWERS
DatasetType.IMDB
DatasetType.GESTURES
DatasetType.FASHION
DatasetType.SUNSPOTS
DatasetType.IRIS
DatasetType.PENGUINS
DatasetType.FAVORITA
DatasetType.SECURE


In [3]:

fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
batchsize = 64
preprocessor = BasePreprocessor()
streamers = fashionfactory.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]
trainstreamer = train.stream()
validstreamer = valid.stream()

[32m2026-01-10 17:59:48.249[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at C:\Users\denni\.cache\mads_datasets\fashionmnist[0m
[32m2026-01-10 17:59:48.250[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at C:\Users\denni\.cache\mads_datasets\fashionmnist\fashionmnist.pt[0m


In [4]:
x, y = next(iter(trainstreamer))
x.shape, y.shape

(torch.Size([64, 1, 28, 28]), torch.Size([64]))

In [5]:
import torch
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
    print("Using MPS")
elif torch.cuda.is_available():
    device = "cuda:0"
    print("using cuda")
else:
    device = "cpu"
    print("using cpu")

using cpu


In [6]:
from torch import nn
print(f"Using {device} device")

# Define model
class CNN(nn.Module):
    def __init__(self, filters, units1, units2, input_size=(32, 1, 28, 28)):
        super().__init__()
        self.in_channels = input_size[1]
        self.input_size = input_size
        self.filters = filters
        self.units1 = units1
        self.units2 = units2

        self.convolutions = nn.Sequential(
            nn.Conv2d(self.in_channels, filters, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        activation_map_size = self._conv_test(input_size)
        logger.info(f"Aggregating activationmap with size {activation_map_size}")
        self.agg = nn.AvgPool2d(activation_map_size)

        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Linear(filters, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Linear(units2, 10)
        )

    def _conv_test(self, input_size = (32, 1, 28, 28)):
        x = torch.ones(input_size)
        x = self.convolutions(x)
        return x.shape[-2:]

    def forward(self, x):
        x = self.convolutions(x)
        x = self.agg(x)
        logits = self.dense(x)
        return logits

model = CNN(filters=32, units1=128, units2=64).to("cpu")

[32m2026-01-10 17:59:48.356[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m27[0m - [1mAggregating activationmap with size torch.Size([2, 2])[0m


Using cpu device


In [7]:
from mltrainer.imagemodels import CNNConfig, CNNblocks

In [8]:
config = CNNConfig(
    matrixshape = (28, 28), # every image is 28x28
    batchsize = batchsize,
    input_channels = 1, # we have black and white images, so only one channel
    hidden = 32, # number of filters
    kernel_size = 3, # kernel size of the convolution
    maxpool = 3, # kernel size of the maxpool
    num_layers = 4, # we will stack 4 Convolutional blocks, each with two Conv2d layers
    num_classes = 10,
)

In [9]:
model = CNNblocks(config)
model.config

Calculated matrix size: 9
Caluclated flatten size: 288


{'matrixshape': (28, 28),
 'batchsize': 64,
 'input_channels': 1,
 'hidden': 32,
 'kernel_size': 3,
 'maxpool': 3,
 'num_layers': 4,
 'num_classes': 10}

In [10]:
from torchinfo import summary
summary(model, input_size=(32, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
CNNblocks                                [32, 10]                  --
├─ModuleList: 1-1                        --                        --
│    └─ConvBlock: 2-1                    [32, 32, 28, 28]          --
│    │    └─Sequential: 3-1              [32, 32, 28, 28]          9,568
│    └─ConvBlock: 2-2                    [32, 32, 28, 28]          --
│    │    └─Sequential: 3-2              [32, 32, 28, 28]          18,496
│    └─ReLU: 2-3                         [32, 32, 28, 28]          --
│    └─MaxPool2d: 2-4                    [32, 32, 9, 9]            --
│    └─ConvBlock: 2-5                    [32, 32, 9, 9]            --
│    │    └─Sequential: 3-3              [32, 32, 9, 9]            18,496
│    └─ReLU: 2-6                         [32, 32, 9, 9]            --
│    └─ConvBlock: 2-7                    [32, 32, 9, 9]            --
│    │    └─Sequential: 3-4              [32, 32, 9, 9]            18,496


In [11]:
import torch.optim as optim
from mltrainer import metrics
optimizer = optim.Adam
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

In [12]:
yhat = model(x.to("cpu"))
accuracy(y.to("cpu"), yhat)

0.140625

In [13]:
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir="demo",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TOML],
)

[32m2026-01-10 17:59:48.531[0m | [1mINFO    [0m | [36mmltrainer.settings[0m:[36mcheck_path[0m:[36m60[0m - [1mCreated logdir c:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\demo[0m


In [14]:
trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
trainer.loop()

[32m2026-01-10 17:59:48.541[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to demo\20260110-175948[0m
[32m2026-01-10 17:59:49.366[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 22.91it/s]
[32m2026-01-10 17:59:55.232[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 0 train 1.8382 test 1.0097 metric ['0.5763'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 23.01it/s]
[32m2026-01-10 18:00:00.995[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m209[0m - [1mEpoch 1 train 0.8649 test 0.7427 metric ['0.7084'][0m
100%|[38;2;30;71;6m██████████[0m| 100/100 [00:04<00:00, 24.09it/s]
[32m2026-01-10 18:00:06.568[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mrep

In [15]:
experiment_path = "mlflow_test"

In [16]:
import mlflow
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment(experiment_path)

2026/01/10 18:00:13 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/10 18:00:13 INFO mlflow.store.db.utils: Updating database tables
2026/01/10 18:00:13 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/10 18:00:13 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
2026/01/10 18:00:13 INFO alembic.runtime.migration: Running 

<Experiment: artifact_location='file:///c:/Users/denni/MADS/Portfolio-Dennis/2. Hypertuning mlflow/mlruns/1', creation_time=1768064414136, experiment_id='1', last_update_time=1768064414136, lifecycle_stage='active', name='mlflow_test', tags={}>

In [17]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [18]:
modeldir = Path("models").resolve()
if not modeldir.exists():
    modeldir.mkdir()
    print(f"Created {modeldir}")

Created C:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\models


In [19]:
import torch.optim as optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from datetime import datetime

# Define the hyperparameter search space
settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir=modeldir,
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.MLFLOW, ReportTypes.TOML],
)


# Define the objective function for hyperparameter optimization
def objective(params):
    # Start a new MLflow run for tracking the experiment
    with mlflow.start_run():
        # Set MLflow tags to record metadata about the model and developer
        mlflow.set_tag("model", "convnet")
        mlflow.set_tag("dev", "raoul")
        # Log hyperparameters to MLflow
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")


        # Initialize the optimizer, loss function, and accuracy metric
        optimizer = optim.Adam
        loss_fn = torch.nn.CrossEntropyLoss()
        accuracy = metrics.Accuracy()
        config = CNNConfig(
            matrixshape = (28, 28), # every image is 28x28
            batchsize = batchsize,
            input_channels = 1, # we have black and white images, so only one channel
            hidden = params["filters"], # number of filters
            kernel_size = 3, # kernel size of the convolution
            maxpool = 3, # kernel size of the maxpool
            num_layers = 4, # we will stack 4 Convolutional blocks, each with two Conv2d layers
            num_classes = 10,
        )

        # Instantiate the CNN model with the given hyperparameters
        model = CNNblocks(config)
        # Train the model using a custom train loop
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=device,
        )
        trainer.loop()

        # Save the trained model with a timestamp
        tag = datetime.now().strftime("%Y%m%d-%H%M")
        modelpath = modeldir / (tag + "model.pt")
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

See https://hyperopt.github.io/hyperopt/getting-started/search_spaces/ for more information about searchspaces for hyperopt

In [20]:
search_space = {
    'filters' : scope.int(hp.quniform('filters', 16, 128, 8)),
    'kernel_size' : scope.int(hp.quniform('kernel_size', 2, 5, 1)),
    'num_layers' : scope.int(hp.quniform('num_layers', 1, 10, 1)),
}

In [None]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=3,
    trials=Trials()
)

Calculated matrix size: 9                            
Caluclated flatten size: 432                         
  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

[32m2026-01-10 18:00:16.927[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to C:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\models\20260110-180016[0m
[32m2026-01-10 18:00:16.927[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:00<00:07, 12.67it/s][A
  4%|[38;2;30;71;6m4         [0m| 4/100 [00:00<00:07, 13.06it/s][A
  6%|[38;2;30;71;6m6         [0m| 6/100 [00:00<00:07, 13.26it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:06, 13.50it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:00<00:06, 13.46it/s][A
 12%|[38;2;30;71;6m#2        [0m| 12/100 [00:00<00:06, 13.29it/s][A
 14%|[38;2;30;71;6m#4        [0m| 

Calculated matrix size: 9                                                      
Caluclated flatten size: 216                                                   
 33%|███▎      | 1/3 [00:35<01:11, 35.54s/trial, best loss: 0.7009506744146347]

[32m2026-01-10 18:00:51.969[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to C:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\models\20260110-180051[0m
[32m2026-01-10 18:00:51.971[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [00:00<00:04, 22.39it/s][A
  6%|[38;2;30;71;6m6         [0m| 6/100 [00:00<00:03, 23.69it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:00<00:03, 24.55it/s][A
 12%|[38;2;30;71;6m#2        [0m| 12/100 [00:00<00:03, 25.27it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:03, 22.86it/s][A
 18%|[38;2;30;71;6m#8        [0m| 18/100 [00:00<00:03, 23.98it/s][A
 21%|[38;2;30;71;6m##1       [0m|

Calculated matrix size: 9                                                      
Caluclated flatten size: 216                                                   
 67%|██████▋   | 2/3 [00:51<00:24, 24.17s/trial, best loss: 0.7009506744146347]

[32m2026-01-10 18:01:08.177[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m24[0m - [1mLogging to C:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\models\20260110-180108[0m
[32m2026-01-10 18:01:08.179[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m68[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  3%|[38;2;30;71;6m3         [0m| 3/100 [00:00<00:03, 28.43it/s][A
  6%|[38;2;30;71;6m6         [0m| 6/100 [00:00<00:03, 27.65it/s][A
  9%|[38;2;30;71;6m9         [0m| 9/100 [00:00<00:03, 27.07it/s][A
 12%|[38;2;30;71;6m#2        [0m| 12/100 [00:00<00:03, 26.75it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:03, 26.19it/s][A
 18%|[38;2;30;71;6m#8        [0m| 18/100 [00:00<00:03, 26.70it/s][A
 21%|[38;2;30;71;6m##1       [0m|

 67%|██████▋   | 2/3 [01:09<00:34, 34.94s/trial, best loss: 0.7009506744146347]


RuntimeError: Parent directory C:\Users\denni\MADS\Portfolio-Dennis\2. Hypertuning mlflow\models does not exist.

: 

After running this, you can look at the best_result

In [None]:
best_result

{'filters': np.float64(88.0),
 'kernel_size': np.float64(4.0),
 'num_layers': np.float64(9.0)}