In [None]:
import sys 
import os
sys.path.append(os.path.abspath('../models'))
sys.path.append(os.path.abspath('../dev'))

import torch
from mltrainer.preprocessors import BasePreprocessor
from mltrainer import Trainer, TrainerSettings, ReportTypes, metrics

import torch.optim as optim
from torch import nn
from mads_datasets import DatasetFactoryProvider, DatasetType

from datetime import datetime as _datetime

In [None]:
# Always run this cell: it creates the FASHION dataset factory and the
# preprocessor that later cells pass to `create_datastreamer`.
preprocessor = BasePreprocessor()
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)

In [None]:
# Same factory/preprocessor setup as the previous cell, repeated here.
preprocessor = BasePreprocessor()
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)

# Batch size used for both streamers; it is also logged to MLflow further down.
batchsize = 32

streamers = fashionfactory.create_datastreamer(
    batchsize=batchsize,
    preprocessor=preprocessor,
)
train, valid = streamers["train"], streamers["valid"]

# Streams handed to the dense-network Trainers below.
trainstreamer, validstreamer = train.stream(), valid.stream()

# Loss function and metric shared by the dense-network experiments.
loss_func = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

In [None]:
class NeuralNetworkWithDropout(nn.Module):
    """Dense classifier with two hidden ReLU layers and dropout before the output.

    The input is flattened and fed through
    ``Linear(784, units1) -> ReLU -> Linear(units1, units2) -> ReLU -> Dropout
    -> Linear(units2, num_classes)``.

    Args:
        num_classes: Number of output logits.
        units1: Width of the first hidden layer.
        units2: Width of the second hidden layer.
        dropout: Dropout probability applied after the second hidden layer.
            Defaults to 0.2, the value that was previously hard-coded, so
            existing three-argument callers keep the same behaviour.
    """

    def __init__(
        self,
        num_classes: int,
        units1: int,
        units2: int,
        dropout: float = 0.2,
    ) -> None:
        super().__init__()
        self.flatten = nn.Flatten()

        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(units2, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` and return the unnormalised class logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Experiment configuration for the plain dropout network.
logdir = "modellogs"
results = []
epochs = 20
units = [512, 256]

# Trainer settings are first created with the base log directory ...
settings = TrainerSettings(
    logdir=logdir,
    epochs=epochs,
    train_steps=100,
    valid_steps=100,
    metrics=[accuracy],
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)

# ... and then pointed at a run-specific subdirectory that encodes the layer
# widths and the number of epochs.
settings.logdir = f"{logdir}/DROPOUT/u{units[0]}_u{units[1]}_e{epochs}"

model = NeuralNetworkWithDropout(
    num_classes=10,
    units1=units[0],
    units2=units[1],
)

trainer = Trainer(
    model=model,
    settings=settings,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau,
)

trainer.loop()

In [None]:
class NeuralNetworkWithDropoutNormalization(nn.Module):
    """Dense classifier with batch normalisation in both hidden layers.

    Each hidden layer is ``Linear -> BatchNorm1d -> ReLU``; dropout with
    probability ``dropout`` is applied before the final linear layer that
    produces ``num_classes`` logits. The input is flattened to 28 * 28 features.
    """

    def __init__(self, num_classes: int, units1: int, units2: int, dropout: float) -> None:
        super().__init__()
        self.flatten = nn.Flatten()

        # Layers are created in the same order they are applied, so the
        # Sequential indices (and therefore state_dict keys) are 0..7.
        first_hidden = [nn.Linear(28 * 28, units1), nn.BatchNorm1d(units1), nn.ReLU()]
        second_hidden = [nn.Linear(units1, units2), nn.BatchNorm1d(units2), nn.ReLU()]
        head = [nn.Dropout(p=dropout), nn.Linear(units2, num_classes)]

        self.linear_relu_stack = nn.Sequential(*first_hidden, *second_hidden, *head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` and return the unnormalised class logits."""
        return self.linear_relu_stack(self.flatten(x))

In [None]:
# Experiment configuration for the dropout + batch-normalisation network.
units = [704, 288]
epochs = 25
results = []
logdir = "modellogs"

settings = TrainerSettings(
    epochs=epochs,
    metrics=[accuracy],
    logdir=logdir,
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.TOML],
)

model = NeuralNetworkWithDropoutNormalization(
    num_classes=10,
    units1=units[0],
    units2=units[1],
    dropout=0.2,
)

# Log under a directory named after this architecture. The previous "DROPOUT"
# label was copied from the plain dropout experiment and filed the batch-norm
# runs alongside runs of a different model.
settings.logdir = f"{logdir}/DROPOUT_BATCHNORM/u{units[0]}_u{units[1]}_e{epochs}"

trainer = Trainer(
    model=model,
    settings=settings,
    loss_fn=loss_func,
    optimizer=optim.Adam,
    traindataloader=trainstreamer,
    validdataloader=validstreamer,
    scheduler=optim.lr_scheduler.ReduceLROnPlateau
)

trainer.loop()

In [None]:
import mlflow
# Track runs in a local SQLite database and group them under one experiment.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
experiment_path = "mlflow_test"
mlflow.set_experiment(experiment_path)

In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from pathlib import Path

# Absolute path of the directory where trained models are written.
modeldir = Path("models").resolve()

# Create the directory on first use; report only when it was actually created.
try:
    modeldir.mkdir()
except FileExistsError:
    pass
else:
    print(f"Created {modeldir}")

In [None]:
from datetime import datetime

# Trainer settings shared by every hyperopt trial of the dense network.
# Report types are MLFLOW and TOML; the log directory is `modeldir`.
settings = TrainerSettings(
    logdir=modeldir,
    epochs=25,
    train_steps=100,
    valid_steps=100,
    metrics=[accuracy],
    reporttypes=[ReportTypes.MLFLOW, ReportTypes.TOML],
)

# Define the objective function for hyperparameter optimization
def objective(params):
    """Train one dense network for a hyperopt trial and report its loss.

    Runs inside a fresh MLflow run: tags the run, logs ``params`` and the
    global ``batchsize``, trains a ``NeuralNetworkWithDropoutNormalization``
    with the module-level ``settings`` and streamers, saves the trained model
    under ``modeldir`` and logs that file as an MLflow artifact.

    Args:
        params: Mapping with the keys ``"units1"``, ``"units2"`` and
            ``"dropout"``, as sampled from the search space.

    Returns:
        Dict with ``"loss"`` set to ``trainer.test_loss`` and ``"status"`` set
        to ``STATUS_OK``, the format ``fmin`` expects.
    """
    with mlflow.start_run():
        mlflow.set_tag("model", "dense-net")
        mlflow.set_tag("dev", "Melissa")
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")

        loss_fn = torch.nn.CrossEntropyLoss()
        optimizer = optim.Adam

        model = NeuralNetworkWithDropoutNormalization(
            num_classes=10,
            units1=params["units1"],
            units2=params["units2"],
            dropout=params["dropout"],
        )

        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        )
        trainer.loop()

        # Use second-level resolution: consecutive trials can finish within the
        # same minute, and a minute-level tag would make a later trial
        # overwrite the model file written by an earlier one.
        tag = datetime.now().strftime("%Y%m%d-%H%M%S")
        modelpath = modeldir / (tag + "_model.pt")
        # NOTE: this pickles the whole model object, so loading it later
        # requires the model class to be importable/defined.
        torch.save(model, modelpath)

        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

In [None]:
# Both hidden-layer widths are sampled as integers in steps of 32;
# dropout is sampled uniformly between 0.0 and 0.5.
search_space = {
    **{
        name: scope.int(hp.quniform(name, 32, 1028, 32))
        for name in ("units1", "units2")
    },
    "dropout": hp.uniform("dropout", 0.0, 0.5),
}

# Run three TPE-guided trials of the dense-network objective.
best_result = fmin(
    fn=objective,
    space=search_space,
    max_evals=3,
    algo=tpe.suggest,
    trials=Trials(),
)

best_result

In [None]:
from CNN import CNNConfig, CNNblocks 

# Metric and trainer settings for the CNN hyperparameter search
accuracy = metrics.Accuracy()
settings = TrainerSettings(
    logdir="modellogs/cnn_mlflow",
    epochs=5,
    train_steps=100,
    valid_steps=100,
    metrics=[accuracy],
    reporttypes=[ReportTypes.MLFLOW, ReportTypes.TOML],
)

def objective(params):
    """Train one ``CNNblocks`` model for a hyperopt trial and report its loss.

    Builds a ``CNNConfig`` from ``params``, then inside a fresh MLflow run:
    logs the config, creates train/valid streams with the trial's batch size,
    trains the model with the module-level ``settings``, saves the model's
    ``state_dict`` under ``modellogs/cnn_mlflow`` and logs that file as an
    MLflow artifact.

    Args:
        params: Mapping with the keys ``"hidden"``, ``"num_layers"``,
            ``"dropout"`` and ``"batchsize"``, as sampled from the search space.

    Returns:
        Dict with ``"loss"`` set to ``trainer.test_loss`` and ``"status"`` set
        to ``STATUS_OK``, the format ``fmin`` expects.
    """
    config = CNNConfig(
        hidden=params["hidden"],
        num_layers=params["num_layers"],
        dropout=params["dropout"],
        batchsize=params["batchsize"]
    )

    with mlflow.start_run():
        mlflow.log_params(config.__dict__)
        mlflow.set_tag("model", "CNNblocks")

        # The batch size is a tuned hyperparameter, so the streams are rebuilt
        # for every trial instead of reusing the notebook-level ones.
        streamers = fashionfactory.create_datastreamer(batchsize=config.batchsize, preprocessor=preprocessor)
        trainstreamer = streamers["train"].stream()
        validstreamer = streamers["valid"].stream()

        model = CNNblocks(config)
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=nn.CrossEntropyLoss(),
            optimizer=optim.Adam,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )

        trainer.loop()

        # Use second-level resolution so trials that finish within the same
        # minute do not overwrite each other's saved weights.
        timestamp = _datetime.now().strftime("%Y%m%d-%H%M%S")
        model_dir = "modellogs/cnn_mlflow"
        # Do not rely on another component having created the directory.
        os.makedirs(model_dir, exist_ok=True)
        model_path = f"{model_dir}/model_{timestamp}.pt"
        torch.save(model.state_dict(), model_path)
        mlflow.log_artifact(model_path, artifact_path="pytorch_models")

        return {'loss': trainer.test_loss, 'status': STATUS_OK}

In [None]:
# Define the search space: integer-valued parameters are sampled on a grid,
# dropout is sampled uniformly.
search_space = dict(
    hidden=scope.int(hp.quniform('hidden', 32, 256, 32)),
    num_layers=scope.int(hp.quniform('num_layers', 2, 6, 1)),
    dropout=hp.uniform('dropout', 0.1, 0.5),
    batchsize=scope.int(hp.quniform('batchsize', 32, 128, 32)),
)

# Run the hyperparameter optimisation; `trials` keeps the per-trial records.
trials = Trials()
best = fmin(
    fn=objective,
    space=search_space,
    max_evals=10,
    algo=tpe.suggest,
    trials=trials,
)

print("Beste configuratie:", best)


In [None]:
import os
import toml
import pandas as pd

log_dir = "modellogs/cnn_mlflow"
runs = []

# Walk the log directory and collect the stored config of every model.toml.
for root, _, files in os.walk(log_dir):
    for file in files:
        if file != "model.toml":
            continue
        path = os.path.join(root, file)
        try:
            data = toml.load(path)
            # Missing sections fall back to empty dicts, so absent
            # hyperparameters end up as None in the record.
            config = data.get("model", {}).get("config", {})
            runs.append(
                {
                    "path": path,
                    **{
                        key: config.get(key)
                        for key in ("hidden", "dropout", "batchsize", "num_layers")
                    },
                }
            )
        except Exception as e:
            # Best effort: report the unreadable file and continue.
            print(f"Kon {path} niet inlezen: {e}")

df = pd.DataFrame(runs)
print(df.head())


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Colour the points by accuracy only when `df` actually has that column. The
# cell that builds `df` stores path/hidden/dropout/batchsize/num_layers, so an
# unconditional hue="accuracy" would reference a column that does not exist.
hue_column = "accuracy" if "accuracy" in df.columns else None

if df.empty:
    # Nothing was collected from the log directory, so there is nothing to plot.
    print("Geen runs gevonden om te plotten.")
else:
    sns.pairplot(
        df,
        hue=hue_column,
        # A palette is only meaningful together with a hue variable.
        palette="coolwarm" if hue_column else None,
    )

    plt.suptitle("Hyperparameter relaties", y=1.02)
    plt.tight_layout()
    plt.show()