MLflow & TensorBoard

Name: Justin Stange-Heiduk  
Matrikelnummer: [Deine Matrikelnummer]  
Universität: AKAD   
Kurs: B.Sc Data Science    
Dozent: Dr. Martin Prause   
Beginn: 03.02.2025  
Orientiert an: https://keras.io/guides/transfer_learning/

In [1]:
import tensorflow as tf
import os
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPool2D
from datetime import datetime
from tensorboard.plugins.hparams import api as hp
import numpy as np
import mlflow
import mlflow.tensorflow
import psutil

## Allgemeiner Teil

In [2]:
# Image directory and loader settings
data_dir = "simpsons"
img_height = 224
img_width = 224
batch_size_par = 5

# Load the training / validation split. Both calls must share the same seed
# and validation_split so that the two subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, seed=123, labels="inferred", label_mode='int',
    validation_split=0.2, subset="training",
    image_size=(img_height, img_width), batch_size=batch_size_par)

validation_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, seed=123, labels="inferred", label_mode='int',
    validation_split=0.2, subset="validation",
    image_size=(img_height, img_width), batch_size=batch_size_par)


def prepare_for_vgg16(images, labels):
    """Apply the VGG16 ImageNet preprocessing to a batch of images.

    The pretrained VGG16 weights expect inputs transformed by
    `preprocess_input`; feeding raw [0, 255] RGB pixels degrades the frozen
    features. Labels are passed through unchanged.
    """
    return tf.keras.applications.vgg16.preprocess_input(images), labels


# Model-ready datasets: preprocess for VGG16, then prefetch.
# `train_ds` / `validation_ds` stay untouched (raw pixels) for visualisation.
# NOTE: models trained on these datasets expect preprocessed inputs at
# inference time as well.
train_dataset = (
    train_ds
    .map(prepare_for_vgg16, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
validation_dataset = (
    validation_ds
    .map(prepare_for_vgg16, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Load the convolutional VGG16 backbone without its classifier head.
# The input shape is derived from the loader settings so the two cannot drift apart.
vgg_model = VGG16(include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the whole backbone; only layers added on top of it are trained.
vgg_model.trainable = False

Found 98 files belonging to 2 classes.
Using 79 files for training.
Found 98 files belonging to 2 classes.
Using 19 files for validation.


## MLflow

In [3]:
# Determine the run number from the runs already stored for this experiment
# (`mlflow` is imported in the import cell at the top of the notebook).
experiment = mlflow.get_experiment_by_name("Simpsons_Classification")
if experiment:
    run_count = len(mlflow.search_runs(experiment_ids=[experiment.experiment_id]))
else:
    run_count = 0  # experiment does not exist yet

run_name = f"run_simpson_klassifikation_{run_count + 1}"

# Select (or create) the MLflow experiment
mlflow.set_experiment("Simpsons_Classification")

with mlflow.start_run(run_name=run_name) as run:

    # Parameters of this experiment
    experiment_params = {
        "filters": 64,
        "Dense_neurons": 128,
        "Dropout_rate": 0.4,
        "epochs": 3,
        "batch_size": batch_size_par,
        "loss_function": "binary_crossentropy",
        "optimizer": "rmsprop",
        "metrics": ["accuracy"]
    }

    # Log the run name and all experiment parameters
    mlflow.log_param("run_name", run_name)
    mlflow.log_params(experiment_params)

    # Classification head stacked on top of the VGG16 backbone
    x = vgg_model.output
    x = Conv2D(experiment_params["filters"], kernel_size=(3, 3), activation='relu')(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Flatten()(x)
    x = Dense(experiment_params["Dense_neurons"], activation='relu')(x)
    x = Dropout(experiment_params["Dropout_rate"])(x)
    x = Dense(1, activation='sigmoid')(x)

    # Build the full model
    custom_model = Model(vgg_model.input, x)

    # Keep every backbone layer frozen. Iterating over `vgg_model.layers`
    # avoids a hard-coded layer index that depends on VGG16's layer count.
    for layer in vgg_model.layers:
        layer.trainable = False

    # Compile the model
    custom_model.compile(
        loss=experiment_params["loss_function"],
        optimizer=experiment_params["optimizer"],
        metrics=experiment_params["metrics"]
    )

    # Train
    history = custom_model.fit(
        train_dataset,
        epochs=experiment_params["epochs"],
        validation_data=validation_dataset
    )

    # Log the per-epoch metrics, using the epoch index as the step
    for epoch, (train_loss, train_acc, val_loss, val_acc) in enumerate(zip(
        history.history["loss"], history.history["accuracy"],
        history.history["val_loss"], history.history["val_accuracy"])):

        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("train_accuracy", train_acc, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_acc, step=epoch)

    # Save the model locally and log it as an MLflow artifact
    model_path = "saved_model/simpsons_classifier_mlflow"
    custom_model.save(model_path)
    mlflow.tensorflow.log_model(custom_model, "model_simpsons_mlflow")

    # # Optional: register the model in the Model Registry
    # model_uri = f"runs:/{run.info.run_id}/model_simpsons_mlflow"
    # registered_model_name = "Simpsons_Classifier"
    # model_version = mlflow.register_model(model_uri, registered_model_name)

    print(f"Run abgeschlossen: {run.info.run_id} mit Namen: {run_name}")
    print(f"Modell gespeichert unter: {model_path}")
    #print(f"Modell registriert als: {registered_model_name}, Version: {model_version.version}")


Epoch 1/3
Epoch 2/3
Epoch 3/3




INFO:tensorflow:Assets written to: saved_model/simpsons_classifier_mlflow\assets


INFO:tensorflow:Assets written to: saved_model/simpsons_classifier_mlflow\assets


INFO:tensorflow:Assets written to: C:\Users\Justi\AppData\Local\Temp\tmpn2l_aipo\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Justi\AppData\Local\Temp\tmpn2l_aipo\model\data\model\assets


Run abgeschlossen: 5edeef09937b4372856e4676e466d24f mit Namen: run_simpson_klassifikation_3
Modell gespeichert unter: saved_model/simpsons_classifier_mlflow


## TensorBoard

In [4]:
# Fixed parameters shared by every TensorBoard run
experiment_params = {
    "filters": 64,
    "Dense_neurons": 256,
    "Dropout_rate": 0.5,
    "epochs": 3,
    "batch_size": batch_size_par,  # keep in sync with the dataset batch size
    "loss_function": "binary_crossentropy",
    "metrics": ["accuracy"]
}

# Hyperparameter that is varied between runs (the optimizer)
HPARAMS = {"optimizer": hp.HParam("optimizer", hp.Discrete(["adam", "rmsprop"]))}


def log_images(dataset, writer, step):
    """Write the first batch of `dataset` as an image summary to TensorBoard.

    Args:
        dataset: tf.data.Dataset yielding (images, labels) batches; the images
            are divided by 255 here, so raw [0, 255] pixel values are expected.
        writer: summary writer that receives the image summary.
        step: step value under which the images are recorded.
    """
    for images, _ in dataset.take(1):
        # Scale from [0, 255] to [0, 1] for the float image summary
        images = images / 255.0

        with writer.as_default():
            tf.summary.image("Training Images", images, step=step, max_outputs=5)


# One training run per optimizer
for i, optimizer_name in enumerate(HPARAMS["optimizer"].domain.values, start=1):

    # Unique run name and log directory for TensorBoard
    run_name = f"run_{i}_{optimizer_name}"
    log_dir = os.path.join("tensorboard_logs", f"{run_name}_{datetime.now().strftime('%Y%m%d-%H%M%S')}")

    # A single writer per run carries both the hyperparameters and the images
    run_writer = tf.summary.create_file_writer(log_dir)

    # Record which optimizer this run uses
    with run_writer.as_default():
        hp.hparams({HPARAMS["optimizer"]: optimizer_name})

    # Classification head stacked on top of the VGG16 backbone
    x = vgg_model.output
    x = Conv2D(experiment_params["filters"], kernel_size=(3, 3), activation='relu')(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Flatten()(x)
    x = Dense(experiment_params["Dense_neurons"], activation='relu')(x)
    x = Dropout(experiment_params["Dropout_rate"])(x)
    x = Dense(1, activation='sigmoid')(x)

    # Build the full model
    custom_model = Model(vgg_model.input, x)

    # Keep every backbone layer frozen. Iterating over `vgg_model.layers`
    # avoids a hard-coded layer index that depends on VGG16's layer count.
    for layer in vgg_model.layers:
        layer.trainable = False

    # Compile with the optimizer name: Keras creates a fresh default-configured
    # optimizer for each model and raises on an unknown name instead of
    # silently falling back to RMSprop.
    custom_model.compile(
        loss=experiment_params["loss_function"],
        optimizer=optimizer_name,
        metrics=experiment_params["metrics"]
    )

    # One TensorBoard callback per run. Two callbacks on the same log_dir
    # would write every scalar, histogram and the graph twice.
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True,
        write_images=True,
        update_freq='epoch',  # log once per epoch
        profile_batch=1       # profile the first batch
    )

    # Train
    history = custom_model.fit(
        train_dataset, epochs=experiment_params["epochs"],
        validation_data=validation_dataset,
        callbacks=[tensorboard_callback]
    )

    # Store a batch of raw input images in TensorBoard, then release the writer
    log_images(train_ds, run_writer, step=experiment_params["epochs"])
    run_writer.close()

    # Save the model under a name that is unique per optimizer
    model_path = os.path.join("models", f"model_simpsons_tensorboard_{optimizer_name}")
    custom_model.save(model_path)

    print(f"Training abgeschlossen mit Optimizer: {optimizer_name}")
    print(f"TensorBoard-Logs gespeichert unter: {log_dir}")
    print(f"Modell gespeichert unter: {model_path}")

Epoch 1/3
Epoch 2/3
Epoch 3/3




INFO:tensorflow:Assets written to: models\model_simpsons_tensorboard_adam\assets


INFO:tensorflow:Assets written to: models\model_simpsons_tensorboard_adam\assets


Training abgeschlossen mit Optimizer: adam
TensorBoard-Logs gespeichert unter: tensorboard_logs\run_1_adam_20250217-091841
Modell gespeichert unter: models\model_simpsons_tensorboard_adam
Epoch 1/3
Epoch 2/3
Epoch 3/3




INFO:tensorflow:Assets written to: models\model_simpsons_tensorboard_rmsprop\assets


INFO:tensorflow:Assets written to: models\model_simpsons_tensorboard_rmsprop\assets


Training abgeschlossen mit Optimizer: rmsprop
TensorBoard-Logs gespeichert unter: tensorboard_logs\run_2_rmsprop_20250217-091908
Modell gespeichert unter: models\model_simpsons_tensorboard_rmsprop
