In [None]:
"""
MODEL TRAINER MITTELS TRANSFER LEARNING AUF VGG16

Dieses Skript dient dem Training der Bildklassifikationsmodelle.
Es nutzt die Methode des Transfer Learnings, bei der ein auf ImageNet vortrainiertes VGG16-Modell
als Basis verwendet und an die spezifischen Klassen des neuen Datensatzes angepasst wird.

Der Prozess umfasst folgende Schritte:
1. Laden des VGG16-Modells mit den vortrainierten ImageNet-Gewichten, wobei der ursprüngliche
   Klassifikationskopf entfernt wird.
2. Einfrieren der Gewichte der Basis-Layer, um das in ihnen gespeicherte Wissen zu erhalten.
3. Hinzufügen neuer, trainierbarer Layer (GlobalAveragePooling, Dense mit Softmax-Aktivierung),
   die auf die Anzahl der Klassen im geladenen Datensatz zugeschnitten sind.
4. Laden und Vorverarbeiten des geladenen Datensatzes, inklusive Größenanpassung der Bilder,
   VGG-spezifischer Normalisierung und One-Hot-Kodierung der Labels.
5. Kompilieren und Trainieren des neuen Modells unter Verwendung von Callbacks wie ModelCheckpoint
   (zum Speichern des besten Modells) und EarlyStopping (zur Vermeidung von Overfitting).
6. Speichern des final trainierten Modells als .h5-Datei.

"""

In [None]:
!nvidia-smi

Tue Jun 10 16:09:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.03             Driver Version: 550.144.03     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:06:10.0 Off |                  Off |
| 30%   31C    P8              8W /  300W |       2MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                     

In [None]:
import inspect
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_datasets as tfds
from tensorflow.keras import models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.optimizers import Adam

In [None]:
# Turn on GPU memory growth to avoid out-of-memory errors.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("Keine GPU gefunden, verwende CPU")
else:
    # Memory growth is configured separately for every detected GPU.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f"GPU verfügbar: {len(physical_devices)} Gerät(e)")

GPU verfügbar: 1 Gerät(e)


In [None]:
# Transfer learning: rebuild VGG16 as one flat functional model.
# The pretrained layers are re-attached one by one to a new, explicitly named
# input. Per the original author's note, the layer names have to be adapted
# this way, otherwise the iNNvestigate library raises an exception.
base_vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_vgg_model.trainable = False  # freeze the pretrained base
flat_model_input = keras.Input(shape=(224, 224, 3), name="my_flat_model_input")

current_tensor = flat_model_input

for layer in base_vgg_model.layers:
    if isinstance(layer, keras.layers.InputLayer):
        # Skip VGG16's own InputLayer; `flat_model_input` takes its place.
        continue

    # Re-attach the pretrained layer to the new graph. Layers whose `call`
    # accepts a `training` argument are run in inference mode because the base
    # is frozen. The real call signature is inspected here: `co_varnames` also
    # lists local variables, so it could report a `training` parameter that
    # the layer does not actually accept.
    if 'training' in inspect.signature(layer.call).parameters:
        current_tensor = layer(current_tensor, training=False)
    else:
        current_tensor = layer(current_tensor)

# New classification head on top of the frozen base. It has 397 output units,
# the same width as the one-hot labels printed by the split check further down.
x = keras.layers.GlobalAveragePooling2D(name="custom_avg_pool")(current_tensor)
predictions = keras.layers.Dense(397, activation='softmax', name="custom_predictions")(x)
flat_model = keras.models.Model(inputs=flat_model_input, outputs=predictions, name="MyFlatVGGTransferModel")

flat_model.compile(optimizer=Adam(learning_rate=0.0001),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

# Batch size used when preparing the data.
BATCH_SIZE = 64

In [None]:
# Pretrained VGG16 convolutional base without its ImageNet classifier head.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pretrained layers so that only the new head is trained.
base_model.trainable = False

# New head: global average pooling followed by a 101-way softmax layer.
# NOTE(review): the original comments mentioned Imagenette / 10 outputs, but
# the Dense layer has 101 units — confirm which dataset this cell targets.
x = keras.layers.GlobalAveragePooling2D()(base_model.output)
predictions = keras.layers.Dense(101, activation='softmax', name='dense_ouput')(x)

# Assemble and compile the final model.
model = keras.models.Model(inputs=base_model.input, outputs=predictions)
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Batch size used when preparing the data.
BATCH_SIZE = 64


2025-06-10 22:43:40.479738: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-06-10 22:43:40.480134: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-06-10 22:43:40.480358: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [None]:
# Location of a previously saved model (adjust the directory to your setup).
model_dir = "/mnt/data/"
model_path = os.path.join(model_dir, 'vgg16_cifar10_70acc.h5')

# NOTE(review): this rebinds `model`, replacing the model built in the
# previous cell with the one stored at `model_path` — confirm this is intended.
model = models.load_model(model_path)

# Load the train and test splits of the 'sun397/tfds' dataset together with
# its metadata. With as_supervised=False each element is a feature dict.
(ds_train, ds_test), ds_info = tfds.load(
    'sun397/tfds',
    data_dir='/mnt/data/datasets',
    split=['train', 'test'],
    as_supervised=False,
    shuffle_files=False,
    with_info=True,
)

num_train_examples = ds_info.splits['train'].num_examples
print(f"Number of pictures in the training dataset: {num_train_examples}")

In [None]:
# Read the number of classes from the dataset metadata
NUM_CLASSES = ds_info.features['label'].num_classes
# Target height and width the images are resized to in preprocess_data
IMAGE_SIZE = [224, 224]
BATCH_SIZE = 64 # Set your desired batch size

# Combined preprocessing function
def preprocess_data(element):
    """
    Turn one dataset element (a dict) into an ``(image, label)`` tuple.

    The entry under ``'image'`` is resized to ``IMAGE_SIZE`` and passed through
    the VGG16 ``preprocess_input``; the entry under ``'label'`` is one-hot
    encoded with depth ``NUM_CLASSES``.
    """
    resized_image = tf.image.resize(element['image'], IMAGE_SIZE)
    one_hot_label = tf.one_hot(element['label'], NUM_CLASSES)
    return preprocess_input(resized_image), one_hot_label

# Training split: preprocess, shuffle, batch and prefetch.
print("Preprocessing für den Trainingsdatensatz wird gestartet...")
ds_train = (
    ds_train
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(buffer_size=1000)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Test split: same steps, but without shuffling.
print("Preprocessing für den Testdatensatz wird gestartet...")
ds_test = (
    ds_test
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

print("\n abgeschlossen!")

In [None]:
def preprocess_vgg(image, label):
    """Resize ``image`` to 224x224 and apply the VGG16 ``preprocess_input``.

    ``label`` is returned unchanged next to the processed image.
    """
    resized_image = tf.image.resize(image, [224, 224])
    return preprocess_input(resized_image), label

In [None]:
# Prepare the data: apply preprocess_vgg to both splits and set them up for
# training (the training split is additionally shuffled).
# NOTE(review): ds_train / ds_test are also mapped, batched and prefetched in
# the cell that applies `preprocess_data`; running both cells stacks the two
# pipelines — confirm which one is intended.
#ds_train = ds_train.take(80000)
ds_train = (
    ds_train
    .map(preprocess_vgg)
    .shuffle(buffer_size=1000)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

ds_test = (
    ds_test
    .map(preprocess_vgg)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

def process_labels(image, label, num_classes=101):
    """One-hot encode ``label`` and pass ``image`` through unchanged.

    Args:
        image: Image tensor; returned as is.
        label: Class index (or tensor of indices) to encode.
        num_classes: Depth of the one-hot vector. Defaults to 101, the value
            that was previously hard-coded, so existing
            ``dataset.map(process_labels)`` calls behave as before.

    Returns:
        Tuple ``(image, one_hot_label)``.
    """
    return image, tf.one_hot(label, num_classes)

# One-hot encode the labels of both splits via process_labels.
ds_train = ds_train.map(process_labels)
ds_test = ds_test.map(process_labels)


In [None]:
# Sanity check of the split: pull every batch of ds_test into NumPy arrays.
# NOTE: the variables keep a `train_` prefix, but the data comes from ds_test.
train_images_list, train_labels_list = [], []
for batch_images, batch_labels in tfds.as_numpy(ds_test):
    train_images_list.append(batch_images)
    train_labels_list.append(batch_labels)

# Join the per-batch arrays along the batch axis.
train_images = np.concatenate(train_images_list, axis=0)
train_labels = np.concatenate(train_labels_list, axis=0)

# Inspect shapes and dtypes. The recorded output shows one-hot labels of
# shape (N, 397), not (N,).
print("Train images shape:", train_images.shape)  # expected (N, 224, 224, 3)
print("Train images dtype:", train_images.dtype)  # expected float32
print("Train labels shape:", train_labels.shape)  # expected (N, number of classes)
print("Train labels dtype:", train_labels.dtype)  # expected float32

Train images shape: (21750, 224, 224, 3)
Train images dtype: float32
Train labels shape: (21750, 397)
Train labels dtype: float32


In [None]:
EPOCHS = 20

# Checkpoint target. checkpoint_dir is derived here but not used further in
# this cell.
checkpoint_path = "./models/weights/sun397.weights.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Keep only the checkpoint with the highest validation accuracy. The complete
# model is stored (save_weights_only=False), not just its weights.
cp_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop once the validation loss has not improved for 5 epochs. The best weights
# are not restored by this callback; the checkpoint is reloaded below instead.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=False,
)

# Train; ds_test serves as the validation data.
history = flat_model.fit(
    ds_train,
    validation_data=ds_test,
    epochs=EPOCHS,
    callbacks=[cp_callback, early_stopping],
)

# Continue with the best checkpoint written during training.
flat_model = models.load_model(checkpoint_path)

In [None]:
# Evaluate the trained model. `flat_model` is the model that was fitted and
# then reloaded from its checkpoint; `model` refers to a different model
# (it is rebound to a checkpoint loaded from disk in an earlier cell), so it
# must not be used here.

# Validation data (ds_test): yields val_loss and val_accuracy.
val_loss, val_accuracy = flat_model.evaluate(ds_test, verbose=0)

# Training data (ds_train): yields loss and accuracy.
loss, accuracy = flat_model.evaluate(ds_train, verbose=0)

print(f"loss: {loss:.4f} - accuracy: {accuracy:.4f} - val_loss: {val_loss:.4f} - val_accuracy: {val_accuracy:.4f}")


PERFORMANCE DURCH NEUE EVALUIERUNG:
loss: 1.2951 - accuracy: 0.6745 - val_loss: 2.1746 - val_accuracy: 0.5514


In [None]:
# Save the trained model as an .h5 file.
# NOTE(review): the file name presumably encodes accuracy/loss figures of a
# particular run — confirm it still matches the model being saved.
flat_model.save('./models/vgg16_sun397_a79_va58_l761_vl1768.h5')

In [None]:
# Confusion matrix of the predictions.
# Requires `true_classes` and `pred_classes`, which are computed in the cell
# that recomputes the validation accuracy by iterating over ds_test; run that
# cell first. (The previously referenced name `predicted_classes` is not
# defined anywhere in the visible notebook.)
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

cm = confusion_matrix(true_classes, pred_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Recompute the validation accuracy by hand: iterate over ds_test and predict
# batch by batch with the trained model (`flat_model`).
all_predictions = []
all_true_labels = []

for images, labels in ds_test:
    batch_predictions = flat_model.predict(images, verbose=0)
    all_predictions.append(batch_predictions)
    all_true_labels.append(labels.numpy())
    # Per-batch class indices for debugging. The predicted class is the argmax
    # of the model output, not of the input images.
    tf1_pred_classes_batch = np.argmax(batch_predictions, axis=1)
    tf1_true_classes_batch = np.argmax(labels, axis=1)
    #print("Predicted classes (first batch):", tf1_pred_classes_batch[:5])
    #print("True classes (first batch):", tf1_true_classes_batch[:5])

# Stack the per-batch arrays into one array each.
all_predictions = np.vstack(all_predictions)
all_true_labels = np.vstack(all_true_labels)

# argmax returns the index of the highest value, i.e. the predicted class
# (or, for the one-hot labels, the true class).
pred_classes = np.argmax(all_predictions, axis=1)
true_classes = np.argmax(all_true_labels, axis=1)

accuracy = np.mean(pred_classes == true_classes)
print(f"Overall accuracy: {accuracy * 100:.2f}%")

# Print a few examples
#for i in range(100):
#   print(f"Sample {i}: True={true_classes[i]}, Pred={pred_classes[i]}")