In [None]:
# ---- Importiere benötigte Bibliotheken ----
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import ParameterGrid
from tensorflow.keras import layers, Model, regularizers, callbacks
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# ---- Constants and parameters ----
SEED = 42                      # seed passed to the data iterators
NUM_EPOCHS = 150               # upper bound on training epochs per fit
BATCH_SIZE = 64
IMAGE_SIZE = (224, 224)        # (height, width) the images are resized to
TARGET_LABEL = "dx"            # dataframe column holding the class label
BALANCE_LABEL = "dx"

# All data directories live under one common root.
_DATA_ROOT = './../data/'
FILEPATH_JPGS = f"{_DATA_ROOT}jpgs/"
FILEPATH_PROCESSED = f"{_DATA_ROOT}processed/"
FILEPATH_OUTPUT = f"{_DATA_ROOT}bjzim-models/"

In [None]:
# ---- Initialize other variables ----
# Placeholder; the hyperparameter-tuning cell later rebinds this name to a tqdm progress bar.
pbar = None

In [None]:
# ---- File paths and split names ----
# Each entry is (display name of the split, path of its processed metadata CSV).
filepaths = [
    ("Trainingsset", FILEPATH_PROCESSED + "train_from_Metadata_processed.csv"),
    ("Validierungsset", FILEPATH_PROCESSED + "validation_from_Metadata_processed.csv"),
    ("Testset", FILEPATH_PROCESSED + "test_from_Metadata_processed.csv"),
]

In [None]:
# ---- Helper that checks a split for augmented rows ----
def check_augmented_data(df, set_name):
    """Print a warning when ``df`` contains augmented images.

    A row counts as augmented when its ``image_id`` starts with ``'aug_'``.

    Args:
        df: DataFrame with a string column ``image_id``.
        set_name: Name of the split, used only in the warning text.

    Returns:
        None. The only effect is the printed warning.
    """
    has_augmented_rows = df['image_id'].str.startswith('aug_').any()
    if not has_augmented_rows:
        return
    print(f"Warnung: Augmentierte Daten im {set_name} gefunden.")

# ---- Load every split and run the check ----
# The frames are kept (keyed by split name) instead of being discarded,
# because later cells read `train_df` and `validation_df`.
datasets = {}
for set_name, filepath in filepaths:
    df = pd.read_csv(filepath)
    check_augmented_data(df, set_name)
    datasets[set_name] = df

# NOTE(review): assumes the processed CSVs listed in `filepaths` are exactly
# the frames the image generators should consume — confirm.
train_df = datasets["Trainingsset"]
validation_df = datasets["Validierungsset"]
test_df = datasets["Testset"]

In [None]:
# Augmentation settings read by the training ImageDataGenerator definitions.

# Earlier alternative configuration, kept for reference (disabled):
# aug_params_loss = {
#     'height_shift_range': 0.1,
#     'horizontal_flip': True,
#     'rotation_range': 0,
#     'vertical_flip': True,
#     'width_shift_range': 0.2,
#     'zoom_range': 0.05
# }

aug_params_recall = dict(
    height_shift_range=0.05,
    horizontal_flip=False,
    rotation_range=30,
    vertical_flip=True,
    width_shift_range=0.2,
    zoom_range=0.05,
)

# Hyperparameter search space; each key lists the candidate values that
# ParameterGrid expands into individual combinations.
param_grid = dict(
    learning_rate=[0.001, 0.0001],
    conv2d_filters=[128],
    dense_units=[64, 128],
    dropout_rate=[0.5, 0.7],
    batch_size=[64],
    optimizer=['Adam'],
    weight_regularization=['l2'],
)

In [None]:
# Training images: augmentation only.
# tf.keras MobileNetV3 rescales inputs inside the model (include_preprocessing
# defaults to True) and expects raw [0, 255] pixels; `preprocess_input` is a
# pass-through. An additional `rescale=1/255` would squash the inputs into a
# tiny range, so no rescaling is configured here.
datagen_train = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=aug_params_recall['rotation_range'],
    width_shift_range=aug_params_recall['width_shift_range'],
    height_shift_range=aug_params_recall['height_shift_range'],
    zoom_range=aug_params_recall['zoom_range'],
    horizontal_flip=aug_params_recall['horizontal_flip'],
    vertical_flip=aug_params_recall['vertical_flip'],
    fill_mode='nearest'
)

train_data_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED
)

# Validation images: no augmentation, same (pass-through) preprocessing.
datagen_validation = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# shuffle=False keeps batch order equal to dataframe order, so predictions
# can be compared against `validation_generator.classes`.
validation_generator = datagen_validation.flow_from_dataframe(
    dataframe=validation_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [None]:
# Stop training once the validation F1 score stops improving.
# `EarlyStopping` is accessed through the imported `callbacks` module because
# the bare class name is not imported in this notebook.
# `val_f1` is not a built-in Keras metric: it is written into the epoch logs
# by CustomMetrics, which must therefore precede this callback in the list.
early_stopping = callbacks.EarlyStopping(
    monitor='val_f1',
    mode='max',
    patience=20, #15,
    verbose=1,
    restore_best_weights=True
)


In [None]:
# Halve the learning rate when the validation F1 score plateaus.
# `ReduceLROnPlateau` is accessed through the imported `callbacks` module
# because the bare class name is not imported in this notebook.
# `val_f1` is supplied by CustomMetrics via the epoch logs.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_f1',
    mode='max',
    factor=0.5, #0.1,
    patience=12, #8,
    verbose=1,
    min_lr=1e-6
)

In [None]:


# Persist the weights whenever the validation F1 score reaches a new maximum.
# `ModelCheckpoint` is accessed through the imported `callbacks` module
# because the bare class name is not imported in this notebook.
# `val_f1` is supplied by CustomMetrics via the epoch logs.
model_checkpoint = callbacks.ModelCheckpoint(
    'model_best_weights.h5',
    save_best_only=True,
    save_weights_only=True,
    monitor='val_f1',
    mode='max',
    verbose=1
)

## Mit GlobalAveragePooling

## Tuning

In [None]:
# Collects (f1, model) pairs of the best runs; starts out empty.
best_models = list()

In [None]:
class CustomMetrics(callbacks.Callback):
    """Keras callback that adds weighted recall, F1 and ROC-AUC to the epoch logs.

    After every epoch the model predicts the whole validation iterator once and
    the scores are computed with scikit-learn. They are stored in ``logs``
    under ``val_recall``, ``val_f1`` and ``val_auc`` so callbacks that run
    later in the callback list can monitor them.

    The predictions are compared against ``validation_generator.classes``, so
    the iterator must yield samples in that same order (i.e. be unshuffled).
    """

    def __init__(self, validation_generator):
        """Store the validation iterator that is scored after each epoch."""
        super().__init__()
        self.validation_generator = validation_generator

    def on_epoch_end(self, epoch, logs=None):
        """Compute the validation scores, write them into ``logs`` and print them."""
        # Local import: roc_auc_score is not part of the notebook's import cell.
        from sklearn.metrics import roc_auc_score

        # Only replace a missing dict; a passed-in dict (even an empty one) is
        # mutated in place so the values reach the other callbacks.
        logs = {} if logs is None else logs

        # One prediction pass serves both the hard labels and the AUC.
        val_probs = self.model.predict(self.validation_generator)
        val_preds = np.argmax(val_probs, axis=1)
        val_true = self.validation_generator.classes

        val_recall = recall_score(val_true, val_preds, average='weighted')
        val_f1 = f1_score(val_true, val_preds, average='weighted')
        val_auc = roc_auc_score(val_true, val_probs, multi_class='ovr', average='weighted')

        logs['val_recall'] = val_recall
        logs['val_f1'] = val_f1
        logs['val_auc'] = val_auc
        print(f" - val_recall: {val_recall: .5f} - val_f1: {val_f1: .5f} - val_auc: {val_auc: .5f}")
        print("-----------------------------------------------------------------------------------")

In [None]:
def train_evaluate_model(params, train_df, validation_df, FILEPATH_JPGS, TARGET_LABEL, IMAGE_SIZE, BATCH_SIZE):
    """Build, train and score one MobileNetV3Large classifier for one grid point.

    Args:
        params: Hyperparameter dict. Reads ``dense_units``, ``dropout_rate`` and
            ``learning_rate``; optionally ``weight_regularization`` (``'l1'`` or
            ``'l2'``) and ``batch_size``.
        train_df: Training metadata with ``image_id`` and the label column.
        validation_df: Validation metadata with the same columns.
        FILEPATH_JPGS: Directory containing the image files.
        TARGET_LABEL: Name of the label column.
        IMAGE_SIZE: ``(height, width)`` the images are resized to.
        BATCH_SIZE: Batch size used when ``params`` has no ``batch_size``.

    Returns:
        Tuple ``(model, f1, history)``: the trained model, its weighted F1 score
        on the validation set, and the Keras ``History`` of the fit.

    Also reads the notebook-level names ``aug_params_recall``, ``SEED``,
    ``NUM_EPOCHS``, ``early_stopping``, ``reduce_lr`` and ``model_checkpoint``.
    """
    # Honour the batch size from the search grid; fall back to the argument.
    batch_size = params.get('batch_size', BATCH_SIZE)

    # --- Data iterators -----------------------------------------------------
    # tf.keras MobileNetV3 rescales inputs inside the model
    # (include_preprocessing defaults to True) and expects raw [0, 255] pixels;
    # `preprocess_input` is a pass-through. No `rescale` is applied here,
    # because it would squash the inputs into a tiny range.
    datagen_train = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=aug_params_recall['rotation_range'],
        width_shift_range=aug_params_recall['width_shift_range'],
        height_shift_range=aug_params_recall['height_shift_range'],
        zoom_range=aug_params_recall['zoom_range'],
        horizontal_flip=aug_params_recall['horizontal_flip'],
        vertical_flip=aug_params_recall['vertical_flip'],
        fill_mode='nearest'
    )

    datagen_validation = ImageDataGenerator(
        preprocessing_function=preprocess_input
    )

    train_generator = datagen_train.flow_from_dataframe(
        dataframe=train_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        target_size=IMAGE_SIZE,
        batch_size=batch_size,
        class_mode="categorical",
        shuffle=True,
        seed=SEED
    )

    # Unshuffled so predictions line up with `validation_generator.labels`.
    validation_generator = datagen_validation.flow_from_dataframe(
        dataframe=validation_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        target_size=IMAGE_SIZE,
        batch_size=batch_size,
        class_mode="categorical",
        shuffle=False,
        seed=SEED
    )

    # Size the output layer from the data instead of hard-coding it.
    num_classes = len(train_generator.class_indices)

    # --- Model ---------------------------------------------------------------
    base_model = MobileNetV3Large(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Freeze the first 150 backbone layers; fine-tune everything after them.
    for layer in base_model.layers[:150]:
        layer.trainable = False
    for layer in base_model.layers[150:]:
        layer.trainable = True

    # Optional weight regularisation for the dense head.
    regularizer_factories = {'l1': regularizers.l1, 'l2': regularizers.l2}
    reg_factory = regularizer_factories.get(params.get('weight_regularization', None))
    reg = reg_factory(0.01) if reg_factory is not None else None

    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dense(params['dense_units'], activation='relu', kernel_regularizer=reg)(x)
    x = layers.Dropout(params['dropout_rate'])(x)
    x = layers.Dense(params['dense_units'] // 2, activation='relu', kernel_regularizer=reg)(x)  # additional dense layer
    x = layers.Dropout(params['dropout_rate'])(x)  # additional dropout layer
    x = layers.Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=x)

    model.compile(optimizer=Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # --- Training ------------------------------------------------------------
    # CustomMetrics goes first: it writes `val_f1` into the logs that the
    # remaining callbacks monitor.
    custom_metrics = CustomMetrics(validation_generator=validation_generator)

    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=NUM_EPOCHS,
        callbacks=[custom_metrics, early_stopping, reduce_lr, model_checkpoint],
        verbose=1
    )

    # --- Final validation F1 -------------------------------------------------
    val_preds = model.predict(validation_generator)
    val_true_labels = validation_generator.labels
    val_pred_labels = np.argmax(val_preds, axis=1)
    f1 = f1_score(val_true_labels, val_pred_labels, average='weighted')

    return model, f1, history


In [None]:
from tqdm import tqdm
from datetime import datetime

# Initialize variables
result_columns = ['learning_rate', 'conv2d_filters', 'dense_units', 'dropout_rate', 'val_f1']
result_records = []   # one dict per grid point; turned into a DataFrame once, after the loop
best_models, completed_iterations = [], 0
total_iterations = len(ParameterGrid(param_grid))

def print_remaining_iterations(total, completed):
    """Return a short text stating how many grid points are still to run."""
    return f"{total - completed} iterations remaining"

# Main loop for hyperparameter tuning.
# A single progress bar wraps the grid (a second, manually updated bar would
# only duplicate the output).
pbar = tqdm(ParameterGrid(param_grid), total=total_iterations, desc="Hyperparameter Optimization")
for params in pbar:
    # `history` is kept (last run wins) because the learning-curve cell reads it.
    model, f1, history = train_evaluate_model(params, train_df, validation_df, FILEPATH_JPGS, TARGET_LABEL, IMAGE_SIZE, BATCH_SIZE)

    # Record the result and keep only the five best models
    result_records.append({**params, 'val_f1': f1})
    best_models = sorted(best_models + [(f1, model)], key=lambda x: x[0], reverse=True)[:5]

    # Progress update
    completed_iterations += 1
    print(print_remaining_iterations(total_iterations, completed_iterations))
pbar.close()

# Build the results table in one go (DataFrame.append no longer exists in
# pandas 2). The headline columns come first, remaining parameters after them.
results_df = pd.DataFrame(result_records)
results_df = results_df.reindex(
    columns=result_columns + [col for col in results_df.columns if col not in result_columns]
)

# Save the top 5 models and results DataFrame
for i, (f1, model) in enumerate(best_models):
    model.save(f'best_model_{i + 1}_{datetime.now().strftime("%Y%m%d-%H%M%S")}.h5')
    print(f"Saved best_model_{i + 1} with F1: {f1}")

results_df.to_csv(FILEPATH_OUTPUT + 'hyperparameter_tuning_results.csv', index=False)

print("Best F1 Scores:", [f1 for f1, _ in best_models])


In [None]:
# Show the tuning results, best validation F1 first
results_df.sort_values(by='val_f1', ascending=False)


In [None]:
# Number of layers in whatever model is currently bound to `model`
len(model.layers)

In [None]:
# Imported here because this cell runs before the notebook's later
# `import matplotlib.pyplot as plt`.
import matplotlib.pyplot as plt

# Plot the learning curves (accuracy and loss, training vs. validation).
# Expects `history` to be a Keras History object from a previous fit.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

for ax, metric, title in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(history.history[metric], label='Training')
    ax.plot(history.history[f'val_{metric}'], label='Validation')
    ax.set(title=title, xlabel='Epoch', ylabel=title)
    ax.legend()

fig.tight_layout()
plt.show()


## Mit BatchNormalization

In [None]:
def create_compile_model(learning_rate, conv2d_filters, dense_units, dropout_rate):
    """Create and compile a MobileNetV3Large classifier with a Conv/BatchNorm head.

    Args:
        learning_rate: Learning rate for the Adam optimizer.
        conv2d_filters: Number of filters of the 3x3 convolution in the head.
        dense_units: Width of the hidden dense layer.
        dropout_rate: Rate used by both dropout layers.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, accuracy
        metric, 7-way softmax output).
    """
    # ImageNet-pretrained backbone without its classification top
    backbone = MobileNetV3Large(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Layers 0..99 stay frozen; layers from index 100 on are fine-tuned
    for position, backbone_layer in enumerate(backbone.layers):
        backbone_layer.trainable = position >= 100

    # Classification head, applied in the listed order
    head_layers = (
        layers.Conv2D(conv2d_filters, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
        layers.Flatten(),
        layers.Dense(dense_units, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
        layers.Dense(7, activation='softmax'),
    )
    features = backbone.output
    for head_layer in head_layers:
        features = head_layer(features)

    # Assemble and compile
    compiled_model = Model(inputs=backbone.input, outputs=features)
    compiled_model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return compiled_model


# Optimizing Augmentation

## **Build Model:**

In [None]:
# ImageNet-pretrained MobileNetV3Large backbone without its classification top
base_model = MobileNetV3Large(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

# Layers 0..99 stay frozen; layers from index 100 on are fine-tuned
for position, layer in enumerate(base_model.layers):
    layer.trainable = position >= 100

# Custom classification head, applied in the listed order
head_layers = (
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(7, activation='softmax'),
)
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Assemble the full model
model = Model(
    inputs=base_model.input,
    outputs=x,
    name="MobilneNetV3Large_pretrained-weights_fixed-layers_custom-conv2D",
)


## Parameter Grid: Data Preprocessing and Augmentation, Model

In [None]:

import os

# Metric classes are imported locally; the notebook's import cell lacks them.
from tensorflow.keras.metrics import AUC, Precision, Recall

# Best result so far
best_val_recall = 0.0
best_params = None
best_model_path = "best_model_recall.h5"

# Fixed settings carried over from earlier experiments
learning_rate = 0.01
batch_size = 128

# Augmentation search space.
# `param_grid` holds model hyperparameters and none of the augmentation keys
# read below, so this loop needs a grid of its own.
# NOTE(review): seeded with the single configuration from `aug_params_recall`;
# add more candidate values per key to actually search.
aug_param_grid = {key: [value] for key, value in aug_params_recall.items()}


def compile_for_grid_run(target_model):
    """(Re)compile ``target_model`` with a fresh Adam optimizer and fresh metrics."""
    target_model.compile(
        optimizer=Adam(learning_rate=learning_rate),  # fixed learning rate
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            Precision(name='precision'),
            Recall(name='recall'),
            AUC(name='auc')
        ]
    )


# Every combination must start from the same weights; otherwise each run
# continues training the model left behind by the previous one.
compile_for_grid_run(model)
initial_weights = model.get_weights()

# All combinations of the augmentation parameters
grid = ParameterGrid(aug_param_grid)
total_iterations = len(grid)
grid_results = []  # one dict per combination: its parameters and validation metrics

for iteration, params in enumerate(grid, start=1):
    print(f"Training with params: {params}")
    print(f"Iteration {iteration} of {total_iterations}")

    # Reset weights and optimizer state (including a learning rate lowered by
    # ReduceLROnPlateau in the previous run).
    model.set_weights(initial_weights)
    compile_for_grid_run(model)

    # Training iterator with the current augmentation parameters.
    # No `rescale`: tf.keras MobileNetV3 rescales raw [0, 255] inputs inside the
    # model and `preprocess_input` is a pass-through.
    datagen_train = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=params['rotation_range'],
        width_shift_range=params['width_shift_range'],
        height_shift_range=params['height_shift_range'],
        zoom_range=params['zoom_range'],
        horizontal_flip=params['horizontal_flip'],
        vertical_flip=params['vertical_flip'],
        fill_mode='nearest'
    )

    train_data_generator = datagen_train.flow_from_dataframe(
        dataframe=train_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        class_mode="categorical",
        target_size=IMAGE_SIZE,
        batch_size=batch_size
    )

    datagen_validation = ImageDataGenerator(
        preprocessing_function=preprocess_input
    )

    # Unshuffled so later predictions can be compared with `.classes`.
    validation_generator = datagen_validation.flow_from_dataframe(
        dataframe=validation_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        class_mode="categorical",
        target_size=IMAGE_SIZE,
        batch_size=batch_size,
        shuffle=False
    )

    # The notebook-level callbacks monitor `val_f1`, which this fit does not
    # log. This loop selects on validation recall, so its callbacks monitor
    # `val_recall` (same patience/factor values as the notebook-level ones).
    run_callbacks = [
        callbacks.EarlyStopping(monitor='val_recall', mode='max', patience=20,
                                verbose=1, restore_best_weights=True),
        callbacks.ReduceLROnPlateau(monitor='val_recall', mode='max', factor=0.5,
                                    patience=12, verbose=1, min_lr=1e-6),
    ]

    # Train. The batch size is set on the iterators; arguments that merely
    # repeated Keras defaults have been dropped.
    history = model.fit(
        train_data_generator,
        epochs=NUM_EPOCHS,
        verbose=1,
        callbacks=run_callbacks,
        validation_data=validation_generator
    )

    # Score the weights the model holds now (i.e. the ones that would be saved).
    val_metrics = model.evaluate(validation_generator, return_dict=True, verbose=0)
    grid_results.append({**params, **{f"val_{name}": value for name, value in val_metrics.items()}})

    # Keep this model if it beats the best recall so far
    final_val_recall = val_metrics['recall']
    if final_val_recall > best_val_recall:
        print(f"New best model found! val_recall: {final_val_recall}")
        best_val_recall = final_val_recall
        best_params = params

        # Save the best model
        model.save(best_model_path)



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# `load_model` is imported locally; the notebook's import cell lacks it.
from tensorflow.keras.models import load_model

# Load the best model
best_model = load_model(best_model_path)

# Dedicated unshuffled iterator: predictions are compared with `.classes`
# (dataframe order), so the batches must come in that same order. It reuses
# `datagen_validation`, i.e. the preprocessing of the validation iterator.
eval_generator = datagen_validation.flow_from_dataframe(
    dataframe=validation_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False
)

print(f"Best val_recall: {best_val_recall}")
print(f"Best params: {best_params}")
print(f"Best model path: {best_model_path}")
print(f"Best model name: {best_model.name}")
print(f"Best model optimizer: {best_model.optimizer}")
print(f"Best model metrics: {best_model.metrics_names}")
print(f"Best model loss & metrics: {best_model.evaluate(eval_generator)}")
# summary() prints by itself and returns None, so it is not wrapped in print()
best_model.summary()
print(f"Best model layers: {len(best_model.layers)}")
best_model_weights = best_model.get_weights()
# Report the number of weight arrays instead of dumping every value
print(f"Best model weights: {len(best_model_weights)} arrays")
print(f"Best model history: {getattr(best_model, 'history', None)}")

# Class probabilities for the validation set
Y_pred = best_model.predict(eval_generator)
# Predicted class index per sample
Y_pred_classes = np.argmax(Y_pred, axis=1)
# True class index per sample
Y_true = eval_generator.classes
# Confusion matrix (rows: true class, columns: predicted class)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

# Classification report; class names are taken from the iterator in index order
print('Classification Report')
target_names = list(eval_generator.class_indices)
print(classification_report(Y_true, Y_pred_classes, target_names=target_names))



In [None]:
# Plot the confusion matrix.
# Cells hold integer counts, hence fmt='d'; axes are labelled with class names.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_mtx,
    annot=True,
    linewidths=0.01,
    cmap="Greens",
    linecolor="gray",
    fmt='d',
    xticklabels=target_names,
    yticklabels=target_names,
    ax=ax
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score

target_names = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
num_classes = len(target_names)

# `classes` is a 1-D array of class indices. The per-class curves need one
# binary column per class, so it is one-hot encoded first.
# NOTE(review): `Y_pred` must come from an unshuffled pass over the validation
# data, otherwise its rows do not match these labels — confirm.
y_true_indices = np.asarray(validation_generator.classes)
y_true_onehot = np.eye(num_classes, dtype=int)[y_true_indices]

# ROC curve and area under it, per class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_onehot[:, i], Y_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One ROC figure per class
for i in range(num_classes):
    plt.figure()
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i], color='green')
    plt.plot([0, 1], [0, 1], linestyle='--', color='red')  # chance level
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate or (1 - Specifity)')
    plt.ylabel('True Positive Rate or (Sensitivity)')
    plt.title('Receiver Operating Characteristic for ' + target_names[i])
    plt.legend(loc="lower right")
    plt.show()

# Precision-recall curve, average precision and F1, per class (one-vs-rest)
precision = dict()
recall = dict()
f1 = dict()
average_precision = dict()
for i in range(num_classes):
    precision[i], recall[i], _ = precision_recall_curve(y_true_onehot[:, i], Y_pred[:, i])
    average_precision[i] = average_precision_score(y_true_onehot[:, i], Y_pred[:, i])
    # Binary F1 for class i: "predicted i" versus "is i"
    f1[i] = f1_score(y_true_onehot[:, i], (Y_pred_classes == i).astype(int))

# "Micro-average": all classes scored jointly
precision["micro"], recall["micro"], _ = precision_recall_curve(y_true_onehot.ravel(), Y_pred.ravel())
average_precision["micro"] = average_precision_score(y_true_onehot, Y_pred, average="micro")
f1["micro"] = f1_score(y_true_indices, Y_pred_classes, average="micro")
print('Average precision score, micro-averaged over all classes: {0:0.2f}'.format(average_precision["micro"]))
print('Average F1 score, micro-averaged over all classes: {0:0.2f}'.format(f1["micro"]))
plt.figure()
plt.step(recall['micro'], precision['micro'], where='post', color='green')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Average precision score, micro-averaged over all classes: AP={0:0.2f}'.format(average_precision["micro"]))
plt.show()



In [None]:
# Tabulate the grid parameters together with the validation metrics
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

# One row per parameter combination of the grid
df_grid_aug = pd.DataFrame(grid)

# Add the validation metrics to the dataframe.
# NOTE(review): `history` holds the per-epoch values of a single fit, while the
# frame has one row per grid combination. These assignments only succeed when
# both lengths happen to match — verify what should be stored here.
for metric_name in ('val_recall', 'val_precision', 'val_accuracy', 'val_auc', 'val_loss'):
    df_grid_aug[metric_name] = history.history[metric_name]

# F1 as the harmonic mean of validation precision and recall
df_grid_aug['val_f1'] = 2 * (df_grid_aug['val_precision'] * df_grid_aug['val_recall']) / (df_grid_aug['val_precision'] + df_grid_aug['val_recall'])


def color_negative_red(val):
    """Style for score-like metrics: red below 0.5, green otherwise."""
    color = 'red' if val < 0.5 else 'green'
    return 'color: %s' % color


def color_high_loss_red(val):
    """Style for the loss, where lower is better: green below 0.5, red otherwise."""
    color = 'green' if val < 0.5 else 'red'
    return 'color: %s' % color


# The styled view gets its own name so `df_grid_aug` stays a plain DataFrame
# for the cells that sort, slice and export it.
df_grid_aug_styled = (
    df_grid_aug.style
    .applymap(color_negative_red, subset=['val_recall', 'val_precision', 'val_accuracy', 'val_auc', 'val_f1'])
    .applymap(color_high_loss_red, subset=['val_loss'])
)
display(df_grid_aug_styled)

In [None]:
# Preview the first rows of `df_grid_aug`
df_grid_aug.head()

In [None]:
# Keep the augmentation parameters and validation metrics, best val_f1 first.
# Selection and sort are chained into one new frame; an in-place sort on a
# column slice would trigger pandas' SettingWithCopy warning.
grid_columns = ['rotation_range', 'width_shift_range', 'height_shift_range', 'zoom_range', 'horizontal_flip', 'vertical_flip', 'val_recall', 'val_precision', 'val_accuracy', 'val_auc', 'val_loss', 'val_f1']
df_grid_aug = df_grid_aug[grid_columns].sort_values(by=['val_f1'], ascending=False)
df_grid_aug.head(20)

In [None]:
# Write the grid results to the output directory as CSV (without the index)
df_grid_aug.to_csv(f"{FILEPATH_OUTPUT}df_grid_aug.csv", index=False)

In [None]:
# Work on the augmentation grid results
df_grid = df_grid_aug

# One panel per metric in a 2x3 grid, each plotted against rotation_range.
# (column stem, panel title)
metric_panels = [
    ('recall', 'Recall'),
    ('precision', 'Precision'),
    ('accuracy', 'Accuracy'),
    ('auc', 'AUC'),
    ('loss', 'Loss'),
    ('f1', 'F1'),
]

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for ax, (metric, title) in zip(axes.flat, metric_panels):
    sns.lineplot(data=df_grid, x='rotation_range', y=f'val_{metric}', label=f'val_{metric}', ax=ax)
    # The training-side column is plotted only when the frame contains it;
    # requesting a missing column would raise.
    if metric in df_grid.columns:
        sns.lineplot(data=df_grid, x='rotation_range', y=metric, label=f'train_{metric}', ax=ax)
    ax.set(title=title, xlabel='rotation_range', ylabel=title)
    ax.legend(loc='upper left')

# Fixed learning rate, batch size and model technique in the figure title
fig.suptitle(f"Learning rate: {learning_rate}, batch size: {batch_size}, MobilNetV3Large_pretrained-weights_Partially_fixed-layers_custom-conv2D")

# Adjust the layout
fig.tight_layout()

plt.show()






In [None]:
# For every validation metric, sort the grid results so the best rows come
# first and print the top five. The loss is the only metric where lower is
# better, so it alone is sorted ascending.
ranking_order = (
    ('val_recall', False),
    ('val_precision', False),
    ('val_accuracy', False),
    ('val_auc', False),
    ('val_loss', True),
    ('val_f1', False),
)
for ranking_metric, sort_ascending in ranking_order:
    df_grid.sort_values(by=ranking_metric, ascending=sort_ascending, inplace=True)
    print(df_grid.head())



In [None]:
# Scatter the validation AUC against each other validation metric, coloured by
# rotation_range, in a 2x3 grid of panels.
plt.figure(figsize=(15, 10))

# (column on the y axis, label used for the axis and the panel title)
scatter_panels = (
    ('val_recall', 'Recall'),
    ('val_precision', 'Precision'),
    ('val_accuracy', 'Accuracy'),
    ('val_loss', 'Loss'),
    ('val_f1', 'F1'),
)
for panel_position, (y_column, y_label) in enumerate(scatter_panels, start=1):
    plt.subplot(2, 3, panel_position)
    sns.scatterplot(data=df_grid, x='val_auc', y=y_column, hue='rotation_range')
    plt.title(f'AUC vs. {y_label}')
    plt.xlabel('AUC')
    plt.ylabel(y_label)
    plt.legend(loc='upper left')

# The sixth panel stays empty
plt.subplot(2, 3, 6)
plt.axis('off')

# Fixed learning rate, batch size and model technique in the figure title
plt.suptitle(f"Learning rate: {learning_rate}, batch size: {batch_size}, MobilNetV3Large")

# Adjust the layout
plt.tight_layout()

plt.show()


In [None]:
from datetime import datetime

# Compact, filesystem-safe timestamp. A raw datetime would put spaces and
# colons into the filename. Same format as the tuning cell's model files.
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = f"{FILEPATH_OUTPUT}model_bjzim_MobileNetV3Large_{timestamp}.h5"
model.save(model_path)

