In [None]:
# ---- Importiere benötigte Bibliotheken ----
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import ParameterGrid
from tensorflow.keras import layers, Model, regularizers, callbacks
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D, Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Dropout
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc, recall_score, precision_score, f1_score, confusion_matrix

In [None]:
# ---- Set constants and parameters ----
# SEED is handed to the data generators as `seed` inside train_evaluate_model.
# NOTE(review): no global numpy/TensorFlow seeding is visible in these cells - confirm
# whether full reproducibility is expected.
SEED = 42
# Number of epochs passed to model.fit for every hyper-parameter combination.
NUM_EPOCHS = 15
# Batch size used by the image generators.
BATCH_SIZE = 64
# Target size for the image generators; also used as the spatial input shape of the network.
IMAGE_SIZE = (224, 224)
# Column of the metadata CSVs that is used as the class label (y_col).
TARGET_LABEL = "dx"
# NOTE(review): BALANCE_LABEL is not referenced in the cells visible here - confirm it is still needed.
BALANCE_LABEL = "dx"
# Directory from which flow_from_dataframe loads the image files.
FILEPATH_JPGS = './../data/jpgs/'
# Directory holding the processed train/validation/test metadata CSVs.
FILEPATH_PROCESSED = './../data/processed/'
# Directory the hyper-parameter tuning results CSV is written to.
FILEPATH_OUTPUT = './../data/bjzim-models/'

In [None]:
# ---- Initialise other variables ----
# Placeholder for the progress bar; the tuning cell rebinds it to a tqdm instance.
pbar = None

In [None]:
# ---- File paths and set names ----
# Each entry pairs the display name of a split (used in warning messages)
# with the path of that split's processed metadata CSV.
filepaths = [
    (set_label, FILEPATH_PROCESSED + csv_name)
    for set_label, csv_name in (
        ("Trainingsset", "train_from_Metadata_processed.csv"),
        ("Validierungsset", "validation_from_Metadata_processed.csv"),
        ("Testset", "test_from_Metadata_processed.csv"),
    )
]

In [None]:
# ---- Helper that checks a split for augmented data ----
def check_augmented_data(df, set_name):
    """Print a warning when a split contains augmented images.

    An image counts as augmented when its ``image_id`` starts with ``aug_``.

    Args:
        df: DataFrame with an ``image_id`` string column.
        set_name: Display name of the split, inserted into the warning text.

    Returns:
        None. The only effect is the printed warning.
    """
    contains_augmented = df['image_id'].str.startswith('aug_').any()
    if not contains_augmented:
        return
    print(f"Warnung: Augmentierte Daten im {set_name} gefunden.")

# ---- Run the check ----
# Read every split's CSV and warn if it contains augmented image ids.
for set_name, filepath in filepaths:
    df = pd.read_csv(filepath)
    check_augmented_data(df, set_name)

In [None]:
# Load the processed metadata of the three splits (train / validation / test).
train_df, validation_df, test_df = (
    pd.read_csv(FILEPATH_PROCESSED + f"{split}_from_Metadata_processed.csv")
    for split in ("train", "validation", "test")
)

In [None]:
# Augmentation parameters; each key is forwarded to the ImageDataGenerator
# argument of the same name when the training generator is built.


aug_params_recall = {
    'height_shift_range': 0.1,
    'horizontal_flip': True,
    'rotation_range': 0,
    'vertical_flip': True,
    'width_shift_range': 0.2,
    'zoom_range': 0.05
}

# Alternative augmentation settings, currently disabled:
# aug_params_recall = {
#     'height_shift_range': 0.05,
#     'horizontal_flip': False,
#     'rotation_range': 30,
#     'vertical_flip': True,
#     'width_shift_range': 0.2,
#     'zoom_range': 0.05
# }
# Grid of hyper-parameters to search (2 x 2 x 2 = 8 combinations with the values below).
# NOTE(review): train_evaluate_model as shown reads only 'learning_rate', 'dense_units',
# 'dropout_rate' and 'weight_regularization'; 'conv2d_filters', 'batch_size' and 'optimizer'
# are recorded in the results but do not influence training - confirm this is intended.
param_grid = {
    'learning_rate': [0.001, 0.0001],
    'conv2d_filters': [128],
    'dense_units': [64, 128],
    'dropout_rate': [0.5, 0.7],
    'batch_size': [64],
    'optimizer': ['Adam'],
    'weight_regularization': ['l2']
}

In [None]:
# Notebook-level image generators.
#
# No `rescale=1/255` here: Keras' MobileNetV3 models contain their own rescaling
# layer (include_preprocessing=True is the default) and expect pixel values in
# [0, 255]; `preprocess_input` for MobileNetV3 is a pass-through. Dividing by 255
# beforehand would squeeze every image into a tiny slice of the range the
# pretrained weights were trained on.
datagen_train = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=aug_params_recall['rotation_range'],
    width_shift_range=aug_params_recall['width_shift_range'],
    height_shift_range=aug_params_recall['height_shift_range'],
    zoom_range=aug_params_recall['zoom_range'],
    horizontal_flip=aug_params_recall['horizontal_flip'],
    vertical_flip=aug_params_recall['vertical_flip'],
    fill_mode='nearest'
)

# Shuffle/seed settings mirror the generators built inside train_evaluate_model.
train_data_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED
)

# Validation images are only passed through preprocess_input; no augmentation.
datagen_validation = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# shuffle=False keeps the prediction order aligned with `validation_generator.classes`,
# which is required whenever predictions are compared against those labels.
validation_generator = datagen_validation.flow_from_dataframe(
    dataframe=validation_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [None]:
# Stop training once the validation F1 has not improved for `patience` epochs and
# roll the model back to the best weights seen.
# 'val_f1' is written into the epoch logs by the CustomMetrics callback, so that
# callback has to run before this one in the callbacks list.
early_stopping = EarlyStopping(
    monitor='val_f1',
    mode='max',
    patience=8, # previously tried: 20, 15
    verbose=1,
    restore_best_weights=True
)


In [None]:
# Halve the learning rate when the validation F1 stalls for `patience` epochs,
# never going below `min_lr`. Like early_stopping, this relies on CustomMetrics
# having written 'val_f1' into the epoch logs first.
reduce_lr = ReduceLROnPlateau(
    monitor='val_f1',
    mode='max',
    factor=0.5, # previously tried: 0.1
    patience=3, # previously tried: 12, 8
    verbose=1,
    min_lr=1e-6
)

In [None]:


# Write the weights (not the full model) to 'model_best_weights.h5' whenever the
# monitored validation F1 improves.
# NOTE(review): this single instance is reused for every hyper-parameter run, so all
# runs write to the same file; presumably its "best so far" value also carries over
# between runs - confirm that this is the intended behaviour.
model_checkpoint = ModelCheckpoint(
    'model_best_weights.h5',
    save_best_only=True,
    save_weights_only=True,
    monitor='val_f1',
    mode='max',
    verbose=1
)

## Mit GlobalAveragePooling

## Tuning

In [None]:
# List of (f1, model) tuples for the best runs; the tuning cell re-initialises it before the search.
best_models = []

In [None]:
class CustomMetrics(Callback):
    """Keras callback that adds weighted recall, F1 and ROC-AUC on the validation set.

    After every epoch the model predicts the whole validation generator once and
    the resulting scores are stored in the epoch logs under 'val_recall', 'val_f1'
    and 'val_auc'. Callbacks that monitor one of these keys must be listed after
    this callback so that the values are already present when they run.

    The generator must yield its samples in a fixed order (shuffle=False);
    otherwise the predictions do not line up with ``validation_generator.classes``.
    """

    def __init__(self, validation_generator):
        """Store the generator whose samples are scored after each epoch."""
        super().__init__()
        self.validation_generator = validation_generator

    def on_epoch_end(self, epoch, logs=None):
        """Compute the validation scores, write them into ``logs`` and print them."""
        # Only replace a missing dict: an empty dict handed in by Keras must be
        # mutated in place, or the other callbacks would never see the new keys.
        if logs is None:
            logs = {}

        # Predict once and reuse the probabilities for the label-based scores and the AUC.
        val_probs = self.model.predict(self.validation_generator)
        val_preds = np.argmax(val_probs, axis=1)
        val_true = self.validation_generator.classes

        val_recall = recall_score(val_true, val_preds, average='weighted')
        val_f1 = f1_score(val_true, val_preds, average='weighted')
        val_auc = roc_auc_score(val_true, val_probs, multi_class='ovr', average='weighted')

        logs['val_recall'] = val_recall
        logs['val_f1'] = val_f1
        logs['val_auc'] = val_auc
        print(f" - val_recall: {val_recall: .5f} - val_f1: {val_f1: .5f} - val_auc: {val_auc: .5f}")
        print("-----------------------------------------------------------------------------------")

In [None]:
def train_evaluate_model(params, train_df, validation_df, FILEPATH_JPGS, TARGET_LABEL, IMAGE_SIZE, BATCH_SIZE):
    """Fine-tune a MobileNetV3Large classifier for one hyper-parameter setting.

    Builds an ImageNet-pretrained MobileNetV3Large backbone with a small dense
    head, trains it on ``train_df`` and scores it on ``validation_df``.

    Besides its arguments the function reads these notebook-level names:
    ``aug_params_recall``, ``SEED``, ``NUM_EPOCHS``, ``early_stopping``,
    ``reduce_lr`` and ``model_checkpoint``.

    NOTE(review): the three callbacks are shared instances reused by every call;
    the checkpoint callback presumably keeps its best value between calls and
    always writes to the same file - confirm this is intended.

    Args:
        params: Mapping with the keys 'dense_units', 'dropout_rate' and
            'learning_rate'; 'weight_regularization' ('l1', 'l2' or anything
            else for none) is optional. Other keys are ignored.
        train_df: DataFrame with an 'image_id' column and the label column.
        validation_df: DataFrame with the same columns for validation.
        FILEPATH_JPGS: Directory containing the image files.
        TARGET_LABEL: Name of the label column.
        IMAGE_SIZE: Two-element size used as generator target size and model input size.
        BATCH_SIZE: Batch size for both generators.

    Returns:
        Tuple ``(model, f1, history)``: the trained model, its weighted F1 score
        on the validation data and the object returned by ``model.fit``.
    """
    base_model = MobileNetV3Large(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Fine-tuning: keep the first 150 backbone layers frozen, train the rest.
    for layer_index, layer in enumerate(base_model.layers):
        layer.trainable = layer_index >= 150

    # Kernel regulariser for the dense head (strength fixed at 0.01).
    regularizer_factories = {'l1': regularizers.l1, 'l2': regularizers.l2}
    make_regularizer = regularizer_factories.get(params.get('weight_regularization', None))
    reg = make_regularizer(0.01) if make_regularizer is not None else None

    # Classification head: pooling, two dense+dropout stages, 7-way softmax.
    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dense(params['dense_units'], activation='relu', kernel_regularizer=reg)(x)
    x = layers.Dropout(params['dropout_rate'])(x)
    x = layers.Dense(params['dense_units'] // 2, activation='relu', kernel_regularizer=reg)(x)
    x = layers.Dropout(params['dropout_rate'])(x)
    x = layers.Dense(7, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=x)

    model.compile(optimizer=Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Data generators.
    # No `rescale=1/255`: Keras' MobileNetV3 models contain their own rescaling
    # layer (include_preprocessing=True is the default) and expect pixel values
    # in [0, 255]; `preprocess_input` for MobileNetV3 is a pass-through. Dividing
    # by 255 beforehand would feed the pretrained backbone inputs far outside
    # the range it was trained on.
    datagen_train = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=aug_params_recall['rotation_range'],
        width_shift_range=aug_params_recall['width_shift_range'],
        height_shift_range=aug_params_recall['height_shift_range'],
        zoom_range=aug_params_recall['zoom_range'],
        horizontal_flip=aug_params_recall['horizontal_flip'],
        vertical_flip=aug_params_recall['vertical_flip'],
        fill_mode='nearest'
    )

    datagen_validation = ImageDataGenerator(
        preprocessing_function=preprocess_input
    )

    train_generator = datagen_train.flow_from_dataframe(
        dataframe=train_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode="categorical",
        shuffle=True,
        seed=SEED
    )

    # shuffle=False keeps predictions aligned with the generator's label array.
    validation_generator = datagen_validation.flow_from_dataframe(
        dataframe=validation_df,
        directory=FILEPATH_JPGS,
        x_col="image_id",
        y_col=TARGET_LABEL,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode="categorical",
        shuffle=False,
        seed=SEED
    )

    # CustomMetrics must come first: it writes 'val_f1' into the epoch logs,
    # which the other three callbacks monitor.
    custom_metrics = CustomMetrics(validation_generator=validation_generator)

    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=NUM_EPOCHS,
        callbacks=[custom_metrics, early_stopping, reduce_lr, model_checkpoint],
        verbose=1
    )

    # Weighted F1 of the final model on the validation data.
    val_preds = model.predict(validation_generator)
    val_true_labels = validation_generator.labels
    val_pred_labels = np.argmax(val_preds, axis=1)
    f1 = f1_score(val_true_labels, val_pred_labels, average='weighted')

    return model, f1, history


In [None]:
import os
from tqdm import tqdm
from datetime import datetime

# ---- Hyper-parameter search over param_grid ----
# Result rows are collected as plain dicts and turned into a DataFrame once at the
# end; DataFrame.append was removed in pandas 2.0 and was quadratic before that.
result_columns = ['learning_rate', 'conv2d_filters', 'dense_units', 'dropout_rate', 'val_f1']
result_records = []
best_models, completed_iterations = [], 0
total_iterations = len(ParameterGrid(param_grid))

# Training history of the best run so far, kept for the learning-curve plots.
history = None
best_f1_seen = float('-inf')

def print_remaining_iterations(total, completed):
    """Return a status line stating how many grid iterations are still to run."""
    return f"{total - completed} iterations remaining"

# Make sure the output directory exists before the long-running search starts,
# so that writing the results CSV cannot fail at the very end.
os.makedirs(FILEPATH_OUTPUT, exist_ok=True)

# Main loop for hyperparameter tuning (one progress bar that wraps the grid).
pbar = tqdm(ParameterGrid(param_grid), total=total_iterations, desc="Hyperparameter Optimization")
for params in pbar:
    model, f1, run_history = train_evaluate_model(params, train_df, validation_df, FILEPATH_JPGS, TARGET_LABEL, IMAGE_SIZE, BATCH_SIZE)

    # Record the result and keep the five best (f1, model) pairs.
    result_records.append({**params, 'val_f1': f1})
    best_models = sorted(best_models + [(f1, model)], key=lambda x: x[0], reverse=True)[:5]
    if f1 > best_f1_seen:
        best_f1_seen, history = f1, run_history

    # Progress update
    completed_iterations += 1
    print(print_remaining_iterations(total_iterations, completed_iterations))
pbar.close()

# Build the results table: the original leading columns first, then any
# remaining grid keys in the order they appear.
results_df = pd.DataFrame(result_records)
results_df = results_df.reindex(
    columns=result_columns + [col for col in results_df.columns if col not in result_columns]
)

# Save the top 5 models and results DataFrame
for i, (f1, model) in enumerate(best_models):
    model.save(f'best_model_{i + 1}_{datetime.now().strftime("%Y%m%d-%H%M%S")}.h5')
    print(f"Saved best_model_{i + 1} with F1: {f1}")

results_df.to_csv(FILEPATH_OUTPUT + 'hyperparameter_tuning_results.csv', index=False)

print("Best F1 Scores:", [f1 for f1, _ in best_models])


In [None]:
# Show the tuning results, best validation F1 first.
results_df.sort_values('val_f1', ascending=False)


In [None]:
# Number of layers of `model`, i.e. of whichever model the preceding cell last bound to that name.
len(model.layers)

In [None]:
# Plot the learning curves: accuracy on the left, loss on the right.
# NOTE(review): this cell needs `plt` (matplotlib.pyplot) and a Keras `history`
# object at notebook level; neither an import of matplotlib nor a top-level
# assignment of `history` is visible in the preceding cells - confirm both exist.
plt.figure(figsize=(12, 6))

for panel_idx, (metric_key, panel_title) in enumerate([('accuracy', 'Accuracy'), ('loss', 'Loss')], start=1):
    plt.subplot(1, 2, panel_idx)
    # Training curve first, then the matching validation curve ('val_' + key).
    plt.plot(history.history[metric_key], label='Training')
    plt.plot(history.history['val_' + metric_key], label='Validation')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(panel_title)
    plt.legend()

plt.tight_layout()
plt.show()
