In [64]:
!pip install keras-tuner



In [65]:
from kerastuner.tuners import RandomSearch

In [66]:
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras import metrics  # Import metrics
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [67]:
# Konstanten und Einstellungen
SEED = 421
NUM_EPOCHS = 30
TARGET_LABEL = "dx_binary"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64
FILEPATH_JPGS = './../data/jpgs/'
FILEPATH_PROCESSED="./../data/processed/"
FILEPATH_OUTPUT = './../data/jpgs/'  # Replace with your folder path



In [68]:
# Daten lesen
train_df = pd.read_csv(FILEPATH_PROCESSED + "train_from_Metadata_processed.csv")
validation_df = pd.read_csv(FILEPATH_PROCESSED + "validation_from_Metadata_processed.csv")
test_df = pd.read_csv(FILEPATH_PROCESSED + "test_from_Metadata_processed.csv")



In [69]:
train_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,dx_binary,image_path
0,HAM_0002681,ISIC_0029381.jpg,nv,follow_up,30.0,male,back,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0029381.jpg
1,HAM_0005320,ISIC_0025356.jpg,nv,follow_up,35.0,female,back,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0025356.jpg
2,HAM_0003724,ISIC_0025036.jpg,nv,follow_up,50.0,female,abdomen,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0025036.jpg
3,HAM_0006809,ISIC_0031690.jpg,nv,follow_up,40.0,male,abdomen,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0031690.jpg
4,HAM_0003443,ISIC_0032485.jpg,nv,follow_up,35.0,female,abdomen,vidir_molemax,not_skin_cancer,./../data/jpgs/ISIC_0032485.jpg


In [70]:
def center_crop_image(np_image: np.ndarray, target_size: tuple = (224, 224)) -> np.ndarray:
    """
    Center crop an image to a square and resize it to the target size.

    Args:
        np_image (np.ndarray): Image to be cropped in numpy array format.
        target_size (tuple): The target size to resize the cropped image, default is (224, 224).

    Returns:
        np.ndarray: Cropped and resized image in numpy array format.
    """

    # Convert the scaled numpy array to a PIL Image with original pixel values (0-255)
    image = Image.fromarray((np_image * 255).astype(np.uint8))

    # Get dimensions
    width, height = image.size

    # Calculate the dimensions of the cropped area (choose the shorter side)
    new_dimension = min(width, height)

    # Calculate cropping box
    left = (width - new_dimension) / 2
    top = (height - new_dimension) / 2
    right = (width + new_dimension) / 2
    bottom = (height + new_dimension) / 2

    # Crop and resize
    image = image.crop((left, top, right, bottom)).resize(target_size)

    # Convert back to scaled numpy array (0-1)
    np_image = np.array(image) / 255.0

    return np_image

In [71]:
def resize_as_preprocess(np_image: np.ndarray, image_size: tuple) -> np.ndarray:
    """
    Resize an image to the target size.

    Args:
        np_image (np.ndarray): The image to be resized, in numpy array format.
        image_size (tuple): The target size to resize the image to, format (width, height).

    Returns:
        np.ndarray: The resized image in numpy array format.
    """

    # Convert the scaled numpy array (0-1) to a PIL Image with original pixel values (0-255)
    image = Image.fromarray((np_image * 255).astype(np.uint8))

    # Resize the image
    # image = image.resize(image_size, Image.ANTIALIAS) This is old
    image = image.resize(image_size, Image.LANCZOS)


    # Convert back to a scaled numpy array (0-1)
    np_image = np.array(image) / 255.0

    return np_image

In [72]:
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input

from tensorflow.keras.applications.mobilenet_v3 import preprocess_input

def custom_preprocessing(np_image: np.ndarray, image_size: tuple) -> np.ndarray:
    """
    Custom preprocessing function that combines center cropping, resizing, and MobileNetV3Large's preprocess_input.

    Args:
        np_image (np.ndarray): The image to be preprocessed, in numpy array format.
        image_size (tuple): The target size to resize the image to, format (width, height).

    Returns:
        np.ndarray: The preprocessed image in numpy array format.
    """

    # Step 1: Center crop the image to square format
    np_image = center_crop_image(np_image)

    # Step 2: Resize the image to the target dimensions
    np_image = resize_as_preprocess(np_image, image_size)

    # Step 3: Apply MobileNetV3Large's preprocess_input function
    np_image = preprocess_input(np_image)

    return np_image


In [73]:
def wrapped_custom_preprocessing(x):
    return custom_preprocessing(x, IMAGE_SIZE)

In [74]:
# Setting up the Image Data Generator for the train data set
datagen_train = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Rescale pixel values to [0, 1], important for neural networks
    preprocessing_function=wrapped_custom_preprocessing  # Apply custom preprocessing
)

In [75]:
# Setting up the Image Data Generator for the validation data set
datagen_validation = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Rescale pixel values to [0, 1], important for neural networks
    preprocessing_function=wrapped_custom_preprocessing  # Apply custom preprocessing
)

In [76]:
# Initialize the validation data generator using flow_from_dataframe method
validation_generator = datagen_validation.flow_from_dataframe(
    dataframe=validation_df,            # DataFrame containing the filepaths and labels
    directory=FILEPATH_JPGS,            # Path to the directory to read images from
    x_col="image_id",                   # Column in DataFrame containing the filepaths
    y_col=TARGET_LABEL,                 # Column in DataFrame containing the labels
    class_mode="categorical",           # Mode for yielding the labels (categorical for multi-class problems)
    target_size=IMAGE_SIZE,             # Target size for resizing the images
    batch_size=BATCH_SIZE               # Number of images to load at each iteration
)


Found 2003 validated image filenames belonging to 2 classes.


In [77]:
# Initialize the training data generator using the flow_from_dataframe method
train_data_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,                 # DataFrame containing the filepaths and labels
    directory=FILEPATH_JPGS,            # Path to the directory to read images from
    x_col="image_id",                   # Column in DataFrame containing the filepaths
    y_col=TARGET_LABEL,                 # Column in DataFrame containing the labels
    class_mode="categorical",           # Mode for yielding the labels (categorical for multi-class problems)
    target_size=IMAGE_SIZE,             # Target size for resizing the images
    batch_size=BATCH_SIZE               # Number of images to load at each iteration
)


Found 5256 validated image filenames belonging to 2 classes.


**Build Model:**

In [78]:
def build_model(hp):
    # Hyperparameter
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    dense_units = hp.Int('dense_units', min_value=32, max_value=512, step=32)

    # MobileNetV3Large Modell mit eingefrorenen Gewichtungen
    base_model = MobileNetV3Large(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    for layer in base_model.layers:
        layer.trainable = False

    # Anpassbare Schichten oben
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(dense_units, activation='relu')(x)
    predictions = layers.Dense(train_df[TARGET_LABEL].nunique(), activation='softmax')(x)

    # Gesamtmodell erstellen
    model = Model(inputs=base_model.input, outputs=predictions)

    # Modell kompilieren
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', metrics.Recall(), metrics.Precision(),
                           tf.keras.metrics.AUC(curve='PR', name='f1_score')])

    return model



In [79]:
from kerastuner.tuners import BayesianOptimization

# Bayesian Optimization
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=5,  # Reduzierte Anzahl der Trials
    num_initial_points=2,  # Anzahl der Randomisierungspunkte vor Beginn der Optimierung
    directory='"./../models/bjzim/"',
    project_name='MobileNetV3_HAM10000_Tuning_binary'
)

# Reduziere die Anzahl der Epochen
tuner.search(train_data_generator,
             epochs=10,  # Reduzierte Epochen
             validation_data=validation_generator,
             callbacks=[EarlyStopping(monitor='val_accuracy', patience=1)]  # Reduzierte Geduld
)

Trial 5 Complete [00h 01m 55s]
val_accuracy: 0.8047928214073181

Best val_accuracy So Far: 0.8047928214073181
Total elapsed time: 00h 05m 31s


In [80]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [81]:
model = tuner.hypermodel.build(best_hps)



In [82]:


# Definieren des PlotLearning Callbacks
class PlotLearning(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.metrics = {}
        if 'metrics' in self.params:
            for metric in self.params['metrics']:
                self.metrics[metric] = []

    def on_epoch_end(self, epoch, logs={}):
        for metric in self.params.get('metrics', []):
            if metric in logs:
                self.metrics[metric].append(logs[metric])

        # Plot the metrics
        f, ax = plt.subplots(1, len(self.metrics), figsize=(20, 5))
        f.suptitle('Epoch {}'.format(epoch))
        for i, metric in enumerate(self.metrics.keys()):
            ax[i].plot(self.metrics[metric])
            ax[i].set_title(metric)
        plt.show()

In [83]:
plot_learning = PlotLearning()

# Aktualisierte Callbacks
early_stopping = EarlyStopping(monitor='val_recall_10', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_recall_10', factor=0.2, patience=2, min_lr=0.0001)

callbacks_list = [early_stopping, reduce_lr, plot_learning]




In [84]:
from tensorflow.keras.optimizers import Adam

# Initialisiere den Adam-Optimizer mit einer benutzerdefinierten Lernrate
optimizer = Adam(learning_rate=0.001)  # Du kannst die Lernrate nach Bedarf anpassen

# Kompilieren des Modells
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy', 'Recall', 'Precision',  # oder die genauen Namen der Metriken, wenn du benutzerdefinierte Metriken verwendest
                       tf.keras.metrics.AUC(curve='PR', name='f1_score')])



In [85]:
# Trainingsaufruf
history = model.fit(
    train_data_generator,
    epochs=NUM_EPOCHS,
    verbose=1,
    validation_data=validation_generator,
    shuffle=True,
    callbacks=callbacks_list,  # Verwende Callbacks für frühzeitiges Anhalten und Reduzieren der Lernrate
    workers=-1,
    use_multiprocessing=True  # Aktiviere dies, wenn du einen Generator parallel verwendest
)

Epoch 1/30








ValueError: Number of columns must be a positive integer, not 0

<Figure size 2000x500 with 0 Axes>

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluate the model on the test set
test_generator = datagen_test.flow_from_dataframe(
    dataframe=test_df,
    directory=FILEPATH_JPGS,
    x_col="image_id",
    y_col=TARGET_LABEL,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Get the true labels and predicted labels
true_labels = test_generator.classes
predictions = model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# Calculate and print the overall accuracy
overall_accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy of the network on the test images: {overall_accuracy * 100:.2f} %")

# Calculate and print class-wise accuracies
class_labels = list(test_generator.class_indices.keys())
report = classification_report(true_labels, predicted_labels, target_names=class_labels)
print("Class-wise evaluation:")
print(report)


In [None]:
from tensorflow.keras.applications import ResNet50

# Initialize and compile ResNet model
resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
x = resnet_base.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
predictions = layers.Dense(train_df[TARGET_LABEL].nunique(), activation='softmax')(x)
resnet_model = Model(inputs=resnet_base.input, outputs=predictions)

resnet_model.compile(optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])

# Train ResNet model
resnet_history = resnet_model.fit(train_data_generator, epochs=NUM_EPOCHS, validation_data=validation_data_generator, callbacks=callbacks_list)


In [None]:
# Get predictions from both models
mobilenet_preds = history.predict(validation_data_generator)
resnet_preds = resnet_model.predict(validation_data_generator)

# Average the predictions
avg_preds = (mobilenet_preds + resnet_preds) / 2

# Convert averaged predictions to class labels
final_preds = np.argmax(avg_preds, axis=1)


In [None]:
# Extract ground truth labels from validation data generator
ground_truth = validation_data_generator.classes


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Convert averaged predictions to class labels
final_preds = np.argmax(avg_preds, axis=1)

# Compute overall accuracy
ensemble_accuracy = accuracy_score(ground_truth, final_preds)
print(f'Ensemble Accuracy: {ensemble_accuracy * 100:.2f}%')

# Compute accuracy, precision, recall, and F1-score for each class
report = classification_report(ground_truth, final_preds, target_names=validation_data_generator.class_indices.keys())
print("Classification Report:")
print(report)
