### Creation of the RESNET Model using the Original Data

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
import albumentations as A
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import LearningRateScheduler

# Updated Paths and parameters
image_folder = "../augment_data/augmented_images"
segmentation_folder = "../augment_data/augmented_masks"
metadata_file = "../augment_data/augmented_metadata.csv"

# Consistent parameters
image_size = 224
batch_size = 32
epochs = 100

class CustomImageGeneratorWithMask(tf.keras.utils.Sequence):
    def __init__(self, image_ids, labels, batch_size, label_encoder, augment=False):
        # Validate and filter image files
        self.valid_image_ids = []
        for image_id in image_ids:
            image_path = os.path.join(image_folder, f"{image_id}.jpg")
            mask_path = os.path.join(segmentation_folder, f"{image_id}_segmentation.png")
            
            if os.path.exists(image_path) and os.path.exists(mask_path):
                self.valid_image_ids.append(image_id)
            else:
                print(f"Warning: Missing files for {image_id}")
        
        self.image_ids = self.valid_image_ids
        self.labels = labels
        self.batch_size = batch_size
        self.label_encoder = label_encoder
        self.augment = augment

        # Updated Albumentations augmentation pipeline
        self.augmentation = A.Compose([
            A.RandomRotate90(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
            A.RandomBrightnessContrast(p=0.3),
            A.GaussNoise(p=0.2)
        ])

    def __len__(self):
        return int(np.ceil(len(self.image_ids) / self.batch_size))

    def __getitem__(self, index):
        batch_image_ids = self.image_ids[index * self.batch_size:(index + 1) * self.batch_size]
        batch_original_images = []
        batch_segmented_images = []
        batch_labels = []

        for image_id in batch_image_ids:
            # Paths for image and segmentation mask
            image_path = os.path.join(image_folder, f"{image_id}.jpg")
            mask_path = os.path.join(segmentation_folder, f"{image_id}_segmentation.png")

            # Read original and mask images with error handling
            try:
                original_image = cv2.imread(image_path)
                if original_image is None:
                    print(f"Failed to read image: {image_path}")
                    continue
                
                original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
                
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                if mask is None:
                    print(f"Failed to read mask: {mask_path}")
                    continue

                # Resize images
                original_image = cv2.resize(original_image, (image_size, image_size))
                mask = cv2.resize(mask, (image_size, image_size))

                # Apply augmentations if training
                if self.augment:
                    # Apply augmentation separately to image and mask
                    augmented_image = self.augmentation(image=original_image)['image']
                    augmented_mask = self.augmentation(image=mask)['image']
                    
                    original_image = augmented_image
                    mask = augmented_mask

                # Create segmented image
                mask_3ch = cv2.merge([mask] * 3)
                segmented_image = cv2.addWeighted(original_image, 0.8, mask_3ch, 0.2, 0)

                batch_original_images.append(original_image)
                batch_segmented_images.append(segmented_image)

                # Get label and encode
                label = self.labels.get(image_id, None)
                batch_labels.append(label)

            except Exception as e:
                print(f"Error processing {image_id}: {e}")
                continue

        # Preprocess images for MobileNetV2
        batch_original_images = preprocess_input(np.array(batch_original_images))
        batch_segmented_images = preprocess_input(np.array(batch_segmented_images))

        # Convert labels to categorical
        batch_labels = self.label_encoder.transform(batch_labels)
        batch_labels = tf.keras.utils.to_categorical(batch_labels, num_classes=len(self.label_encoder.classes_))

        return (batch_original_images, batch_segmented_images), batch_labels

def create_classwise_accuracy_metrics(num_classes):
    class_metrics = []
    for i in range(num_classes):
        def get_class_accuracy(i):
            def class_accuracy(y_true, y_pred):
                class_pred = tf.argmax(y_pred, axis=-1)
                class_true = tf.argmax(y_true, axis=-1)
                class_mask = tf.equal(class_true, i)
                class_acc = tf.reduce_mean(
                    tf.cast(tf.equal(class_pred[class_mask], tf.cast(class_mask[class_mask], tf.int64)), tf.float32)
                )
                return class_acc
            class_accuracy.__name__ = f'accuracy_class_{i}'
            return class_accuracy
        class_metrics.append(get_class_accuracy(i))
    return class_metrics

def step_decay_schedule(initial_lr=0.0001, decay_factor=0.5, step_size=10):
    def schedule(epoch):
        return initial_lr * (decay_factor ** np.floor(epoch / step_size))
    return LearningRateScheduler(schedule)

# Load metadata
metadata = pd.read_csv(metadata_file)
label_encoder = LabelEncoder()
label_encoder.fit(metadata["dx"])

# Create label dictionary
labels = dict(zip(metadata["image_id"], metadata["dx"]))

# Compute class weights
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=label_encoder.classes_,
    y=metadata["dx"].values
)
class_weights_dict = dict(enumerate(class_weights))

# Split the dataset
image_ids = metadata["image_id"].values
train_image_ids, val_image_ids = train_test_split(image_ids, test_size=0.2, random_state=42)

# Create data generators with augmentation
train_generator = CustomImageGeneratorWithMask(
    train_image_ids, labels, batch_size, label_encoder, augment=True
)
val_generator = CustomImageGeneratorWithMask(
    val_image_ids, labels, batch_size, label_encoder, augment=False
)

# Create ResNet50 models with unique names
base_model_original = ResNet50(
    weights="imagenet", 
    include_top=False, 
    input_shape=(image_size, image_size, 3),
    name="resnet50_original"
)

base_model_segmented = ResNet50(
    weights="imagenet", 
    include_top=False, 
    input_shape=(image_size, image_size, 3),
    name="resnet50_segmented"
)

# Freeze base models initially
base_model_original.trainable = False
base_model_segmented.trainable = False

# Define input layers
input_original = tf.keras.layers.Input(shape=(image_size, image_size, 3), name="original_input")
input_segmented = tf.keras.layers.Input(shape=(image_size, image_size, 3), name="segmented_input")

# Pass through separate ResNet50 models
features_original = base_model_original(input_original)
features_segmented = base_model_segmented(input_segmented)

# Concatenate features
combined_features = tf.keras.layers.Concatenate()([
    tf.keras.layers.GlobalAveragePooling2D()(features_original),
    tf.keras.layers.GlobalAveragePooling2D()(features_segmented)
])

# Add custom classification layers
x = tf.keras.layers.Dense(1024, activation="relu", 
                           kernel_regularizer=tf.keras.regularizers.l2(0.001))(combined_features)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(
    len(label_encoder.classes_), 
    activation="softmax",
    kernel_regularizer=tf.keras.regularizers.l2(0.001)
)(x)

# Create and compile the model
model = tf.keras.Model(inputs=[input_original, input_segmented], outputs=output)

# Prepare metrics (including class-wise accuracy)
metrics = ['accuracy'] + create_classwise_accuracy_metrics(len(label_encoder.classes_))

# Compile model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
    loss="categorical_crossentropy", 
    metrics=metrics
)

# Callbacks
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    "new_resnet_classification_model.keras", 
    save_best_only=False, 
    monitor="val_accuracy", 
    mode="max", 
    save_weights_only=False,
    verbose = 1
)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy", 
    patience=15, 
    restore_best_weights=True
)
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5, 
    patience=7, 
    min_lr=0.000005
)

# Modify callbacks to include learning rate scheduler
lr_scheduler = step_decay_schedule(initial_lr=0.0001, decay_factor=0.5, step_size=10)

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    class_weight=class_weights_dict,
    callbacks=[checkpoint_callback, 
               early_stopping_callback, 
               reduce_lr_callback,
               lr_scheduler]
)

# Visualize training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('ResNet Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('ResNet Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.savefig('resnet_training_history.png')

# Save the final model
model.save("resnet_classification_model.keras")

# Print label mapping and class-wise metrics
print("\nLabel Mapping:")
print(dict(zip(label_encoder.transform(label_encoder.classes_), label_encoder.classes_)))

# Evaluate on validation set
val_metrics = model.evaluate(val_generator)
print("\nValidation Metrics:")
for metric_name, metric_value in zip(model.metrics_names, val_metrics):
    print(f"{metric_name}: {metric_value}")


2024-11-28 08:30:54.009435: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step
Epoch 1/100


  self._warn_if_super_not_called()


[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5816 - accuracy_class_0: nan - accuracy_class_1: nan - accuracy_class_2: nan - accuracy_class_3: nan - accuracy_class_4: nan - accuracy_class_5: nan - accuracy_class_6: nan - loss: 2.8658

  self._warn_if_super_not_called()



Epoch 1: saving model to new_resnet_classification_model.keras
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2618s[0m 2s/step - accuracy: 0.5816 - accuracy_class_0: nan - accuracy_class_1: nan - accuracy_class_2: nan - accuracy_class_3: nan - accuracy_class_4: nan - accuracy_class_5: nan - accuracy_class_6: nan - loss: 2.8655 - val_accuracy: 0.7968 - val_accuracy_class_0: nan - val_accuracy_class_1: 0.7893 - val_accuracy_class_2: nan - val_accuracy_class_3: nan - val_accuracy_class_4: nan - val_accuracy_class_5: nan - val_accuracy_class_6: nan - val_loss: 1.9029 - learning_rate: 1.0000e-04
Epoch 2/100
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7382 - accuracy_class_0: nan - accuracy_class_1: nan - accuracy_class_2: nan - accuracy_class_3: nan - accuracy_class_4: nan - accuracy_class_5: nan - accuracy_class_6: nan - loss: 2.0117
Epoch 2: saving model to new_resnet_classification_model.keras
[1m1174/1174[0m [32m━━━━━━━━━━

### Testing the created RESNET Model

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
import cv2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input


# Paths to necessary files
model_path = "../new_resnet_classification_model.keras"
metadata_file = "../augment_data/augmented_metadata.csv"
image_folder = "../augment_data/augmented_images"
segmentation_folder = "../augment_data/augmented_masks"

# Parameters
image_size = 224
batch_size = 32

# Load metadata and encode labels
metadata = pd.read_csv(metadata_file)
label_encoder = LabelEncoder()
label_encoder.fit(metadata["dx"])

# Create a dictionary of labels
labels = dict(zip(metadata["image_id"], metadata["dx"]))

# Split the dataset for validation
image_ids = metadata["image_id"].values
_, val_image_ids = train_test_split(image_ids, test_size=0.2, random_state=42)

class CustomImageGeneratorWithMask(tf.keras.utils.Sequence):
    def __init__(self, image_ids, labels, label_encoder):
        self.image_ids = image_ids
        self.labels = labels
        self.label_encoder = label_encoder

    def __len__(self):
        return int(np.ceil(len(self.image_ids) / batch_size))

    def __getitem__(self, index):
        batch_image_ids = self.image_ids[index * batch_size:(index + 1) * batch_size]
        original_images = []
        segmented_images = []
        batch_labels = []

        for image_id in batch_image_ids:
            image_path = os.path.join(image_folder, f"{image_id}.jpg")
            mask_path = os.path.join(segmentation_folder, f"{image_id}_segmentation.png")

            if os.path.exists(image_path) and os.path.exists(mask_path):
                # Original image
                original_image = cv2.imread(image_path)
                original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
                original_image = cv2.resize(original_image, (image_size, image_size))
                original_images.append(preprocess_input(original_image))

                # Segmented image (mask)
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                mask = cv2.resize(mask, (image_size, image_size))
                mask = np.expand_dims(mask, axis=-1)  # Add channel dimension
                mask = np.concatenate([mask] * 3, axis=-1)  # Make it 3-channel
                segmented_images.append(preprocess_input(mask))

                # Label
                label = self.labels[image_id]
                batch_labels.append(label)

        original_images = np.array(original_images)
        segmented_images = np.array(segmented_images)
        batch_labels = label_encoder.transform(batch_labels)
        batch_labels = tf.keras.utils.to_categorical(batch_labels, num_classes=len(label_encoder.classes_))
        return [original_images, segmented_images], batch_labels


val_generator = CustomImageGeneratorWithMask(val_image_ids, labels, label_encoder)


# Recreate the custom metrics
def get_class_accuracy(i):
    def class_accuracy(y_true, y_pred):
        class_pred = tf.argmax(y_pred, axis=-1)
        class_true = tf.argmax(y_true, axis=-1)
        class_mask = tf.equal(class_true, i)
        class_acc = tf.reduce_mean(
            tf.cast(tf.equal(class_pred[class_mask], tf.cast(class_mask[class_mask], tf.int64)), tf.float32)
        )
        return class_acc
    class_accuracy.__name__ = f'accuracy_class_{i}'
    return class_accuracy

custom_objects = {
    f"accuracy_class_{i}": get_class_accuracy(i)
    for i in range(len(label_encoder.classes_))
}
# Load the model
model = load_model(model_path, custom_objects=custom_objects)

# Evaluate the model on the validation set
y_true = []
y_pred = []
y_prob = []


for (original_images, segmented_images), labels in val_generator:
    # Check for empty batches
    if original_images.shape[0] == 0 or segmented_images.shape[0] == 0 or labels.shape[0] == 0:
        print("Skipping empty batch and stopping the loop")
        break  # Stop the loop if an empty batch is found
    
    preds = model.predict([original_images, segmented_images])
    y_true.extend(np.argmax(labels, axis=1))
    y_prob.extend(preds)
    y_pred.extend(np.argmax(preds, axis=1))



2024-12-02 10:46:51.418409: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-02 10:46:51.419472: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-02 10:46:51.423063: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-02 10:46:51.433203: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733136411.449646 3528669 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733136411.45

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 640ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 636ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 625ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 627ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 611ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 637ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 664ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 644ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 611ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 601ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 623ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1