In [21]:
from tensorflow.keras.utils import to_categorical


In [24]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, roc_auc_score, f1_score, average_precision_score

In [33]:
# Input locations used throughout the notebook.
# Ground-truth / grouping spreadsheets (stored directly under /content).
train_ground_truth_path = '/content/TrainingGroundTruth.xlsx'
test_ground_truth_path = '/content/TestGroundTruth.xlsx'
train_lesion_groupings_path = '/content/TrainingLesionGroupings.xlsx'
# Folder (under /content/drive) that image file names are resolved against.
# NOTE(review): the folder name suggests it holds test inputs only — confirm
# that the training images are also found here.
image_folder = '/content/drive/MyDrive/KCDH2024_Test_Input/KCDH2024_Test_Input'

In [26]:

# Load and preprocess images
def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Read one image from disk and return it as a normalised RGB array.

    Args:
        image_path: Path handed directly to ``cv2.imread``.
        target_size: Size handed to ``cv2.resize``; OpenCV reads it as
            ``(width, height)``.

    Returns:
        The resized RGB image divided by 255.0 (floating-point values).

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.

    NOTE(review): a later cell defines ``load_and_preprocess_image`` again
    with a different path convention (it joins ``image_folder`` itself);
    whichever cell ran last is the definition in effect.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque OpenCV assertion inside cvtColor.
    if image is None:
        raise ValueError(f"Could not decode image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
    image = cv2.resize(image, target_size)
    return image / 255.0  # Normalize to [0, 1]

def create_image_array(image_paths, target_size=(128, 128)):
    """Load every image named in ``image_paths`` into a single NumPy array.

    Each entry is joined onto the module-level ``image_folder`` and the
    resulting path is passed to ``load_and_preprocess_image`` together with
    ``target_size``. The per-image results are combined with ``np.array``.
    """
    loaded = []
    for relative_path in image_paths:
        full_path = os.path.join(image_folder, relative_path)
        loaded.append(load_and_preprocess_image(full_path, target_size))
    return np.array(loaded)


In [28]:
# Read the ground-truth spreadsheets for the training and test splits
train_ground_truth = pd.read_excel(train_ground_truth_path)
test_ground_truth = pd.read_excel(test_ground_truth_path)

# Show the column index of each sheet so the label layout can be checked
for caption, sheet in (
    ("Train Ground Truth Columns:", train_ground_truth),
    ("Test Ground Truth Columns:", test_ground_truth),
):
    print(caption, sheet.columns)


Train Ground Truth Columns: Index(['image', 'MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC'], dtype='object')
Test Ground Truth Columns: Index(['image', 'MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC'], dtype='object')


In [35]:

# Build one file name per row: the value of the 'image' column plus '.jpg'.
# These are bare file names; they are resolved against image_folder at load time.
train_image_paths = [f'{image_id}.jpg' for image_id in train_ground_truth['image']]
test_image_paths = [f'{image_id}.jpg' for image_id in test_ground_truth['image']]


In [31]:
# Load and preprocess images
def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Load an image from ``image_folder`` as a normalised RGB array.

    This is a best-effort loader: when the file is missing or cannot be
    decoded, a message is printed and an all-zero placeholder is returned
    instead of raising, so a batch load keeps going.

    Args:
        image_path: File name relative to the module-level ``image_folder``.
        target_size: Size handed to ``cv2.resize``; OpenCV reads it as
            ``(width, height)``.

    Returns:
        Array of shape ``(target_size[1], target_size[0], 3)``: the resized
        RGB image divided by 255.0, or zeros when loading failed.
    """
    # cv2.resize takes (width, height) but returns an array laid out as
    # (height, width, channels). The placeholder must use the same layout,
    # otherwise non-square target sizes yield mismatched shapes that cannot
    # be stacked into one array.
    placeholder_shape = (target_size[1], target_size[0], 3)

    full_path = os.path.join(image_folder, image_path)
    if not os.path.isfile(full_path):
        print(f"Warning: File {full_path} not found.")
        return np.zeros(placeholder_shape)  # Empty image if file is not found
    image = cv2.imread(full_path)
    # cv2.imread returns None (no exception) when the file cannot be decoded
    if image is None:
        print(f"Error loading image {full_path}.")
        return np.zeros(placeholder_shape)  # Empty image if loading fails
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
    image = cv2.resize(image, target_size)
    image = image / 255.0  # Normalize to [0, 1]
    return image

def create_image_array(image_paths, target_size=(128, 128)):
    """Load all images in ``image_paths`` and combine them with ``np.array``.

    Every entry is passed unchanged, together with ``target_size``, to
    ``load_and_preprocess_image``; results keep the order of ``image_paths``.
    """
    return np.array(
        [load_and_preprocess_image(file_name, target_size) for file_name in image_paths]
    )


In [None]:


# Load and preprocess images
X_train_images = create_image_array(train_image_paths)
X_test_images = create_image_array(test_image_paths)

# Load and prepare labels.
# The ground-truth sheets have no 'dx' column: their columns are 'image' plus
# one column per diagnosis (see the printed column index above), so the label
# of each row is recovered as the name of the class column holding the
# largest value.
# NOTE(review): assumes each row is one-hot (exactly one class marked) — confirm.
class_columns = [column for column in train_ground_truth.columns if column != 'image']
train_labels = train_ground_truth[class_columns].idxmax(axis=1)
test_labels = test_ground_truth[class_columns].idxmax(axis=1)

# Encode labels. Fitting on the full list of class columns keeps the
# label -> index mapping complete even if a class is absent from one split.
label_encoder = LabelEncoder()
label_encoder.fit(class_columns)
train_labels_encoded = label_encoder.transform(train_labels)
test_labels_encoded = label_encoder.transform(test_labels)

# Convert to categorical. num_classes is passed explicitly so both matrices
# have one column per class regardless of which labels occur in each split.
num_classes = len(label_encoder.classes_)
categorical_train_labels = to_categorical(train_labels_encoded, num_classes=num_classes)
categorical_test_labels = to_categorical(test_labels_encoded, num_classes=num_classes)

print(f'Number of classes: {len(label_encoder.classes_)}')

In [None]:
# Build the model: three Conv2D + MaxPooling2D stages followed by a dense
# classifier head, assembled layer by layer.
model = Sequential()

# Convolutional feature extractor on 128x128 RGB inputs
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Classifier head; the softmax layer has one unit per encoded class
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model.
# Training stops once val_loss has not improved for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the best
# val_loss; without it the model keeps the weights of the last epoch run,
# i.e. after `patience` epochs without improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# validation_split=0.2 holds out a fraction of the training arrays for validation.
history = model.fit(
    X_train_images,
    categorical_train_labels,
    validation_split=0.2,
    epochs=20,
    callbacks=[early_stopping],
)

# Evaluate the model: per-class scores for every test image, then the
# index of the highest-scoring class per image
test_predictions = model.predict(X_test_images)
test_predictions_labels = test_predictions.argmax(axis=1)

# Integer ground-truth labels recovered from the one-hot test matrix
test_labels_true = categorical_test_labels.argmax(axis=1)

# Metrics on hard predictions
accuracy = (test_predictions_labels == test_labels_true).mean()
f1 = f1_score(test_labels_true, test_predictions_labels, average='weighted')

# Metrics on predicted scores against the one-hot targets
auc = roc_auc_score(
    categorical_test_labels,
    test_predictions,
    multi_class='ovr',
)
map_score = average_precision_score(
    categorical_test_labels,
    test_predictions,
    average='weighted',
)

print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1}')
print(f'AUC: {auc}')
print(f'Mean Average Precision: {map_score}')

# Map the predicted class indices back to their label names, attach them to
# the test ground-truth table as 'predicted_dx', and write the table to CSV
predicted_names = label_encoder.inverse_transform(test_predictions_labels)
test_ground_truth['predicted_dx'] = predicted_names
test_ground_truth.to_csv('/content/test_predictions.csv', index=False)