<a href="https://colab.research.google.com/github/AliciaFalconCaro/MedicalImageClassificationExample/blob/main/MedicalimageClassificationSVM_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

We will apply some basic medical image classification techniques.
The purpose is to compare traditional machine-learning techniques with deep learning (DL) techniques.

For this mini-project, we will use the public dataset available here: https://www.kaggle.com/datasets/nodoubttome/skin-cancer9-classesisic/data

The dataset is already split into two folders: train and test. It contains multiple images of different skin cancers. In total, there are 9 classes in the dataset, as its name indicates.

Let's start by loading the data and taking a quick look at it.

In [None]:
import os
import cv2
import numpy as np
import pandas as pd

# Function to load images and labels
def load_data_from_folder(base_dir, img_size=(64, 64)):
    """Load a folder-per-class image dataset into NumPy arrays.

    Every sub-directory of ``base_dir`` is treated as one class. Class names
    are sorted and numbered from 0, so the same folder layout always yields
    the same name-to-label mapping. Non-directory entries in ``base_dir`` and
    files that OpenCV cannot decode are skipped.

    Args:
        base_dir: Directory containing one sub-directory per class.
        img_size: Target size handed to ``cv2.resize`` for every image.

    Returns:
        A tuple ``(images, labels, label_map)`` where ``images`` is the array
        of resized images in RGB channel order, ``labels`` is the array of
        numeric class labels aligned with ``images``, and ``label_map`` maps
        each class folder name to its numeric label.
    """
    class_names = sorted(
        entry for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )
    label_map = {name: index for index, name in enumerate(class_names)}

    images = []
    labels = []
    for class_name, label in label_map.items():
        class_dir = os.path.join(base_dir, class_name)
        # Sorted so the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, file_name))
            if img is None:  # not an image, or unreadable
                continue
            # cv2.imread yields BGR; the rest of this file (RGB->gray
            # conversion, matplotlib display) expects RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, img_size)  # Resize to consistent size
            images.append(img)
            labels.append(label)

    return np.array(images), np.array(labels), label_map

# Load the data (it will be resized)
X_train, y_train, train_label_map = load_data_from_folder('./Skin_Cancer_ISIC_Data/train')
X_test, y_test, test_label_map = load_data_from_folder('./Skin_Cancer_ISIC_Data/test')

# Each split derives its label map from its own folder names. If the two
# folders do not contain the same classes, the numeric labels would mean
# different things in train and test, so fail loudly instead.
if test_label_map != train_label_map:
    raise ValueError(
        "Train and test folders define different classes: "
        f"{train_label_map} vs {test_label_map}"
    )

print(f"Training data: {X_train.shape}, Labels: {y_train.shape}")
print(f"Test data: {X_test.shape}, Labels: {y_test.shape}")
print(f"Label Map: {train_label_map}")

In [None]:
#now we normalize the data
# Normalize the images
# Pixel values are usually integers in [0, 255]; scale them to [0, 1].
# Only integer-typed arrays are scaled, so re-running this cell does not
# divide already-normalized (float) data by 255 a second time.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / 255.0

print(f"Training data: {X_train.shape}, Labels: {y_train.shape}")
print(f"Test data: {X_test.shape}, Labels: {y_test.shape}")
print(f"Label Map: {train_label_map}")

In [None]:
#feature extraction through HOG (Histogram of Oriented Gradients)

from skimage.feature import hog

# Extract HOG features
def extract_hog_features(images):
    """Compute one HOG descriptor per image.

    Each image is converted to 8-bit grayscale (channels are interpreted as
    RGB) and passed to ``skimage.feature.hog`` with 8x8-pixel cells, 2x2-cell
    blocks and L2-Hys block normalisation.

    Args:
        images: Iterable of colour images. Float images are taken to be
            scaled to [0, 1]; integer images are taken to hold raw
            intensities in [0, 255].

    Returns:
        A NumPy array with one feature vector per input image (an empty
        array when ``images`` is empty).
    """
    features = []
    for img in images:
        pixels = np.asarray(img)
        if np.issubdtype(pixels.dtype, np.integer):
            # Already raw intensities; multiplying by 255 would overflow.
            pixels_u8 = np.clip(pixels, 0, 255).astype(np.uint8)
        else:
            # Round before casting: astype() truncates, so float round-off
            # in the /255 -> *255 round trip could lower a pixel by one.
            pixels_u8 = np.clip(np.rint(pixels * 255), 0, 255).astype(np.uint8)
        # Convert to grayscale for HOG
        gray = cv2.cvtColor(pixels_u8, cv2.COLOR_RGB2GRAY)
        feature = hog(gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys')
        features.append(feature)
    return np.array(features)

# Compute HOG descriptors for both splits; the classifiers below are trained
# and evaluated on these feature arrays rather than on the raw pixels.
X_train_hog = extract_hog_features(X_train)
X_test_hog = extract_hog_features(X_test)


In [None]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Train two classifiers on the HOG features: a linear SVM and a
# k-nearest-neighbours model with 5 neighbours.
svmClassifier = SVC(kernel='linear', random_state=42)
svmClassifier.fit(X_train_hog, y_train)

KNNclassifier = KNeighborsClassifier(n_neighbors=5)
KNNclassifier.fit(X_train_hog, y_train)



# Evaluate both models on the test set
y_predSVM = svmClassifier.predict(X_test_hog)
print("Accuracy:", accuracy_score(y_test, y_predSVM))
print("Confusion Matrix SVM:\n",confusion_matrix(y_test, y_predSVM))

# Named y_predKNN to match y_predSVM and the name the plotting cell reads.
y_predKNN = KNNclassifier.predict(X_test_hog)
ypredKNN = y_predKNN  # alias for code that still uses the old spelling
print("Accuracy:", accuracy_score(y_test, y_predKNN))
print("Confusion Matrix KNN:\n",confusion_matrix(y_test, y_predKNN))


In [None]:
#visualise results
import matplotlib.pyplot as plt

# Plot examples
def plot_results(images, true_labels, pred_labels, le, n=5):
    """Show the first ``n`` images side by side with true and predicted labels.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``.
        true_labels: Ground-truth numeric labels aligned with ``images``.
        pred_labels: Predicted numeric labels aligned with ``images``.
        le: Object whose ``inverse_transform`` maps a list of numeric labels
            back to displayable class names.
        n: Maximum number of examples to draw. Fewer are drawn when the
            inputs are shorter; nothing is drawn when the count is below 1.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    count = min(n, len(images), len(true_labels), len(pred_labels))
    if count < 1:
        return  # nothing to draw

    true_names = le.inverse_transform(list(true_labels[:count]))
    pred_names = le.inverse_transform(list(pred_labels[:count]))

    # squeeze=False keeps `axes` two-dimensional even when count == 1, where
    # plt.subplots would otherwise return a single, non-iterable Axes.
    _fig, axes = plt.subplots(1, count, figsize=(15, 5), squeeze=False)
    for ax, image, true_name, pred_name in zip(axes[0], images, true_names, pred_names):
        ax.imshow(image)
        ax.set_title(f"True: {true_name}\nPred: {pred_name}", fontsize=10)
        ax.axis('off')
    plt.show()

# plot_results needs an encoder with inverse_transform() to turn numeric
# labels back into class names. LabelEncoder numbers classes in sorted order,
# the same order in which load_data_from_folder numbered the class folders.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(list(train_label_map))

print("Plot for SVM:\n")
plot_results(X_test, y_test, y_predSVM, le)

print ("Plot for KNN:\n")
plot_results(X_test, y_test, y_predKNN, le)

