# Comparison of SIFT and SIFT-SVM (BoVW)

In [None]:
import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [None]:
# Number of KMeans clusters (visual words) used for the BoVW histograms.
k_histogram = 100
# A person is kept in the dataset only if at least this many images load.
min_image_num = 10
# A SIFT comparison is a match only with MORE than this many good matches.
min_sift_matches = 20
# Together with training_image_num this sets how many "Unknown" images are
# drawn per person: 2 * (training_set_size - training_image_num), alternated
# between the training and the test set.
training_set_size = 10
# Number of a person's own images that go into that person's training set.
training_image_num = 5
# Maximum number of misclassified images drawn in one visualisation row.
num_misclassified_to_show = 5
# Lowe's ratio test: the best match distance must be below this fraction of
# the second-best match distance.
lowes_sift_compare_ratio = 0.75


# Root directory; every sub-directory is treated as one person and every
# readable file inside it as one of that person's images.
base_folder = "../data/face/lfw-deepfunneled/lfw-deepfunneled"

In [None]:
# Map person name -> list of grayscale images, keeping only people that have
# at least `min_image_num` readable images.
data = {}

for person in os.listdir(base_folder):
    person_dir = os.path.join(base_folder, person)
    if not os.path.isdir(person_dir):
        continue

    # cv2.imread returns None for files it cannot decode; those are dropped.
    loaded = (
        cv2.imread(os.path.join(person_dir, img_name), cv2.IMREAD_GRAYSCALE)
        for img_name in os.listdir(person_dir)
    )
    images = [img for img in loaded if img is not None]

    if len(images) >= min_image_num:
        data[person] = images

In [None]:
# One entry per person (same order as `data`): each entry is the list of
# images / labels of that person's private train or test set.
x_train, y_train = [], []
x_test, y_test = [], []

people_names = list(data.keys())

for person, images in data.items():
    # Shuffles the person's image list in place, then splits it: the first
    # `training_image_num` images train, the rest test.
    random.shuffle(images)

    train_images = images[:training_image_num]
    test_images = images[training_image_num:]
    train_labels = [person] * training_image_num
    test_labels = [person] * (len(images) - training_image_num)

    # Distinct other people supply the "Unknown" images.
    candidates = [p for p in people_names if p != person]
    other_people = random.sample(
        candidates, 2 * (training_set_size - training_image_num)
    )

    for position, other_person in enumerate(other_people):
        chosen_image = random.sample(data[other_person], 1)[0]
        # Even positions feed the training set, odd positions the test set.
        goes_to_train = position % 2 == 0
        target_images = train_images if goes_to_train else test_images
        target_labels = train_labels if goes_to_train else test_labels
        target_images.append(chosen_image)
        target_labels.append("Unknown")

    x_train.append(train_images)
    x_test.append(test_images)
    y_train.append(train_labels)
    y_test.append(test_labels)

In [None]:
# Shared SIFT detector/descriptor used by every feature-extraction helper below.
sift = cv2.SIFT_create()
# Shared brute-force descriptor matcher (OpenCV default settings), used by
# compare_sift_features.
bf = cv2.BFMatcher()


def extract_sift_features(image_list):
    """Run the shared SIFT detector on every image.

    Args:
        image_list: Iterable of images accepted by ``sift.detectAndCompute``.

    Returns:
        A pair ``(keypoints_list, descriptors_list)`` of lists aligned with
        ``image_list``; each entry is whatever ``detectAndCompute`` returned
        for that image.
    """
    detections = [sift.detectAndCompute(img, None) for img in image_list]

    keypoints_list = [keypoints for keypoints, _ in detections]
    descriptors_list = [descriptors for _, descriptors in detections]
    return keypoints_list, descriptors_list


def compare_sift_features(descriptors_1, descriptors_2):
    """Decide whether two SIFT descriptor sets match using Lowe's ratio test.

    Args:
        descriptors_1: Query descriptors (one row per keypoint), or ``None``.
        descriptors_2: Descriptors to match against, or ``None``.

    Returns:
        A pair ``(is_match, good)``. ``good`` is a list of single-element
        lists ``[m]`` holding each best match that passed the ratio test;
        ``is_match`` is True when ``good`` has strictly more than
        ``min_sift_matches`` entries. If either descriptor set is ``None`` or
        empty the result is ``(False, [])``.
    """
    # An image without keypoints has no descriptors; nothing can match it.
    if descriptors_1 is None or descriptors_2 is None:
        return False, []
    if len(descriptors_1) == 0 or len(descriptors_2) == 0:
        return False, []

    descriptors_1 = np.asarray(descriptors_1, dtype="float32")
    descriptors_2 = np.asarray(descriptors_2, dtype="float32")

    good = []
    for neighbours in bf.knnMatch(descriptors_1, descriptors_2, k=2):
        # knnMatch may return fewer than k neighbours (e.g. when
        # descriptors_2 has a single row); the ratio test needs two.
        if len(neighbours) < 2:
            continue
        m, n = neighbours
        if m.distance < lowes_sift_compare_ratio * n.distance:
            good.append([m])
    return len(good) > min_sift_matches, good


def create_bow_histogram(descriptors, kmeans):
    """Build the bag-of-visual-words histogram of one image.

    Args:
        descriptors: Descriptor rows of the image, or ``None``.
        kmeans: Fitted clustering model exposing ``predict``, ``n_clusters``
            and ``cluster_centers_``.

    Returns:
        A 1-D array with one count per cluster. When there are no
        descriptors the result is an all-zero array whose length is the
        number of cluster centres.
    """
    has_descriptors = descriptors is not None and len(descriptors) != 0
    if not has_descriptors:
        return np.zeros(kmeans.cluster_centers_.shape[0])

    # Each descriptor votes for its nearest visual word; minlength keeps
    # words that received no vote in the histogram.
    word_ids = kmeans.predict(descriptors)
    return np.bincount(word_ids, minlength=kmeans.n_clusters)


def train_svm(descriptors_list, label_list):
    """Fit a visual vocabulary and a linear SVM on BoVW histograms.

    Args:
        descriptors_list: One descriptor array per training image; an entry
            may be ``None`` or empty when the image produced no keypoints.
        label_list: Labels aligned with ``descriptors_list``.

    Returns:
        A pair ``(svm, kmeans)``: the fitted ``SVC`` and the fitted ``KMeans``
        vocabulary needed to build histograms for new images.

    Raises:
        ValueError: If no image contributed any descriptor.
    """
    # Images without keypoints cannot contribute to the vocabulary, and a
    # None entry would make np.vstack fail.
    usable = [d for d in descriptors_list if d is not None and len(d) > 0]
    if not usable:
        raise ValueError("No SIFT descriptors available to build the vocabulary.")
    all_descriptors = np.vstack(usable)

    # KMeans cannot create more clusters than there are samples.
    n_clusters = min(k_histogram, len(all_descriptors))
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(all_descriptors)

    # Every image keeps its row (all zeros when it has no descriptors) so the
    # histograms stay aligned with label_list.
    histograms = [
        create_bow_histogram(descriptors, kmeans) for descriptors in descriptors_list
    ]

    svm = SVC(kernel="linear")
    svm.fit(histograms, label_list)

    return svm, kmeans


def predict_with_svm(svm, kmeans, new_image):
    """Classify one image with the BoVW + SVM pipeline.

    Args:
        svm: Fitted classifier exposing ``predict``.
        kmeans: Fitted vocabulary passed on to ``create_bow_histogram``.
        new_image: Image to describe with the shared SIFT detector.

    Returns:
        The predicted label, or ``None`` when SIFT finds no descriptors in
        the image.
    """
    descriptors = sift.detectAndCompute(new_image, None)[1]

    found_features = descriptors is not None and len(descriptors) != 0
    if not found_features:
        return None

    histogram = create_bow_histogram(descriptors, kmeans)
    return svm.predict([histogram])[0]

In [None]:
def calculate_metrics(true_labels, predicted_labels):
    """Score one batch of predictions.

    Precision and recall are weighted averages restricted to the labels that
    occur in ``true_labels``; undefined ratios count as 0.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels, aligned with ``true_labels``.

    Returns:
        A tuple ``(precision, recall, accuracy)``.
    """
    # Both scores share exactly the same averaging configuration.
    scoring_options = {
        "average": "weighted",
        "labels": np.unique(true_labels),
        "zero_division": 0,
    }

    precision = precision_score(true_labels, predicted_labels, **scoring_options)
    recall = recall_score(true_labels, predicted_labels, **scoring_options)
    accuracy = accuracy_score(true_labels, predicted_labels)

    return precision, recall, accuracy


def track_misclassifications(test_images, true_labels, predicted_labels):
    """Collect the test samples whose prediction differs from the truth.

    Args:
        test_images: Test images, aligned with the two label sequences.
        true_labels: Ground-truth labels.
        predicted_labels: Predicted labels.

    Returns:
        Three parallel lists: the misclassified images, their true labels
        (taken from the NumPy array built from ``true_labels``) and their
        predicted labels.
    """
    truth = np.array(true_labels)
    # Element-wise comparison; np.where yields the positions that disagree.
    wrong_positions = np.where(predicted_labels != truth)[0]

    wrong_images = [test_images[pos] for pos in wrong_positions]
    wrong_truth = [truth[pos] for pos in wrong_positions]
    wrong_predictions = [predicted_labels[pos] for pos in wrong_positions]

    return wrong_images, wrong_truth, wrong_predictions


def visualize_misclassifications(
    title, misclassified_images, misclassified_true_labels, misclassified_pred_labels
):
    """Plot a random sample of misclassified images with their labels.

    At most ``num_misclassified_to_show`` images are drawn side by side, each
    titled with its true and predicted label.

    Args:
        title: Figure title.
        misclassified_images: Misclassified images (grayscale or BGR arrays).
        misclassified_true_labels: True labels aligned with the images.
        misclassified_pred_labels: Predicted labels aligned with the images.
    """
    misclassified_indices_sample = random.sample(
        range(len(misclassified_images)),
        min(num_misclassified_to_show, len(misclassified_images)),
    )

    plt.figure(figsize=(15, 4))

    for idx, misclassified_idx in enumerate(misclassified_indices_sample):
        image = misclassified_images[misclassified_idx]
        # The dataset is loaded as single-channel grayscale; COLOR_BGR2RGB
        # rejects such input, so pick the conversion from the channel count.
        if np.ndim(image) == 2:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        else:
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        true_label = misclassified_true_labels[misclassified_idx]
        predicted_label = misclassified_pred_labels[misclassified_idx]

        plt.subplot(1, num_misclassified_to_show, idx + 1)
        plt.imshow(image_rgb)
        plt.title(f"True: {true_label}\nPred: {predicted_label}")
        plt.axis("off")

    plt.suptitle(title, fontweight="bold")
    plt.tight_layout()
    plt.show()


def print_metrics(title, all_precision, all_recall, all_accuracy):
    """Print the mean of each metric under a heading.

    Args:
        title: Heading printed before the metrics.
        all_precision: Per-person precision values.
        all_recall: Per-person recall values.
        all_accuracy: Per-person accuracy values.
    """
    mean_precision, mean_recall, mean_accuracy = (
        np.mean(scores) for scores in (all_precision, all_recall, all_accuracy)
    )

    print(title)
    print(f"Average Precision: {mean_precision}")
    print(f"Average Recall: {mean_recall}")
    print(f"Average Accuracy: {mean_accuracy}")

In [None]:
def eval_bovw():
    """Evaluate the SIFT + BoVW + SVM pipeline, one model per person.

    For every person index, a vocabulary and an SVM are trained on that
    person's training set (``x_train[i]`` / ``y_train[i]``) and scored on the
    matching test set (``x_test[i]`` / ``y_test[i]``).

    Returns:
        A pair ``(metrics, misclassified)`` where ``metrics`` is
        ``(all_precision, all_recall, all_accuracy)`` with one value per
        person, and ``misclassified`` is ``(images, true_labels,
        pred_labels)`` accumulated over all persons.
    """
    all_precision = []
    all_recall = []
    all_accuracy = []

    misclassified_images = []
    misclassified_true_labels = []
    misclassified_pred_labels = []

    for i in range(len(data)):
        training_set_person_i = x_train[i]
        training_label_set_person_i = y_train[i]
        testing_set_person_i = x_test[i]
        testing_label_set_person_i = y_test[i]

        _, train_desc = extract_sift_features(training_set_person_i)
        trained_svm, kmeans = train_svm(train_desc, training_label_set_person_i)

        test_predictions = []
        for xi in testing_set_person_i:
            prediction = predict_with_svm(trained_svm, kmeans, xi)
            # predict_with_svm returns None for an image without SIFT
            # features; score it as "Unknown" so the metrics only ever see
            # string labels.
            test_predictions.append("Unknown" if prediction is None else prediction)

        precision, recall, accuracy = calculate_metrics(
            testing_label_set_person_i, test_predictions
        )

        all_precision.append(precision)
        all_recall.append(recall)
        all_accuracy.append(accuracy)

        (
            misclassified_batch_images,
            misclassified_batch_true_labels,
            misclassified_batch_pred_labels,
        ) = track_misclassifications(
            testing_set_person_i,
            testing_label_set_person_i,
            test_predictions,
        )

        misclassified_images.extend(misclassified_batch_images)
        misclassified_true_labels.extend(misclassified_batch_true_labels)
        misclassified_pred_labels.extend(misclassified_batch_pred_labels)

    return (all_precision, all_recall, all_accuracy), (
        misclassified_images,
        misclassified_true_labels,
        misclassified_pred_labels,
    )

In [None]:
def eval_sift():
    """Evaluate plain SIFT matching, one reference image per person.

    For every person index, the descriptors of the FIRST training image are
    compared against each test image; a match predicts the first training
    label, anything else predicts "Unknown".

    Returns:
        A pair ``(metrics, misclassified)`` where ``metrics`` is
        ``(all_precision, all_recall, all_accuracy)`` with one value per
        person, and ``misclassified`` is ``(images, true_labels,
        pred_labels)`` accumulated over all persons.
    """
    precisions, recalls, accuracies = [], [], []
    wrong_images, wrong_true_labels, wrong_pred_labels = [], [], []

    for person_idx in range(len(data)):
        train_images = x_train[person_idx]
        train_labels = y_train[person_idx]
        test_images = x_test[person_idx]
        test_labels = y_test[person_idx]

        # Only the first training image serves as the reference.
        reference_descriptors = extract_sift_features(train_images)[1][0]
        _, test_descriptors = extract_sift_features(test_images)

        predicted_labels = []
        for candidate_descriptors in test_descriptors:
            is_match, _ = compare_sift_features(
                reference_descriptors, candidate_descriptors
            )
            predicted_labels.append(train_labels[0] if is_match else "Unknown")

        precision, recall, accuracy = calculate_metrics(test_labels, predicted_labels)
        precisions.append(precision)
        recalls.append(recall)
        accuracies.append(accuracy)

        batch_images, batch_true, batch_pred = track_misclassifications(
            test_images, test_labels, predicted_labels
        )
        wrong_images.extend(batch_images)
        wrong_true_labels.extend(batch_true)
        wrong_pred_labels.extend(batch_pred)

    metrics = (precisions, recalls, accuracies)
    misclassified = (wrong_images, wrong_true_labels, wrong_pred_labels)
    return metrics, misclassified

In [None]:
# Run the BoVW + SVM evaluation; keeps per-person metrics and misclassified samples.
metrics_bovw, misclassified_bovw = eval_bovw()

In [None]:
# Report precision / recall / accuracy averaged over all persons (BoVW + SVM).
print_metrics("Using BoVW with SIFT", *metrics_bovw)

In [None]:
# Show a random sample of the images the BoVW + SVM pipeline got wrong.
visualize_misclassifications("Using BoVW with SIFT", *misclassified_bovw)

In [None]:
# Run the plain SIFT-matching evaluation; keeps per-person metrics and misclassified samples.
metrics_sift, misclassified_sift = eval_sift()

In [None]:
# Report precision / recall / accuracy averaged over all persons (plain SIFT).
print_metrics("Vanilla SIFT", *metrics_sift)

In [None]:
# Show a random sample of the images plain SIFT matching got wrong.
visualize_misclassifications("Vanilla SIFT", *misclassified_sift)