# CV Coursework: Model 1: SIFT & SVM on original dataset

- **Module:** Computer Vision - IN3060/INM460
- **Module leader:** [Giacomo Tarroni](mailto:giacomo.tarroni@city.ac.uk)
- **CW Owner:** Mousuf C Zaman - Student No: 180021356

# Google Colab & Env Setup

In [None]:
# Pin OpenCV to one fixed release so every run of this notebook uses the same
# cv2 build (cv2.SIFT_create is called later in the notebook).
!pip install opencv-python==4.5.5.64

import os
from google.colab import drive
import numpy as np
from skimage import io
from collections import Counter
import sys
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import cv2
from skimage import color, img_as_ubyte
from sklearn.cluster import MiniBatchKMeans
import random
from joblib import dump, load
from sklearn import metrics

drive.mount('/content/drive')

# TODO: Fill in the Google Drive path where you uploaded the lab materials
# Example: GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = 'Colab Notebooks/Lab materials 01-20210104'

GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = '11 - University/Colab Notebooks/Computer Vision Lab/CW_Folder_UG' 
GOOGLE_DRIVE_PATH = os.path.join('drive', 'My Drive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
print(os.listdir(GOOGLE_DRIVE_PATH))

%load_ext autoreload
%autoreload 2

# Data loading

In [None]:
# Path of the zipped dataset inside the coursework folder on Drive
zip_path = os.path.join(GOOGLE_DRIVE_PATH, 'CW_Dataset/CV2023_CW_Dataset.zip')

# Copy the archive into the current working directory of the Colab VM
!cp '{zip_path}' .

# Extract it quietly; piping `yes` answers any prompt unzip may raise
# (e.g. when files from a previous run already exist)
!yes|unzip -q CV2023_CW_Dataset.zip

# Delete the local copy of the archive (the original on Drive is untouched)
!rm CV2023_CW_Dataset.zip


def _load_split(split_path, verbose=False):
    """Load every labelled ``.jpeg`` image found in one dataset folder.

    Args:
        split_path: Folder containing an ``images`` and a ``labels`` sub-folder.
            The label of ``images/<name>.jpeg`` is read from
            ``labels/<name>.txt`` and parsed as an integer.
        verbose: When true, print each image path and its label while loading.

    Returns:
        A ``(images, labels)`` pair of equally long lists.

    Raises:
        ValueError: If a label file does not contain an integer.
    """
    images_dir = os.path.join(split_path, 'images')
    labels_dir = os.path.join(split_path, 'labels')

    split_images = []
    split_labels = []

    # Sorted so the load order does not depend on the filesystem; a seeded
    # split of the returned lists is then reproducible between runs.
    for img_file in sorted(os.listdir(images_dir)):
        if not img_file.endswith('.jpeg'):
            continue

        img_path = os.path.join(images_dir, img_file)
        label_name = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(labels_dir, label_name)

        # An image without a label is skipped entirely. Appending it anyway
        # would leave `images` longer than `labels` and silently pair every
        # later image with the wrong label.
        if not os.path.exists(label_path):
            print('No label found for image:', img_path)
            continue

        if verbose:
            print('Loading image:', img_path)

        # Parse the label first so a malformed file cannot leave a dangling image
        with open(label_path, 'r') as f:
            label = int(f.read().strip())

        split_images.append(io.imread(img_path))
        split_labels.append(label)

        if verbose:
            print('Label for image', img_path, ':', label)

    return split_images, split_labels


def load_images_and_labels(train_path, test_path):
    """Load the train and test folders into one combined, aligned dataset.

    Images from ``train_path`` come first, followed by those from
    ``test_path``. ``images[i]`` always corresponds to ``labels[i]``; images
    that have no label file are reported and left out.

    Args:
        train_path: Folder with ``images``/``labels`` sub-folders (loaded verbosely).
        test_path: Folder with ``images``/``labels`` sub-folders (loaded quietly).

    Returns:
        ``(images, labels, label_counts)`` where ``label_counts`` is a
        ``Counter`` mapping each label to its number of images.
    """
    images, labels = _load_split(train_path, verbose=True)

    test_images, test_labels = _load_split(test_path)
    images.extend(test_images)
    labels.extend(test_labels)

    # Count the number of images per label class
    label_counts = Counter(labels)
    print('Number of images per label class:', label_counts)

    return images, labels, label_counts

###############################################
# Load both dataset folders into one combined list of images and labels
train_path, test_path = 'train', 'test'
images, labels, label_counts = load_images_and_labels(train_path, test_path)

# Test if the images have loaded correctly by displaying two random samples.
# NOTE(review): both indices below are drawn from the combined `images` list,
# so the "train"/"test" naming does not guarantee which folder a sample came
# from, and the second draw never selects the last image.

# Random index for the image shown on the left
train_idx = np.random.choice(range(len(images)), size=1, replace=False)[0]
train_filtered_image, train_filtered_label = images[train_idx], labels[train_idx]

# Random index for the image shown on the right
test_index = np.random.choice(range(len(images) - 1), size=1, replace=False)[0]
test_filtered_image, test_filtered_label = images[test_index], labels[test_index]

# One row, two panels: each panel shows an image with its label as the title
fig, axs = plt.subplots(1, 2, figsize=(4, 2))
preview_panels = (
    ('Train Image Label: ', train_filtered_image, train_filtered_label),
    ('Test Image Label: ', test_filtered_image, test_filtered_label),
)
for panel_axis, (title_prefix, picture, picture_label) in zip(axs, preview_panels):
    panel_axis.imshow(picture)
    panel_axis.set_title(title_prefix + str(picture_label))

plt.tight_layout()
plt.show()

# Implementing feature descriptor

In [None]:
# Unbalanced problem: stratify the split on the labels so both partitions keep
# the same class proportions (20% of the samples are held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

# SIFT keypoint detector / descriptor extractor
sift = cv2.SIFT_create()

# Every training image is rescaled to this size before feature extraction
target_size = (256, 256)

# Per-image descriptor arrays, plus the labels of the images that produced them
des_list = []
y_train_list = []

# 2x5 grid used to preview the keypoints of the first 10 training images
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)

for i, (train_image, train_label) in enumerate(zip(X_train, y_train)):
    # Resize, then convert to an 8-bit grayscale image for SIFT
    img = cv2.resize(train_image, target_size)
    img_gray = img_as_ubyte(color.rgb2gray(img))

    # Detect keypoints and compute their descriptors
    kp, des = sift.detectAndCompute(img_gray, None)

    # Preview the keypoints of the first 10 images
    if i < 10:
        row, col = divmod(i, 5)
        img_with_SIFT = cv2.drawKeypoints(img_gray, kp, img_gray)
        ax[row][col].imshow(img_with_SIFT)
        ax[row][col].set_axis_off()

    # Images without any descriptor contribute nothing to the training data
    if des is None:
        continue
    des_list.append(des)
    y_train_list.append(train_label)

# Stack all descriptors row-wise into one array for clustering
des_array = np.vstack(des_list)

#########################################################################
## Clustering the descriptors
# Number of centroids/codewords: good rule of thumb is 10*num_classes
k = len(np.unique(y_train)) * 10

# Use MiniBatchKMeans for faster computation and lower memory usage.
# Each mini-batch holds a quarter of the descriptors; clamp to at least one
# sample so a very small descriptor set cannot produce a batch size of 0.
batch_size = max(1, des_array.shape[0] // 4)

# Seed the clustering (same seed as the train/test split). Only the SVM is
# saved to disk later in this notebook, not this codebook, so the codebook has
# to be reproducible for a reloaded classifier to see the same codewords.
kmeans = MiniBatchKMeans(
    n_clusters=k,
    batch_size=batch_size,
    random_state=42,
).fit(des_array)


###################################################################
# Testing clustering
# Take the first training image and preprocess it exactly like the images the
# codebook was built from (resize to target_size, then 8-bit grayscale)
test_img = cv2.resize(X_train[0], target_size)
test_img_gray = img_as_ubyte(color.rgb2gray(test_img))

# Detect keypoints and extract descriptors with SIFT
kp, des = sift.detectAndCompute(test_img_gray, None)

# Predict visual words for the descriptors using the trained KMeans model.
# detectAndCompute returns None when no keypoint is found, which
# kmeans.predict cannot handle, so fall back to an empty result.
if des is None:
    visual_words = np.array([], dtype=int)
else:
    visual_words = kmeans.predict(des)

print(visual_words)

###################################################################
# Histogram
# Convert descriptors into histograms of codewords for each image
hist_list = []
idx_list = []

for des in des_list:
    # Codeword index assigned to each descriptor of this image
    idx = kmeans.predict(des)
    idx_list.append(idx)

    # Occurrences of each of the k codewords, divided by the number of
    # descriptors so the histogram of every image sums to 1
    hist = np.bincount(idx, minlength=k) / len(des)
    hist_list.append(hist)

hist_array = np.vstack(hist_list)

# Pool the codeword indices of all images into one array before plotting:
# handing ax.hist a sequence of per-image arrays would draw a separate
# dataset for every image. The bin edges are centred on the integer codeword
# ids 0..k-1 so each codeword gets exactly one bar.
fig, ax = plt.subplots(figsize=(8, 3))
ax.hist(np.concatenate(idx_list), bins=np.arange(k + 1) - 0.5)
ax.set_title('Codewords occurrence in training set')
plt.show()

# Training a classifier

In [None]:
# NOTE(review): train_SVM is a project-local module; it must be importable
# from the current session (working directory or sys.path) - confirm.
from train_SVM import train_rbf_SVM
##############################################################################
# TODO: Implementing a different type of SVM                                 #
##############################################################################
# Fit the SVM on the bag-of-visual-words histograms of the training images
classifier = train_rbf_SVM(hist_array, y_train_list)
##############################################################################
#                             END OF YOUR CODE                               #
##############################################################################

# Save the trained model and reload it from the very same file. The reload
# previously used MODEL_PATH, which is never defined in this notebook (and did
# not match the location the model was written to).
model_file = 'SVM_SIFT_CWDataset.joblib'
dump(classifier, model_file)
classifier = load(model_file)

# Running the feature descriptor on the test set data

In [None]:
# Bag-of-visual-words histograms of the test images, and the positions (in
# X_test) of the images that yielded at least one SIFT descriptor
hist_list = []
idx_not_empty = []

for i, test_image in enumerate(X_test):
    # Same preprocessing as for the training images: resize to target_size
    # first, then convert to 8-bit grayscale. Skipping the resize would give
    # descriptors at a different scale from the ones the codebook was fit on.
    img = img_as_ubyte(color.rgb2gray(cv2.resize(test_image, target_size)))
    kp, des = sift.detectAndCompute(img, None)

    # Images with no descriptors cannot be represented as a histogram
    if des is None:
        continue

    # Normalised codeword occurrences (each histogram sums to 1)
    idx = kmeans.predict(des)
    hist = np.bincount(idx, minlength=k) / len(des)

    hist_list.append(hist)
    idx_not_empty.append(i)

# Drop the images without descriptors from BOTH the images and the labels so
# that X_test, y_test and the predictions below stay index-aligned. This also
# makes the cell safe to re-run: a second pass keeps every remaining image.
X_test = [X_test[i] for i in idx_not_empty]
y_test = [y_test[i] for i in idx_not_empty]
hist_array = np.vstack(hist_list)

##########################################################################
# Predict the classes of each test image
y_pred = classifier.predict(hist_array).tolist()

## Result

In [None]:
# Show four randomly chosen test images with their true and predicted labels
fig, axes = plt.subplots(2, 2, figsize=(4, 4), sharex=True, sharey=True)
ax = axes.ravel()

random_indices = random.sample(range(len(X_test)), 4)

for panel, sample_idx in zip(ax, random_indices):
    panel.imshow(X_test[sample_idx])
    panel.set_title(f'Label: {y_test[sample_idx]} \n Prediction: {y_pred[sample_idx]}')
    panel.set_axis_off()

fig.tight_layout()
plt.show()

# Text summary of the per-class metrics (zero_division=0 is passed through
# to scikit-learn's classification_report)
report_text = metrics.classification_report(y_test, y_pred, zero_division=0)
print(f"""Classification report for classifier {classifier}:
      {report_text}\n""")

# Confusion matrix of true vs. predicted labels
metrics.ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.show()