In [1]:
# import libraries
import cv2
import os
import glob
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

In [2]:
# load the data
images_train = glob.glob('../lfi-03/images/db/train/*/*.jpg')
images_train

['../lfi-03/images/db/train\\cars\\1137646735_2fb2752249.jpg',
 '../lfi-03/images/db/train\\cars\\2539497709_756f025f62.jpg',
 '../lfi-03/images/db/train\\cars\\2847324790_6dd07ffb54.jpg',
 '../lfi-03/images/db/train\\cars\\car001.jpg',
 '../lfi-03/images/db/train\\cars\\car002.jpg',
 '../lfi-03/images/db/train\\cars\\car003.jpg',
 '../lfi-03/images/db/train\\faces\\3573927657_df093bae27.jpg',
 '../lfi-03/images/db/train\\faces\\3775982780_d5ea306ce6.jpg',
 '../lfi-03/images/db/train\\faces\\397642272_cedf622248_z.jpg',
 '../lfi-03/images/db/train\\faces\\61060671.jpg',
 '../lfi-03/images/db/train\\faces\\face.jpg',
 '../lfi-03/images/db/train\\faces\\face2.jpg',
 '../lfi-03/images/db/train\\faces\\images.jpg',
 '../lfi-03/images/db/train\\faces\\img_1577_crazy-face.jpg',
 '../lfi-03/images/db/train\\flowers\\0106476_1.jpg',
 '../lfi-03/images/db/train\\flowers\\2682530432_e470494b40.jpg',
 '../lfi-03/images/db/train\\flowers\\3826419553_c00dc0e91c.jpg',
 '../lfi-03/images/db/train\\fl

In [3]:
def create_keypoints(w, h):
    keypoints = []
    keypointSize = 21
    # please sample the image uniformly in a grid
    # find the keypoint size and number of sample points
    # as hyperparameters

    keypoints = [cv2.KeyPoint(i, j, keypointSize) for i in range(h) for j in range(w)]
    
    return keypoints

In [4]:
# 1. Implement a SIFT feature extraction for a set of training images ./images/db/train/** (see 2.3 image retrieval)
# use ~15x15 keypoints on each image with subwindow of 21px (diameter)

def sift_feature_extraction(images):
    
    # 2. create keypoints on a regular grid (cv2.KeyPoint(r, c, keypointSize), as keypoint size use e.g. 11)
    descriptors = []
    keypoints = create_keypoints(15, 15)
    
    labels = []

    # 3. use the keypoints for each image and compute SIFT descriptors
    #    for each keypoint. this compute one descriptor for each image.

    for image in images:
        
        img_type = image.split('\\')[1]
        labels.append(img_type)
        
        # read the images
        image = cv2.imread(image)

        if image.shape != (15, 15):
            #print(f'not shaped: {image.shape}')
            image = cv2.resize(image, (int(15), int(15)))   

        # convert to gray scale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # sift transformation
        sift = cv2.SIFT_create()
        kp, descriptor = sift.compute(gray, keypoints)

        # add the descriptor in the decriptors
        descriptors.append(descriptor.flatten())
        
    return descriptors, labels

In [5]:
# 2. each descriptor (set of features) need to be flattened in one vector
# That means you need a X_train matrix containing a shape of (num_train_images, num_keypoints*num_entry_per_keypoint)
# num_entry_per_keypoint = histogram orientations as talked about in class
# You also need a y_train vector containing the labels encoded as integers

training_data = sift_feature_extraction(images_train)

x_train = training_data[0]
x_train = np.stack(x_train, axis = 0)
print(x_train.shape)

train_lables = training_data[1]

# Create a dictionary to map labels to integers
label_mapping = {label: index for index, label in enumerate(set(train_lables))}

y_trains = np.array([label_mapping[x] for x in train_lables])

print(y_trains)

(20, 28800)
[2 2 2 2 2 2 1 1 1 1 1 1 1 1 0 0 0 0 0 0]


In [6]:
# Initialize StratifiedShuffleSplit with the desired test_size
stratified_splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=201)

# Create indices for the train and validation sets using the splitter
train_indices, val_indices = next(stratified_splitter.split(x_train, y_trains))

# Create the training and validation sets
X_train, X_val = x_train[train_indices], x_train[val_indices]
y_train, y_val = y_trains[train_indices], y_trains[val_indices]

print(X_train.shape, X_val.shape, len(y_train), len(y_val))

(16, 28800) (4, 28800) 16 4


In [7]:
# Define a list of kernel options to test
kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']

for kernel in kernel_options:
    # Create SVM classifier with the specified kernel
    clf = svm.SVC(kernel=kernel)

    # Train the classifier
    clf.fit(X_train, y_train)

    # Predict on the validation set
    y_pred = clf.predict(X_val)

    # Evaluate the performance
    accuracy = accuracy_score(y_val, y_pred)
    print(f"Kernel: {kernel}, Validation Accuracy: {accuracy * 100:.2f}%")

Kernel: linear, Validation Accuracy: 75.00%
Kernel: poly, Validation Accuracy: 75.00%
Kernel: rbf, Validation Accuracy: 100.00%
Kernel: sigmoid, Validation Accuracy: 50.00%


In [9]:
# make the test data

test_path = glob.glob('../lfi-03/images/db/test/*.jpg')

test_info = sift_feature_extraction(test_path)

test_labels = test_info[1]

test_labels = [x.replace('.jpg', '') for x in test_labels]

for label in test_labels:
    if label == 'car':
        test_labels[test_labels.index(label)] = 'cars'
    elif label == 'face':
        test_labels[test_labels.index(label)] = 'faces'
    else:
        test_labels[test_labels.index(label)] = 'flowers'

y_test = np.array([label_mapping[x] for x in test_labels])

print(y_test)
print(label_mapping)

X_test = test_info[0]
X_test = np.stack(X_test, axis = 0)
print(X_test.shape, y_test.shape)

[2 1 0 0]
{'flowers': 0, 'faces': 1, 'cars': 2}
(4, 28800) (4,)


### Apply Model

In [10]:
# Create SVM classifier with the linear kernel
clf = svm.SVC(kernel = 'rbf')

# Train the classifier
clf.fit(x_train, y_trains)

# predic the SVM on test data
y_pred = clf.predict(X_test)

print(f'Predicted Values: {y_pred}')
print(f'Original test data: {y_test}')

# Evaluate the performance
accuracy = accuracy_score(y_test, y_pred)

print(f'Final Accuracy: {accuracy * 100} %')

print(f'Mapped Class name: {label_mapping}')

Predicted Values: [2 1 0 0]
Original test data: [2 1 0 0]
Final Accuracy: 100.0 %
Mapped Class name: {'flowers': 0, 'faces': 1, 'cars': 2}
