In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import cv2
import os
import glob
import pandas as pd 
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Show the working directory: read_dataset() resolves its "./Assignment2_BikeHorses/..."
# globs relative to it, and the misclassification helpers write their images into it.
print(os.getcwd())

In [None]:
def read_dataset():
    image_paths_horses = glob.glob("./Assignment2_BikeHorses/Assignment2_BikeHorses/Horses/*.jpg")
    image_paths_bike = glob.glob("./Assignment2_BikeHorses/Assignment2_BikeHorses/Bikes/*.jpg")
    labels = []
    images = []
    for i in image_paths_horses:
        img = cv2.imread(i,0)
        images.append(img)
        labels.append(0)
    for i in image_paths_bike:
        img = cv2.imread(i,0)
        images.append(img)
        labels.append(1)
    
    return np.asarray(images), np.asarray(labels)

In [None]:
def load_dataset(folder_path, image_size):
    """Load grayscale images from ``<folder_path>/Bikes`` and ``<folder_path>/Horses``.

    Every readable image is resized to ``image_size`` so the result can be
    stacked into one array. Directory entries that OpenCV cannot decode
    (non-image files, corrupt images) are skipped instead of crashing the
    resize step.

    Note: labels follow the folder order used here (Bikes = 0, Horses = 1),
    which is the opposite of the mapping produced by ``read_dataset``.

    Args:
        folder_path: directory containing the ``Bikes`` and ``Horses`` folders.
        image_size: target size handed unchanged to ``cv2.resize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays with one entry per image.
    """
    images = []
    labels = []
    for label, class_name in enumerate(['Bikes', 'Horses']):
        class_folder = os.path.join(folder_path, class_name)
        # os.listdir order is arbitrary; sort for a reproducible dataset order.
        for file_name in sorted(os.listdir(class_folder)):
            img = cv2.imread(os.path.join(class_folder, file_name), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Not a decodable image; cv2.resize would raise on None.
                continue
            images.append(cv2.resize(img, image_size))  # common size for stacking
            labels.append(label)
    return np.array(images), np.array(labels)

In [None]:
def getDescriptorsArray(images, extractor):
    """Pool the local feature descriptors of all images into one 2-D array.

    Args:
        images: iterable of images accepted by ``extractor.detectAndCompute``.
        extractor: object exposing ``detectAndCompute(image, mask)`` that
            returns ``(keypoints, descriptors)``.

    Returns:
        np.ndarray: one row per descriptor, concatenated in image order.

    Raises:
        ValueError: from ``np.vstack`` when no image produced any descriptor.
    """
    per_image = []
    for image in images:
        _, descriptor = extractor.detectAndCompute(image, None)
        # An image without keypoints yields no descriptors (None); skip it
        # instead of failing on extend/stack.
        if descriptor is None or len(descriptor) == 0:
            continue
        per_image.append(descriptor)

    # Stacking the per-image 2-D blocks gives the same rows as stacking every
    # descriptor individually, without building a huge Python list of rows.
    return np.vstack(per_image)

In [None]:
def image2vec(images, kmeans, extractor, n_clusters):
    """Encode each image as a histogram of visual-word (cluster) counts.

    Args:
        images: iterable of images accepted by ``extractor.detectAndCompute``.
        kmeans: fitted clustering model whose ``predict`` maps a 2-D array of
            descriptors to integer cluster ids.
        extractor: object exposing ``detectAndCompute(image, mask)`` that
            returns ``(keypoints, descriptors)``.
        n_clusters: histogram length; expected to equal the number of clusters
            in ``kmeans``.

    Returns:
        np.ndarray: one row per image with ``n_clusters`` integer counts. An
        image without descriptors gets an all-zero row.
    """
    histograms = []
    for image in images:
        _, descriptor = extractor.detectAndCompute(image, None)
        if descriptor is None or len(descriptor) == 0:
            # No keypoints found: keep the row so vectors stay aligned with labels.
            histograms.append(np.zeros(n_clusters, dtype=int))
            continue
        # One batched predict per image instead of one call per descriptor.
        words = np.asarray(kmeans.predict(descriptor))
        histograms.append(np.bincount(words, minlength=n_clusters))

    return np.array(histograms)

In [None]:
def generate_codebook(descriptors, num_clusters):
    """Fit a K-Means codebook on the pooled descriptors.

    Args:
        descriptors: 2-D array-like of descriptors to cluster.
        num_clusters: number of clusters (codebook size).

    Returns:
        The fitted ``KMeans`` model (seeded with ``random_state=42``).
    """
    codebook = KMeans(n_clusters=num_clusters, random_state=42, verbose=0)
    # fit() returns the estimator itself, so the fitted model is handed back directly.
    return codebook.fit(descriptors)

In [None]:
def getMisclassificationsSIFT(labels, df, model, images, type):
    """Write every image the model misclassifies to the working directory.

    Predictions are computed for all rows of ``df`` and compared with
    ``labels``. Each mismatching image is saved as
    ``Misclassify_SIFT_<type>_<index>.jpg`` in ``os.getcwd()``.

    Args:
        labels: true labels aligned with the rows of ``df``; may be a 1-D
            sequence, an ``(n, 1)`` column vector or a single-column DataFrame.
        df: feature matrix passed to ``model.predict``.
        model: fitted estimator exposing ``predict``.
        images: images indexable by the same positions as ``labels``.
        type: short classifier tag embedded in the output file names.

    Returns:
        list[int]: positions of the misclassified samples, in ascending order.

    Raises:
        IndexError: if ``labels`` has fewer entries than there are predictions.
    """
    out_dir = os.getcwd()

    predictions = np.asarray(model.predict(df)).ravel()
    # Flatten so column vectors / DataFrames compare element-wise with predictions.
    truth = np.asarray(labels).ravel()
    if truth.shape[0] < predictions.shape[0]:
        raise IndexError("labels has fewer entries than the model produced predictions")
    truth = truth[:predictions.shape[0]]

    misclassified = np.flatnonzero(truth != predictions).tolist()

    for i in misclassified:
        filename = os.path.join(out_dir, f'Misclassify_SIFT_{type}_{i}.jpg')
        cv2.imwrite(filename, images[i])

    return misclassified

In [None]:
def getMisclassificationsORB(labels, df, model, images, type):
    """Write every image the model misclassifies to the working directory.

    Predictions are computed for all rows of ``df`` and compared with
    ``labels``. Each mismatching image is saved as
    ``Misclassify_ORB_<type>_<index>.jpg`` in ``os.getcwd()``.

    Args:
        labels: true labels aligned with the rows of ``df``; may be a 1-D
            sequence, an ``(n, 1)`` column vector or a single-column DataFrame.
        df: feature matrix passed to ``model.predict``.
        model: fitted estimator exposing ``predict``.
        images: images indexable by the same positions as ``labels``.
        type: short classifier tag embedded in the output file names.

    Returns:
        list[int]: positions of the misclassified samples, in ascending order.

    Raises:
        IndexError: if ``labels`` has fewer entries than there are predictions.
    """
    out_dir = os.getcwd()

    predictions = np.asarray(model.predict(df)).ravel()
    # Flatten so column vectors / DataFrames compare element-wise with predictions.
    truth = np.asarray(labels).ravel()
    if truth.shape[0] < predictions.shape[0]:
        raise IndexError("labels has fewer entries than the model produced predictions")
    truth = truth[:predictions.shape[0]]

    misclassified = np.flatnonzero(truth != predictions).tolist()

    for i in misclassified:
        filename = os.path.join(out_dir, f'Misclassify_ORB_{type}_{i}.jpg')
        cv2.imwrite(filename, images[i])

    return misclassified

In [None]:
# Load the grayscale images and their 0/1 labels (horses = 0, bikes = 1).
images, labels = read_dataset()

# SIFT Detection

In [None]:
# Create a SIFT extractor and pool the descriptors of all images into one array.
extractor = cv2.SIFT_create()
descriptors = getDescriptorsArray(images, extractor)
# Quick visual check of the pooled descriptor array.
print(descriptors)

In [None]:
# Codebook size: number of K-Means clusters, i.e. the length of each image vector.
num_clusters = 200

# Cluster the pooled descriptors, then encode every image as a histogram of
# how many of its descriptors fall into each cluster.
kmeans = generate_codebook(descriptors, num_clusters)
vec = image2vec(images,kmeans,extractor,num_clusters)

In [None]:
# Wrap the per-image histograms and the labels in DataFrames (one row per image).
# Note: `labels` is rebound here from the array returned by read_dataset().
df = pd.DataFrame(vec)
labels = pd.DataFrame({"labels":labels})

In [None]:
# Flatten the single-column label frame to a 1-D array: np.asarray() alone gives
# shape (n, 1), and scikit-learn estimators expect y of shape (n_samples,)
# (a column vector triggers a DataConversionWarning on fit).
labels = np.asarray(labels).ravel()
# Stratified 80/20 split with a fixed seed so both classes keep their proportions.
X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)

In [None]:
# SVC with default settings: fit on the training split, print its score on the test split.
svc = SVC()
svc.fit(X_train,y_train)
print(svc.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_SIFT_SVC_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsSIFT(labels, df, svc, images, "SVC")

In [None]:
# SVC with a linear kernel: fit on the training split, print its score on the test split.
svc_linear = SVC(kernel='linear')
svc_linear.fit(X_train,y_train)
print(svc_linear.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_SIFT_SVC_linear_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsSIFT(labels, df, svc_linear, images, "SVC_linear")

In [None]:
# Logistic regression (seeded, otherwise default settings): fit on the training
# split, print its score on the test split.
# NOTE(review): the histogram features are not scaled here; if the solver emits
# a ConvergenceWarning, scaling or a higher max_iter may be needed - confirm.
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
print(lr.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_SIFT_LR_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsSIFT(labels, df, lr, images, "LR")

In [None]:
# k-nearest-neighbours classifier with k = 5: fit on the training split,
# print its score on the test split.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train,y_train)
print(clf.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_SIFT_KNN_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsSIFT(labels, df, clf, images, "KNN")

# ORB Detection

In [None]:
# Reload the dataset so the ORB section starts from fresh `images` and `labels`
# (the SIFT section above rebound `labels` to a DataFrame / array).
images, labels = read_dataset()

In [None]:
# Create an ORB extractor and pool the descriptors of all images into one array.
# This rebinds `extractor` and `descriptors` from the SIFT section.
extractor = cv2.ORB_create()
descriptors = getDescriptorsArray(images, extractor)
# Quick visual check of the pooled descriptor array.
print(descriptors)

In [None]:
# Codebook size: number of K-Means clusters, i.e. the length of each image vector.
num_clusters = 200

# Cluster the pooled ORB descriptors, then encode every image as a histogram of
# how many of its descriptors fall into each cluster.
kmeans = generate_codebook(descriptors, num_clusters)
vec = image2vec(images,kmeans,extractor,num_clusters)

In [None]:
# Wrap the per-image histograms and the labels in DataFrames (one row per image).
# Note: `labels` is rebound here from the array returned by read_dataset().
df = pd.DataFrame(vec)
labels = pd.DataFrame({"labels":labels})

In [None]:
# Convert the single-column label frame to a 1-D array *before* splitting, as in
# the SIFT section: scikit-learn estimators expect y of shape (n_samples,), and
# a DataFrame / column vector triggers a DataConversionWarning on fit.
labels = np.asarray(labels).ravel()
# Stratified 80/20 split with a fixed seed so both classes keep their proportions.
X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)

In [None]:
# SVC with default settings on the ORB features: fit on the training split,
# print its score on the test split.
svc = SVC()
svc.fit(X_train,y_train)
print(svc.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_ORB_SVC_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsORB(labels, df, svc, images, "SVC")

In [None]:
# SVC with a linear kernel on the ORB features: fit on the training split,
# print its score on the test split.
svc_linear = SVC(kernel='linear')
svc_linear.fit(X_train,y_train)
print(svc_linear.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_ORB_SVC_linear_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsORB(labels, df, svc_linear, images, "SVC_linear")

In [None]:
# Logistic regression (seeded, otherwise default settings) on the ORB features:
# fit on the training split, print its score on the test split.
# NOTE(review): the histogram features are not scaled here; if the solver emits
# a ConvergenceWarning, scaling or a higher max_iter may be needed - confirm.
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
print(lr.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_ORB_LR_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsORB(labels, df, lr, images, "LR")

In [None]:
# k-nearest-neighbours classifier with k = 5 on the ORB features: fit on the
# training split, print its score on the test split.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train,y_train)
print(clf.score(X_test,y_test))

In [None]:
# Save the images this model gets wrong as Misclassify_ORB_KNN_<index>.jpg.
# Predictions are made on the full feature frame `df`, not only the test split.
getMisclassificationsORB(labels, df, clf, images, "KNN")