In [42]:
import numpy as np 
import matplotlib.pyplot as plt 
import cv2
import os
import glob
import pandas as pd 
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [43]:
print(os.getcwd())

/home/siddharth/VR_ImageProcessing/Bikes_Horses


In [44]:
def read_dataset():
    image_paths_horses = glob.glob("./Assignment2_BikeHorses/Assignment2_BikeHorses/Horses/*.jpg")
    image_paths_bike = glob.glob("./Assignment2_BikeHorses/Assignment2_BikeHorses/Bikes/*.jpg")
    labels = []
    images = []
    for i in image_paths_horses:
        img = cv2.imread(i,0)
        images.append(img)
        labels.append(0)
    for i in image_paths_bike:
        img = cv2.imread(i,0)
        images.append(img)
        labels.append(1)
    
    return np.asarray(images), np.asarray(labels)

In [45]:
def load_dataset(folder_path, image_size):
    images = []
    labels = []
    for label, class_name in enumerate(['Bikes', 'Horses']):
        class_folder = os.path.join(folder_path, class_name)
        for file_name in os.listdir(class_folder):
            img = cv2.imread(os.path.join(class_folder, file_name), cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, image_size)  # Resize image to a common size
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)

In [46]:
def getDescriptorsArray(images, extractor):
    descriptors = []

    for i in range(0,len(images)): 
        keypoints,descriptor= extractor.detectAndCompute(images[i], None)
        descriptors.extend(descriptor)
    
    return np.vstack(descriptors)

In [47]:
def image2vec(images,kmeans,extractor,n_clusters):
    vec = []
    for image in images:

        keypoints,descriptor = extractor.detectAndCompute(image, None)
        img_vec = [0]*n_clusters
        for d in descriptor:
            s = d.reshape(1,-1)
            c = kmeans.predict(s)
            img_vec[c[0]] +=1 
        vec.append(img_vec)

    return np.array(vec)

In [48]:
def generate_codebook(descriptors, num_clusters):
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, verbose=0)
    kmeans.fit(descriptors)
    return kmeans

In [49]:
def getMisclassificationsSIFT(labels, df, model, images, type):
    dir = os.getcwd()

    predictions = model.predict(df)
    misclassified = []
    for i in range(len(predictions)):
        if (labels[i] != predictions[i]):
            misclassified.append(i)

    for i in misclassified:
        filename = os.path.join(dir, f'Misclassify_SIFT_{type}_{i}.jpg')
        cv2.imwrite(filename, images[i])

In [50]:
def getMisclassificationsORB(labels, df, model, images, type):
    dir = os.getcwd()

    predictions = model.predict(df)
    misclassified = []
    for i in range(len(predictions)):
        if (labels[i] != predictions[i]):
            misclassified.append(i)

    for i in misclassified:
        filename = os.path.join(dir, f'Misclassify_ORB_{type}_{i}.jpg')
        cv2.imwrite(filename, images[i])

In [51]:
images, labels = read_dataset()

  return np.asarray(images), np.asarray(labels)


# SIFT Detection

In [52]:
extractor = cv2.SIFT_create()
descriptors = getDescriptorsArray(images, extractor)
print(descriptors)

[[ 0.  0.  0. ... 18.  7.  2.]
 [ 6.  9. 48. ...  1.  0.  4.]
 [ 0.  0.  1. ...  0.  0.  1.]
 ...
 [64.  4.  0. ...  0.  1.  8.]
 [22.  6.  1. ...  0.  0.  6.]
 [54. 12.  3. ...  0.  0.  1.]]


In [53]:
num_clusters = 200

kmeans = generate_codebook(descriptors, num_clusters)
vec = image2vec(images,kmeans,extractor,num_clusters)



In [54]:
df = pd.DataFrame(vec)
labels = pd.DataFrame({"labels":labels})

In [55]:
labels = np.asarray(labels)
X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)

In [56]:
svc = SVC()
svc.fit(X_train,y_train)
print(svc.score(X_test,y_test))

0.9722222222222222


  y = column_or_1d(y, warn=True)


In [57]:
getMisclassificationsSIFT(labels, df, svc, images, "SVC")

In [58]:
svc_linear = SVC(kernel='linear')
svc_linear.fit(X_train,y_train)
print(svc_linear.score(X_test,y_test))

0.9722222222222222


  y = column_or_1d(y, warn=True)


In [59]:
getMisclassificationsSIFT(labels, df, svc_linear, images, "SVC_linear")

In [60]:
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
print(lr.score(X_test,y_test))

  y = column_or_1d(y, warn=True)


1.0


In [61]:
getMisclassificationsSIFT(labels, df, lr, images, "LR")

In [62]:
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train,y_train)
print(clf.score(X_test,y_test))

0.9722222222222222


  return self._fit(X, y)


In [63]:
getMisclassificationsSIFT(labels, df, clf, images, "KNN")

# ORB Detection

In [70]:
images, labels = read_dataset()

  return np.asarray(images), np.asarray(labels)


In [71]:
extractor = cv2.ORB_create()
descriptors = getDescriptorsArray(images, extractor)
print(descriptors)

[[182 239  64 ... 255 120 136]
 [ 43  31  98 ...  46 121 215]
 [147 214 122 ... 140 137  29]
 ...
 [ 24 151 125 ... 128 133 249]
 [181  86 236 ... 248 216 113]
 [129  50 236 ... 192  85 125]]


In [69]:
num_clusters = 200

kmeans = generate_codebook(descriptors, num_clusters)
vec = image2vec(images,kmeans,extractor,num_clusters)



In [72]:
df = pd.DataFrame(vec)
labels = pd.DataFrame({"labels":labels})

In [76]:
X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)
labels = np.asarray(labels)

In [77]:
svc = SVC()
svc.fit(X_train,y_train)
print(svc.score(X_test,y_test))

0.9444444444444444


  y = column_or_1d(y, warn=True)


In [78]:
getMisclassificationsORB(labels, df, svc, images, "SVC")

In [79]:
svc_linear = SVC(kernel='linear')
svc_linear.fit(X_train,y_train)
print(svc_linear.score(X_test,y_test))

0.8333333333333334


  y = column_or_1d(y, warn=True)


In [80]:
getMisclassificationsORB(labels, df, svc_linear, images, "SVC_linear")

In [81]:
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
print(lr.score(X_test,y_test))

0.8888888888888888


  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [82]:
getMisclassificationsORB(labels, df, lr, images, "LR")

In [83]:
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train,y_train)
print(clf.score(X_test,y_test))

0.8055555555555556


  return self._fit(X, y)


In [84]:
getMisclassificationsORB(labels, df, clf, images, "KNN")