In [12]:
import numpy as np 
import cv2 
import glob 
import pandas as pd 
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [13]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was pickled. The file is decoded with
        ``encoding='bytes'``, so Python 2 ``str`` values (including dict
        keys) come back as ``bytes``.

    Notes
    -----
    Unpickling can execute arbitrary code; only use this on trusted files.
    """
    import pickle

    with open(file, mode='rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [14]:
def arrayToImage(img):
    """Convert a flat, channel-planar pixel array into a 32x32x3 image.

    The input is cut into three consecutive thirds, which are treated as the
    red, green and blue planes in that order. Each plane is reshaped to
    32x32 and the planes are stacked along a new last axis.

    Parameters
    ----------
    img : numpy.ndarray
        1-D array whose thirds each hold exactly 32*32 values.

    Returns
    -------
    numpy.ndarray
        Array of shape (32, 32, 3) with channels ordered red, green, blue.
    """
    total = img.shape[0]
    # Plane boundaries: [0, total/3), [total/3, 2*total/3), [2*total/3, total).
    bounds = (0, total // 3, 2 * total // 3, total)
    planes = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        planes.append(np.reshape(img[start:stop], (32, 32)))
    return np.stack(planes, axis=-1)

In [15]:
def extract_features(images):
    """Run SIFT on every image and collect keypoints and descriptors.

    Parameters
    ----------
    images : iterable
        Images passed one by one to ``sift.detectAndCompute(image, None)``.

    Returns
    -------
    tuple of (list, list)
        ``(keypoints, descriptors)``; both lists have one entry per input
        image, in input order. Entries are stored exactly as returned by
        ``detectAndCompute``.
    """
    detector = cv2.SIFT_create()
    # One (keypoints, descriptors) pair per image, in input order.
    detections = [detector.detectAndCompute(picture, None) for picture in images]
    keypoints = [found for found, _ in detections]
    descriptors = [described for _, described in detections]
    return keypoints, descriptors

In [16]:
def generate_codebook(descriptors, num_clusters, random_state=None):
    """Fit a KMeans visual-word codebook on the pooled descriptors.

    Parameters
    ----------
    descriptors : iterable
        Per-image descriptor arrays of shape (n_keypoints, n_dims). Entries
        that are ``None`` or empty (images where no keypoints were found)
        are ignored.
    num_clusters : int
        Number of clusters (visual words) to fit.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``KMeans`` so the codebook can be made reproducible.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    sklearn.cluster.KMeans
        The fitted KMeans model.

    Raises
    ------
    ValueError
        If no image contributes any descriptor.
    """
    # Images without keypoints yield None descriptors, which
    # np.concatenate cannot handle, so drop them before pooling.
    usable = [desc for desc in descriptors if desc is not None and len(desc) > 0]
    if not usable:
        raise ValueError("No descriptors available to build the codebook.")

    all_descriptors = np.concatenate(usable, axis=0)
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    kmeans.fit(all_descriptors)
    return kmeans

In [17]:
def quantize_features(descriptors, codebook):
    """Turn per-image descriptors into normalised visual-word histograms.

    Parameters
    ----------
    descriptors : iterable
        Per-image descriptor arrays. ``None`` or empty entries (images
        without keypoints) are allowed.
    codebook : object
        Fitted clustering model exposing ``n_clusters`` and ``predict``.

    Returns
    -------
    numpy.ndarray
        One row per image with ``codebook.n_clusters`` columns. Each row is
        the histogram of assigned cluster labels, normalised to sum to 1.
        Images without descriptors get an all-zero row so that rows stay
        aligned with the image labels.
    """
    n_words = codebook.n_clusters
    # Unit-width bins [0, 1), [1, 2), ..., one per cluster label.
    bin_edges = np.arange(n_words + 1)

    quantized_features = []
    for desc in descriptors:
        if desc is None or len(desc) == 0:
            # predict() cannot be called without samples, and a density
            # histogram of nothing would be NaN; use an empty histogram.
            quantized_features.append(np.zeros(n_words))
            continue
        labels = codebook.predict(desc)
        hist, _ = np.histogram(labels, bins=bin_edges, density=True)
        quantized_features.append(hist)
    return np.array(quantized_features)

In [18]:
def train_classifier(features, labels, option, k=None):
    """Fit a standard-scaled classifier of the requested kind.

    Parameters
    ----------
    features : array-like
        Training feature matrix, one row per sample.
    labels : array-like
        Training labels, one per sample.
    option : str
        ``"SVC"`` (linear-kernel SVM), ``"KNN"`` (k-nearest neighbours) or
        ``"LogR"`` (logistic regression).
    k : int, optional
        Number of neighbours; only used when ``option == "KNN"``. When
        omitted, 5 is used.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Fitted pipeline of ``StandardScaler`` followed by the classifier.

    Raises
    ------
    ValueError
        If ``option`` is not one of the supported names.
    """
    if option == "SVC":
        estimator = SVC(kernel='linear')
    elif option == "KNN":
        # n_neighbors=None is rejected by scikit-learn, so fall back to 5
        # when the caller did not choose a value.
        estimator = KNeighborsClassifier(n_neighbors=5 if k is None else k)
    elif option == "LogR":
        estimator = LogisticRegression()
    else:
        # Fail loudly here instead of printing and then crashing on an
        # unbound `clf` a few lines later.
        raise ValueError("Invalid option. Please choose from 'SVC', 'KNN', or 'LogR'.")

    clf = make_pipeline(StandardScaler(), estimator)
    clf.fit(features, labels)
    return clf

In [19]:
def evaluate_classifier(clf, test_features, test_labels):
    """Score a fitted classifier on held-out data.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict``.
    test_features : array-like
        Feature matrix to predict on.
    test_labels : array-like
        Ground-truth labels for ``test_features``.

    Returns
    -------
    float
        Value of ``accuracy_score(test_labels, predictions)``.
    """
    return accuracy_score(test_labels, clf.predict(test_features))


In [20]:
# Locate the pickled batch files. glob returns paths in arbitrary order, so
# sort them to make the order of the loaded batches deterministic.
image_paths_train = sorted(glob.glob('./Assignment2_BikeHorses/Assignment2_BikeHorses/cifar-10-python/cifar-10-batches-py/data_batch_*'))
image_paths_test = sorted(glob.glob('./Assignment2_BikeHorses/Assignment2_BikeHorses/cifar-10-python/cifar-10-batches-py/test_batch'))

if not image_paths_train or not image_paths_test:
    raise FileNotFoundError(
        "Batch files not found; check the dataset directory used in the glob patterns above."
    )

# Training data comes only from the data_batch_* files. The test batch must
# not be appended here, otherwise the classifier is trained on the very
# samples it is later evaluated on.
image_dict_train = [unpickle(path) for path in image_paths_train]

# Test data: load each test batch file exactly once.
image_dict_test = [unpickle(path) for path in image_paths_test]

In [21]:
# Stack the per-batch pixel rows into one (n_images, n_values) matrix per split.
container_train = np.vstack([d[b'data'] for d in image_dict_train])
# Each batch stores its labels as a flat sequence; concatenate them into one
# 1-D vector so there is exactly one label per image row. (np.vstack would
# produce an (n_batches, batch_size) matrix instead.)
labels_train = np.concatenate([d[b'labels'] for d in image_dict_train])

container_test = np.vstack([d[b'data'] for d in image_dict_test])
labels_test = np.concatenate([d[b'labels'] for d in image_dict_test])

assert len(labels_train) == len(container_train), "train labels/images mismatch"
assert len(labels_test) == len(container_test), "test labels/images mismatch"

# Convert every flat pixel row into a 32x32x3 image.
images_train = [arrayToImage(row) for row in container_train]
images_test = [arrayToImage(row) for row in container_test]

In [22]:
# Feature extraction: run SIFT over both image lists.
# No train_test_split is performed here because images_train and images_test
# are already separate lists built in the earlier cells.
# extract_features returns (keypoints, descriptors), one entry per input image.
train_keypoints, train_descriptors = extract_features(images_train)
test_keypoints, test_descriptors = extract_features(images_test)

In [None]:
num_clusters = 100  # Size of the visual vocabulary; adjust as needed

# KMeans is created without an explicit random_state, in which case it draws
# from NumPy's global random generator. Seed that generator so the codebook,
# and therefore the features and the reported accuracy, are reproducible.
np.random.seed(42)
codebook = generate_codebook(train_descriptors, num_clusters)

# Feature quantization: one visual-word histogram per image
train_features = quantize_features(train_descriptors, codebook)
test_features = quantize_features(test_descriptors, codebook)

In [None]:
# Train the classifier. Labels are flattened defensively because
# scikit-learn expects a 1-D label vector with one entry per feature row.
# (The `k` argument only applies to the "KNN" option, so it is not passed.)
clf = train_classifier(train_features, np.ravel(labels_train), "LogR")

# Evaluate classifier on the held-out labels. The variable defined earlier in
# the notebook is `labels_test`; `test_labels` is never assigned anywhere.
accuracy = evaluate_classifier(clf, test_features, np.ravel(labels_test))
print("Accuracy:", accuracy)