In [84]:
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [85]:
def load_dataset(folder_path, image_size):
    images = []
    labels = []
    for label, class_name in enumerate(['Bikes', 'Horses']):
        class_folder = os.path.join(folder_path, class_name)
        for file_name in os.listdir(class_folder):
            img = cv2.imread(os.path.join(class_folder, file_name), cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, image_size)  # Resize image to a common size
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)

In [86]:
def extract_features(images):
    sift = cv2.SIFT_create()
    keypoints = []
    descriptors = []
    for image in images:
        kp, desc = sift.detectAndCompute(image, None)
        keypoints.append(kp)
        descriptors.append(desc)
    return keypoints, descriptors

In [87]:
def generate_codebook(descriptors, num_clusters):
    all_descriptors = np.concatenate(descriptors, axis=0)
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(all_descriptors)
    return kmeans

In [88]:
def quantize_features(descriptors, codebook):
    quantized_features = []
    for desc in descriptors:
        labels = codebook.predict(desc)
        hist, _ = np.histogram(labels, bins=range(codebook.n_clusters + 1), density=True)
        quantized_features.append(hist)
    return np.array(quantized_features)


In [89]:
def train_classifier(features, labels, option, k=None):
    if option == "SVC":
        clf = make_pipeline(StandardScaler(), SVC(kernel='linear'))
    elif option == "KNN":
        clf = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    elif option == "LogR":
        clf = make_pipeline(StandardScaler(), LogisticRegression())
    else:
        print("Invalid option. Please choose from 'SVC', 'KNN', or 'LogR'.")
    
    clf.fit(features, labels)
    return clf

In [90]:
def evaluate_classifier(clf, test_features, test_labels):
    predictions = clf.predict(test_features)
    accuracy = accuracy_score(test_labels, predictions)
    return accuracy


In [91]:
folder_path = "./Assignment2_BikeHorses/Assignment2_BikeHorses"
images, labels = load_dataset(folder_path,(100,100))

In [92]:
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)
# Feature extraction
train_keypoints, train_descriptors = extract_features(train_images)
test_keypoints, test_descriptors = extract_features(test_images)

In [93]:
num_clusters = 100  # Adjust as needed
codebook = generate_codebook(train_descriptors, num_clusters)

# Feature quantization
train_features = quantize_features(train_descriptors, codebook)
test_features = quantize_features(test_descriptors, codebook)

In [94]:
clf = train_classifier(train_features, train_labels,"LogR",5)

# Evaluate classifier
accuracy = evaluate_classifier(clf, test_features, test_labels)
print("Accuracy:", accuracy)

Accuracy: 0.8888888888888888
