In [2]:
import numpy as np
from sklearn.model_selection import train_test_split


In [3]:
# ----- Load your image features and labels -----
# Both arrays are read from .npy files produced by the feature-extraction step.
FEATURES_PATH = '../feature extraction/image_features.npy'
LABELS_PATH = '../feature extraction/image_labels.npy'

features = np.load(FEATURES_PATH)  # per the author's note: shape (N, 500)
labels = np.load(LABELS_PATH)      # per the author's note: shape (N,)


In [4]:
# ----- Normalize the features -----
# Standardise every feature column to zero mean and unit variance.
feature_mean = np.mean(features, axis=0)
feature_std = np.std(features, axis=0)
# A constant column has std == 0; dividing by it would produce NaN/inf values
# that would then flow into the PCA step. Dividing by 1 instead maps such a
# column to all zeros and leaves every other column unchanged.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
features = (features - feature_mean) / feature_std


In [5]:
# ----- PCA Implementation -----
def compute_pca(X, variance_retained=0.95):
    """Project X onto the leading principal components of its covariance.

    Parameters
    ----------
    X : array-like, 2-D
        Data matrix with one sample per row and one feature per column.
    variance_retained : float, default 0.95
        Fraction of the total variance to keep. The smallest number of leading
        components whose cumulative variance ratio is >= this value is used.
        If the threshold is never reached (for example a value above 1, or 1.0
        missed by floating-point rounding) all components are kept.

    Returns
    -------
    X_pca : ndarray, shape (n_samples, n_components)
        The mean-centred data projected onto the selected components.
    principal_components : ndarray, shape (n_features, n_components)
        The selected eigenvectors as columns, in order of decreasing eigenvalue.
    """
    X = np.asarray(X, dtype=float)
    X_meaned = X - np.mean(X, axis=0)

    # np.cov squeezes its result, so a single-feature input yields a 0-d array
    # that eigh rejects; atleast_2d restores the (1, 1) matrix.
    covariance_matrix = np.atleast_2d(np.cov(X_meaned, rowvar=False))
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # eigh returns eigenvalues in ascending order; reverse to descending.
    sorted_idx = np.argsort(eigenvalues)[::-1]
    # Round-off can make near-zero eigenvalues slightly negative; a variance
    # cannot be negative, so clip them before forming the ratios.
    eigenvalues = np.clip(eigenvalues[sorted_idx], 0.0, None)
    eigenvectors = eigenvectors[:, sorted_idx]

    total_variance = np.sum(eigenvalues)
    if total_variance > 0:
        cumulative_variance = np.cumsum(eigenvalues) / total_variance
        reached = np.flatnonzero(cumulative_variance >= variance_retained)
        # np.argmax on an all-False mask returns 0, which would silently keep a
        # single component; keep every component when nothing reaches the target.
        n_components = int(reached[0]) + 1 if reached.size else eigenvalues.size
    else:
        # No variance at all (constant data): avoid 0/0 and keep one component.
        n_components = 1

    principal_components = eigenvectors[:, :n_components]
    X_pca = np.dot(X_meaned, principal_components)
    return X_pca, principal_components

In [6]:

# ----- KNN Implementation-----
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 15
        Number of nearest training samples that vote on each prediction.
    """

    def __init__(self, k=15):
        self.k = k

    @staticmethod
    def _as_samples(X):
        """Return X as a float array with one flattened sample per row."""
        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return X
        # Casting to float avoids wrap-around when features are unsigned ints;
        # flattening keeps 1-D (scalar-sample) inputs working.
        return X.reshape(X.shape[0], -1)

    def fit(self, X_train, y_train):
        """Store the training samples and their labels (no model is trained).

        Raises
        ------
        ValueError
            If the number of samples and labels differ.
        """
        X_train = self._as_samples(X_train)
        y_train = np.asarray(y_train)
        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                f"X_train has {X_train.shape[0]} samples but y_train has "
                f"{y_train.shape[0]} labels"
            )
        self.X_train = X_train
        self.y_train = y_train

    def _euclidean_distance(self, x1, x2):
        """Euclidean distance between two samples (kept for compatibility)."""
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X_test):
        """Predict a label for every row of X_test.

        Each prediction is the most frequent label among the k closest training
        samples; when several labels are equally frequent the smallest one (in
        np.unique order) wins.

        Raises
        ------
        ValueError
            If k is smaller than 1.
        """
        if self.k < 1:
            raise ValueError(f"k must be at least 1, got {self.k}")

        X_test = self._as_samples(X_test)
        predictions = []
        for x_test in X_test:
            # One vectorised distance computation against all training rows.
            distances = np.sqrt(np.sum((self.X_train - x_test) ** 2, axis=1))
            # A stable sort resolves equal distances by training-set order,
            # making the neighbour choice deterministic.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            values, counts = np.unique(self.y_train[k_indices], return_counts=True)
            predictions.append(values[np.argmax(counts)])
        return np.array(predictions)


In [7]:
# ----- Apply PCA -----
# Reduce the feature matrix to the leading components that reach the 0.95
# cumulative-variance threshold. Only the projected data is used afterwards,
# so the component matrix is bound to the throwaway name `_`.
# NOTE(review): the projection is computed from every row of `features`, and
# the train/test split only happens in a later cell, so test rows influence
# the components. Consider fitting PCA on the training rows only.
pca_output = compute_pca(features, variance_retained=0.95)
features_pca, _ = pca_output
print(f"PCA reduced features to shape: {features_pca.shape}")

PCA reduced features to shape: (1200, 322)


In [8]:
# ----- Accuracy function -----
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; both must have the same shape.

    Returns
    -------
    float
        Mean of the element-wise equality between the two inputs.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Without this conversion, two Python lists compare as a single bool and
    # the result would be 0.0 or 1.0 instead of a per-element rate.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Refuse silent broadcasting, e.g. (N, 1) against (N,).
        raise ValueError(
            f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    return np.mean(y_true == y_pred)

# ----- Train-test split -----
# Stratified, seeded split of the PCA-projected features with 20% held out.
X_train, X_test, y_train, y_test = train_test_split(
    features_pca,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# ----- Train and test KNN -----
# Sweep the candidate neighbour counts, print train/test accuracy for each and
# remember the configuration with the highest test accuracy.
# NOTE(review): the winning k is chosen on test accuracy, so the reported test
# score is an optimistic estimate; a validation split or cross-validation
# would avoid that.
k = [3, 5, 7, 9, 11, 13, 15, 16, 17, 19, 21, 23, 25]
best_k, best_train_accuracy, best_test_accuracy = 0, 0, 0
for i in k:
    knn = KNN(k=i)
    knn.fit(X_train, y_train)
    y_train_pred, y_test_pred = knn.predict(X_train), knn.predict(X_test)
    train_accuracy, test_accuracy = accuracy(y_train, y_train_pred), accuracy(y_test, y_test_pred)
    print(f"K={i}: Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")

    # Strict '>' means the first k to reach a given test accuracy is kept.
    if test_accuracy > best_test_accuracy:
        best_k, best_train_accuracy, best_test_accuracy = i, train_accuracy, test_accuracy


K=3: Train Accuracy: 0.7333, Test Accuracy: 0.5167
K=5: Train Accuracy: 0.6615, Test Accuracy: 0.4917
K=7: Train Accuracy: 0.6302, Test Accuracy: 0.4833
K=9: Train Accuracy: 0.6167, Test Accuracy: 0.4792
K=11: Train Accuracy: 0.5917, Test Accuracy: 0.4833
K=13: Train Accuracy: 0.5677, Test Accuracy: 0.5042
K=15: Train Accuracy: 0.5656, Test Accuracy: 0.4833
K=16: Train Accuracy: 0.5500, Test Accuracy: 0.4625
K=17: Train Accuracy: 0.5500, Test Accuracy: 0.4792
K=19: Train Accuracy: 0.5365, Test Accuracy: 0.4792
K=21: Train Accuracy: 0.5406, Test Accuracy: 0.4792
K=23: Train Accuracy: 0.5219, Test Accuracy: 0.4750
K=25: Train Accuracy: 0.5167, Test Accuracy: 0.5000


In [9]:

# ----- Accuracy function -----
# NOTE(review): accuracy() is already defined in the earlier train/test cell;
# this re-definition shadows it and one copy would be enough.
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; both must have the same shape.

    Returns
    -------
    float
        Mean of the element-wise equality between the two inputs.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Without this conversion, two Python lists compare as a single bool and
    # the result would be 0.0 or 1.0 instead of a per-element rate.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Refuse silent broadcasting, e.g. (N, 1) against (N,).
        raise ValueError(
            f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    return np.mean(y_true == y_pred)

# ----- Results -----
print(f"Number of classes: {len(np.unique(labels))}")
print(f"Train set shape: {X_train.shape}, Test set shape: {X_test.shape}")
# Report the best configuration recorded by the k sweep rather than a
# hand-copied string, so the line stays correct when the notebook is re-run
# on different data or with different candidate values.
print(f"K={best_k}: Train Accuracy: {best_train_accuracy:.4f}, Test Accuracy: {best_test_accuracy:.4f}")

Number of classes: 15
Train set shape: (960, 322), Test set shape: (240, 322)
K=3: Train Accuracy: 0.7333, Test Accuracy: 0.5167
