In [12]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from PIL import Image
import zipfile

In [13]:
dataset_zip_path = "/content/animals.zip"
dataset_folder = "/content/animals"


In [14]:
#Unziping the dataset
with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
    zip_ref.extractall(dataset_folder)

#preprocess data
def load_images(folder, label, image_size=(32, 32)):
    images = []
    labels = []
    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        if filename.endswith(('.png', '.jpg', '.jpeg')):
            img = Image.open(filepath).convert('L')
            img = img.resize(image_size)
            images.append(np.array(img).flatten())
            labels.append(label)
    return np.array(images), np.array(labels)

def load_dataset():
    base_folder = dataset_folder
    classes = ['cats', 'dogs', 'pandas']
    data, labels = [], []

    for label, class_name in enumerate(classes):
        class_folder = os.path.join(base_folder, class_name)
        class_images, class_labels = load_images(class_folder, label)
        data.append(class_images)
        labels.append(class_labels)

    data = np.vstack(data)
    labels = np.concatenate(labels)
    return data, labels

X, y = load_dataset()


In [15]:
#random weights and bias
np.random.seed(42)
input_dim = X.shape[1]
output_dim = 128
W = np.random.randn(input_dim, output_dim)
b = np.random.randn(output_dim)

#Project features
X_transformed = X @ W + b

In [16]:
#5-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')  # L1 distance

fold_accuracies = []
for train_index, val_index in kf.split(X_transformed):
    X_train, X_val = X_transformed[train_index], X_transformed[val_index]
    y_train, y_val = y[train_index], y[val_index]

    #Train kNN
    knn.fit(X_train, y_train)

    # Validate
    y_pred = knn.predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    fold_accuracies.append(acc)
    print(f"Fold Accuracy: {acc}")


Fold Accuracy: 0.475
Fold Accuracy: 0.425
Fold Accuracy: 0.4666666666666667
Fold Accuracy: 0.4216666666666667
Fold Accuracy: 0.415


In [17]:
# Model evalution
print(f"Average Cross-Validation Accuracy: {np.mean(fold_accuracies)}")


Average Cross-Validation Accuracy: 0.44066666666666665
