In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


# Per-image preprocessing: convert to a tensor, then normalise each of the
# three channels with the fixed mean/std values below.
# NOTE(review): these values look like the widely used ImageNet statistics
# rather than CIFAR-10 ones - confirm that this is intended.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# One definition of the dataset location, shared by both splits.
CIFAR_ROOT = '/content/drive/MyDrive/cifar-10-batches-py'

# The training split is built first, then the test split, with otherwise
# identical settings.
trainset, testset = (
    datasets.CIFAR10(root=CIFAR_ROOT, train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [61]:
# Keep only the positions of samples whose label id is 1 or 9.
# NOTE(review): in the standard CIFAR-10 label map these ids are presumably
# "automobile" and "truck" - confirm against trainset.classes.
train_indices = [idx for idx, target in enumerate(trainset.targets) if target in (1, 9)]
test_indices = [idx for idx, target in enumerate(testset.targets) if target in (1, 9)]



In [62]:
# Restrict both splits to the samples selected by train_indices / test_indices.
train_subset = torch.utils.data.Subset(trainset, train_indices)
test_subset = torch.utils.data.Subset(testset, test_indices)

# Shuffling stays enabled for the training loader, but it is driven by an
# explicitly seeded generator so that a fresh kernel reproduces the same
# sample order (and therefore the same row order in anything built from it).
SHUFFLE_SEED = 0
trainloader = DataLoader(
    train_subset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(SHUFFLE_SEED),
)
# The test loader keeps the dataset order.
testloader = DataLoader(test_subset, batch_size=32, shuffle=False)


In [63]:
def prepare_data_for_knn(dataloader):
    """Flatten every batch of a loader into NumPy feature and label arrays.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` pairs of tensors,
            with the batch dimension first in ``inputs``.

    Returns:
        Tuple ``(data, labels)``. ``data`` stacks all samples row-wise with
        every non-batch dimension flattened into a single feature axis;
        ``labels`` concatenates the targets in the same order.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    data = []
    labels = []
    for inputs, targets in dataloader:
        # reshape (unlike view) also accepts non-contiguous batches and only
        # copies when it has to.
        data.append(inputs.reshape(inputs.size(0), -1).numpy())
        labels.append(targets.numpy())

    if not data:
        # np.concatenate would fail here with a message that does not point
        # at the real cause, so report it explicitly.
        raise ValueError("dataloader yielded no batches; nothing to convert")

    return np.concatenate(data), np.concatenate(labels)



In [64]:
# Convert both loaders (training first, then test) into flat feature / label
# arrays.
(x_train, y_train), (x_test, y_test) = map(prepare_data_for_knn, (trainloader, testloader))

In [65]:
# The scaling statistics are learned from the training features only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)


In [66]:

# k-NN with a single neighbour: fit on the training features, then predict
# on the training split followed by the test split.
knn_1 = KNeighborsClassifier(n_neighbors=1).fit(x_train, y_train)
y_pred_train_1, y_pred_test_1 = (knn_1.predict(features) for features in (x_train, x_test))

# Accuracies are stored as percentages.
train_accuracy_1 = 100 * accuracy_score(y_train, y_pred_train_1)
test_accuracy_1 = 100 * accuracy_score(y_test, y_pred_test_1)


In [67]:
# k-NN with three neighbours: fit on the training features, then predict on
# the training split followed by the test split.
knn_3 = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred_train_3, y_pred_test_3 = (knn_3.predict(features) for features in (x_train, x_test))

# Accuracies are stored as percentages.
train_accuracy_3 = 100 * accuracy_score(y_train, y_pred_train_3)
test_accuracy_3 = 100 * accuracy_score(y_test, y_pred_test_3)


In [69]:
# Report both k-NN variants with one call; the newline separator keeps one
# result per output line.
print(
    f'Accuracy με 1 γείτονα - Train: {train_accuracy_1:.2f}% Test: {test_accuracy_1:.2f}%',
    f'Accuracy με 3 γείτονες - Train: {train_accuracy_3:.2f}% Test: {test_accuracy_3:.2f}%',
    sep='\n',
)


Accuracy με 1 γείτονα - Train: 100.00% Test: 68.60%
Accuracy με 3 γείτονες - Train: 85.89% Test: 70.20%


In [73]:
class NearestClassCentroid:
    """Nearest-centroid classifier based on Euclidean distance.

    ``fit`` stores the mean of the training samples of every class;
    ``predict`` gives each sample the label of the closest stored mean.
    """

    def __init__(self):
        # Maps class label -> mean feature vector; stays None until fit() succeeds.
        self.centroids = None

    def fit(self, X, y):
        """Compute one centroid per distinct label in ``y``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: One-dimensional array-like with ``n_samples`` labels.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` and ``y`` disagree on the number of samples,
                ``y`` is not one-dimensional, or the training set is empty.
        """
        # Plain lists would make ``y == label`` a scalar comparison and
        # silently select the wrong rows, so convert to arrays first.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim < 1 or y.ndim != 1 or len(X) != len(y):
            raise ValueError(
                "X and y must hold the same number of samples, with y one-dimensional"
            )
        if len(y) == 0:
            raise ValueError("cannot fit on an empty training set")

        # np.unique returns the labels sorted; the dict keeps that order,
        # which predict() relies on for tie-breaking.
        self.centroids = {label: X[y == label].mean(axis=0) for label in np.unique(y)}
        return self

    def predict(self, X):
        """Return, for every row of ``X``, the label of the nearest centroid.

        Ties go to the class that comes first in the stored centroid order.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            One-dimensional array with ``n_samples`` predicted labels.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if not self.centroids:
            raise RuntimeError("NearestClassCentroid.predict() called before fit()")

        X = np.asarray(X)
        labels = np.array(list(self.centroids))
        if len(X) == 0:
            # Nothing to classify; keep the label dtype for the empty result.
            return labels[:0]

        # One distance column per class, computed for all samples at once
        # instead of looping over samples in Python.
        distances = np.stack(
            [np.linalg.norm(X - centroid, axis=1) for centroid in self.centroids.values()],
            axis=1,
        )
        # argmin returns the first minimum, i.e. the earliest class on ties.
        return labels[np.argmin(distances, axis=1)]


In [74]:
# Fit the centroid classifier on the training features (fit returns the
# instance itself), then predict both splits.
ncc = NearestClassCentroid().fit(x_train, y_train)
y_pred_train_ncc, y_pred_test_ncc = (ncc.predict(features) for features in (x_train, x_test))

# Accuracies are stored as percentages: training split first, then test split.
train_accuracy_ncc, test_accuracy_ncc = (
    100 * accuracy_score(truth, guess)
    for truth, guess in ((y_train, y_pred_train_ncc), (y_test, y_pred_test_ncc))
)

print(f"NCC - Train Accuracy: {train_accuracy_ncc:.2f}% Test Accuracy: {test_accuracy_ncc:.2f}%")

NCC - Train Accuracy: 63.43% Test Accuracy: 64.15%
