# k-Nearest Neighbors (kNN) method

In [13]:
import torch 
import numpy as np 
import sklearn.datasets as ds
import sklearn.model_selection as ms 
import sklearn.preprocessing as preprocess
import collections as col 

# Seed NumPy's global RNG so any NumPy-driven randomness in this notebook is repeatable.
np.random.seed(42)

# Load the Iris data and pull out the feature matrix and the integer class labels.
iris_df = ds.load_iris()
X, y = iris_df.data, iris_df.target

# Hold out 20% of the samples for validation; stratifying on y keeps the class
# proportions the same in both splits, and random_state pins the shuffle.
X_train, X_val, y_train, y_val = ms.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features. The scaler is fitted on the training split only and
# then reused on the validation split, so no validation statistics leak in.
scaler = preprocess.StandardScaler()
X_train = torch.as_tensor(scaler.fit_transform(X_train), dtype=torch.float32)
X_val = torch.as_tensor(scaler.transform(X_val), dtype=torch.float32)

# Labels are stored as float32 column vectors of shape (N, 1).
y_train = torch.as_tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_val = torch.as_tensor(y_val, dtype=torch.float32).unsqueeze(1)

print("Train shape:", X_train.shape, y_train.shape)
print("Val shape:", X_val.shape, y_val.shape)

Train shape: torch.Size([120, 4]) torch.Size([120, 1])
Val shape: torch.Size([30, 4]) torch.Size([30, 1])


In [14]:
def knn_predict(X_train, y_train, X_test, y_test=None, k=5):
    """Classify each row of ``X_test`` by majority vote among its k nearest training points.

    Distances are Euclidean (L2) and are computed against every training row.

    Args:
        X_train: 2-D tensor of training features, one row per sample.
        y_train: Tensor holding one label per training sample. It is flattened,
            so both shape ``(N,)`` and shape ``(N, 1)`` are accepted.
        X_test: 2-D tensor of query features with the same number of columns
            as ``X_train``.
        y_test: Unused. Kept only so existing positional calls keep working.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        A float32 tensor of shape ``(X_test.shape[0], 1)`` with the predicted
        label for every query row.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of training
            samples, or if ``y_train`` does not hold exactly one label per
            training sample.
    """
    n_train = X_train.shape[0]
    if not 1 <= k <= n_train:
        raise ValueError(
            f"k must be between 1 and the number of training samples ({n_train}), got {k}"
        )

    # Convert the labels to plain Python numbers once. Tensors hash by object
    # identity, so counting tensor elements directly would treat every
    # neighbour as a distinct label and the "vote" would always return the
    # single nearest neighbour instead of the majority class.
    labels = y_train.reshape(-1).tolist()
    if len(labels) != n_train:
        raise ValueError(
            f"y_train must hold one label per training sample: "
            f"expected {n_train}, got {len(labels)}"
        )

    y_pred = []
    for i in range(X_test.shape[0]):
        # Euclidean distances of all training points from the test point
        distances = torch.norm(X_train - X_test[i], dim=1)

        # Indices of the k nearest neighbours, ordered from closest to farthest
        _, idx = torch.topk(distances, k, largest=False)

        # Majority voting. Counter keeps first-seen order among equal counts,
        # so a tie goes to the label whose closest member is nearest.
        votes = col.Counter(labels[j] for j in idx.tolist())
        y_pred.append(votes.most_common(1)[0][0])
    return torch.tensor(y_pred, dtype=torch.float32).reshape(-1, 1)

# Number of neighbours consulted for each validation sample.
k = 5
y_pred = knn_predict(X_train, y_train, X_val, y_val, k=k)

# Accuracy = matching predictions / number of validation samples. The division
# is done on Python numbers so the printed value keeps full double precision.
accuracy = torch.eq(y_pred, y_val).sum().item() / y_val.shape[0]
print("Validation Accuracy:", accuracy)

Validation Accuracy: 0.9666666666666667
