In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.datasets import load_digits


def load_digits_data():
    digits = load_digits()
    data = pd.DataFrame(data=digits.data, columns=[f'pixel_{i}' for i in range(digits.data.shape[1])])
    data['Target'] = digits.target
    return data

def load_data_subset():
    digits_data = load_digits_data()

    features = digits_data.columns[:-1].tolist()  # Exclude the target column
    target = 'Target'

    X = digits_data[features].values
    y = digits_data[target].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print("Train data array size: ", X_train.shape)
    print("Train truth array size: ", y_train.shape)
    print("Test data array size: ", X_test.shape)
    print("Test truth array size: ", y_test.shape)

    return X_train, X_test, y_train, y_test

# Load data using the Digits dataset
X_train, X_test, y_train, y_test = load_data_subset()

class KNN:
    def __init__(self, k):
        self.k = k
        self.x_train, self.y_train = None, None

    @staticmethod
    def calculate_distance(x, y):
        return np.linalg.norm(x - y)  # Euclidean distance

    def find_k_nearest_neighbors(self, x, version):
        distances = [self.calculate_distance(x, x_train) for x_train in self.x_train]
        sorted_indices = np.argsort(distances)
        k_nearest_neighbors = sorted_indices[:self.k]
        return k_nearest_neighbors

    def fit(self, x, y):
        self.x_train = x
        self.y_train = y

    def predict(self, X, k, version, distance_matrix):
        n_samples = X.shape[0]
        predictions = np.zeros(n_samples, dtype=int)

        for i in range(n_samples):
            # Find the k nearest neighbors for the current data point
            neighbors = self.find_k_nearest_neighbors(X[i], version)

            # Get the class labels of the k nearest neighbors
            neighbor_labels = np.array([self.y_train[idx] for idx in neighbors])

            # Assign the most common class label as the prediction
            predictions[i] = np.argmax(np.bincount(neighbor_labels))

            # Print the message in each iteration
            print("kNN for k = {}, data point # {}, predicted class = {} ".format(k, i + 1, predictions[i]))

        return predictions

# Load data using the Wine dataset
X_train, X_test, y_train, y_test = load_data_subset()

best_dev_acc = 0.0
best_k = 0
best_clf = None

# Iterate over different values of k
for k in tqdm(range(1, 11)):
    knn = KNN(k=k)  # knn object with current k
    knn.fit(X_train, y_train)  # fit with current k in the TRAINING data...

    # Use X_test and y_test instead of X_dev and y_dev for prediction
    preds = knn.predict(X_test, k, 0, np.zeros((10, len(X_test))))  # predict with current k using the TEST data...
    acc = accuracy_score(y_test, preds)  # calculate accuracy

    # if the current accuracy is higher than the previously best...
    if best_dev_acc == 0.0 or acc > best_dev_acc:
        best_dev_acc = acc
        best_k = k
        best_clf = knn

print('\nBest dev accuracy:', best_dev_acc)
print('Best K:', best_k)

# Predict on the actual test data using the best classifier
test_preds = best_clf.predict(X_test, best_k, 0, np.zeros((10, len(X_test))))
print('Test accuracy:', accuracy_score(y_test, test_preds))

# Print the classification report
print('\nClassification Report:')
print(classification_report(y_test, test_preds))


Train data array size:  (1437, 64)
Train truth array size:  (1437,)
Test data array size:  (360, 64)
Test truth array size:  (360,)
Train data array size:  (1437, 64)
Train truth array size:  (1437,)
Test data array size:  (360, 64)
Test truth array size:  (360,)


  0%|          | 0/10 [00:00<?, ?it/s]

kNN for k = 1, data point # 1, predicted class = 6 
kNN for k = 1, data point # 2, predicted class = 9 
kNN for k = 1, data point # 3, predicted class = 3 
kNN for k = 1, data point # 4, predicted class = 7 
kNN for k = 1, data point # 5, predicted class = 2 
kNN for k = 1, data point # 6, predicted class = 1 
kNN for k = 1, data point # 7, predicted class = 5 
kNN for k = 1, data point # 8, predicted class = 2 
kNN for k = 1, data point # 9, predicted class = 5 
kNN for k = 1, data point # 10, predicted class = 2 
kNN for k = 1, data point # 11, predicted class = 1 
kNN for k = 1, data point # 12, predicted class = 9 
kNN for k = 1, data point # 13, predicted class = 4 
kNN for k = 1, data point # 14, predicted class = 0 
kNN for k = 1, data point # 15, predicted class = 4 
kNN for k = 1, data point # 16, predicted class = 2 
kNN for k = 1, data point # 17, predicted class = 3 
kNN for k = 1, data point # 18, predicted class = 7 
kNN for k = 1, data point # 19, predicted class = 8 
kN

 10%|█         | 1/10 [00:00<00:07,  1.19it/s]

kNN for k = 1, data point # 356, predicted class = 4 
kNN for k = 1, data point # 357, predicted class = 3 
kNN for k = 1, data point # 358, predicted class = 8 
kNN for k = 1, data point # 359, predicted class = 3 
kNN for k = 1, data point # 360, predicted class = 5 
kNN for k = 2, data point # 1, predicted class = 6 
kNN for k = 2, data point # 2, predicted class = 9 
kNN for k = 2, data point # 3, predicted class = 3 
kNN for k = 2, data point # 4, predicted class = 7 
kNN for k = 2, data point # 5, predicted class = 2 
kNN for k = 2, data point # 6, predicted class = 1 
kNN for k = 2, data point # 7, predicted class = 5 
kNN for k = 2, data point # 8, predicted class = 2 
kNN for k = 2, data point # 9, predicted class = 5 
kNN for k = 2, data point # 10, predicted class = 2 
kNN for k = 2, data point # 11, predicted class = 1 
kNN for k = 2, data point # 12, predicted class = 9 
kNN for k = 2, data point # 13, predicted class = 4 
kNN for k = 2, data point # 14, predicted class = 

 20%|██        | 2/10 [00:01<00:06,  1.22it/s]

kNN for k = 2, data point # 349, predicted class = 5 
kNN for k = 2, data point # 350, predicted class = 2 
kNN for k = 2, data point # 351, predicted class = 7 
kNN for k = 2, data point # 352, predicted class = 7 
kNN for k = 2, data point # 353, predicted class = 1 
kNN for k = 2, data point # 354, predicted class = 8 
kNN for k = 2, data point # 355, predicted class = 7 
kNN for k = 2, data point # 356, predicted class = 4 
kNN for k = 2, data point # 357, predicted class = 3 
kNN for k = 2, data point # 358, predicted class = 8 
kNN for k = 2, data point # 359, predicted class = 3 
kNN for k = 2, data point # 360, predicted class = 5 
kNN for k = 3, data point # 1, predicted class = 6 
kNN for k = 3, data point # 2, predicted class = 9 
kNN for k = 3, data point # 3, predicted class = 3 
kNN for k = 3, data point # 4, predicted class = 7 
kNN for k = 3, data point # 5, predicted class = 2 
kNN for k = 3, data point # 6, predicted class = 1 
kNN for k = 3, data point # 7, predicted

 30%|███       | 3/10 [00:02<00:05,  1.23it/s]

kNN for k = 3, data point # 358, predicted class = 8 
kNN for k = 3, data point # 359, predicted class = 3 
kNN for k = 3, data point # 360, predicted class = 5 
kNN for k = 4, data point # 1, predicted class = 6 
kNN for k = 4, data point # 2, predicted class = 9 
kNN for k = 4, data point # 3, predicted class = 3 
kNN for k = 4, data point # 4, predicted class = 7 
kNN for k = 4, data point # 5, predicted class = 2 
kNN for k = 4, data point # 6, predicted class = 1 
kNN for k = 4, data point # 7, predicted class = 5 
kNN for k = 4, data point # 8, predicted class = 2 
kNN for k = 4, data point # 9, predicted class = 5 
kNN for k = 4, data point # 10, predicted class = 2 
kNN for k = 4, data point # 11, predicted class = 1 
kNN for k = 4, data point # 12, predicted class = 9 
kNN for k = 4, data point # 13, predicted class = 4 
kNN for k = 4, data point # 14, predicted class = 0 
kNN for k = 4, data point # 15, predicted class = 4 
kNN for k = 4, data point # 16, predicted class = 2 

 40%|████      | 4/10 [00:03<00:04,  1.24it/s]

kNN for k = 4, data point # 287, predicted class = 5 
kNN for k = 4, data point # 288, predicted class = 9 
kNN for k = 4, data point # 289, predicted class = 9 
kNN for k = 4, data point # 290, predicted class = 8 
kNN for k = 4, data point # 291, predicted class = 5 
kNN for k = 4, data point # 292, predicted class = 3 
kNN for k = 4, data point # 293, predicted class = 3 
kNN for k = 4, data point # 294, predicted class = 2 
kNN for k = 4, data point # 295, predicted class = 0 
kNN for k = 4, data point # 296, predicted class = 5 
kNN for k = 4, data point # 297, predicted class = 8 
kNN for k = 4, data point # 298, predicted class = 3 
kNN for k = 4, data point # 299, predicted class = 4 
kNN for k = 4, data point # 300, predicted class = 0 
kNN for k = 4, data point # 301, predicted class = 2 
kNN for k = 4, data point # 302, predicted class = 4 
kNN for k = 4, data point # 303, predicted class = 6 
kNN for k = 4, data point # 304, predicted class = 4 
kNN for k = 4, data point # 

 50%|█████     | 5/10 [00:04<00:04,  1.25it/s]

kNN for k = 5, data point # 300, predicted class = 0 
kNN for k = 5, data point # 301, predicted class = 2 
kNN for k = 5, data point # 302, predicted class = 4 
kNN for k = 5, data point # 303, predicted class = 6 
kNN for k = 5, data point # 304, predicted class = 4 
kNN for k = 5, data point # 305, predicted class = 3 
kNN for k = 5, data point # 306, predicted class = 4 
kNN for k = 5, data point # 307, predicted class = 5 
kNN for k = 5, data point # 308, predicted class = 0 
kNN for k = 5, data point # 309, predicted class = 5 
kNN for k = 5, data point # 310, predicted class = 2 
kNN for k = 5, data point # 311, predicted class = 1 
kNN for k = 5, data point # 312, predicted class = 3 
kNN for k = 5, data point # 313, predicted class = 1 
kNN for k = 5, data point # 314, predicted class = 4 
kNN for k = 5, data point # 315, predicted class = 1 
kNN for k = 5, data point # 316, predicted class = 1 
kNN for k = 5, data point # 317, predicted class = 7 
kNN for k = 5, data point # 

 60%|██████    | 6/10 [00:04<00:03,  1.25it/s]

kNN for k = 6, data point # 308, predicted class = 0 
kNN for k = 6, data point # 309, predicted class = 5 
kNN for k = 6, data point # 310, predicted class = 2 
kNN for k = 6, data point # 311, predicted class = 1 
kNN for k = 6, data point # 312, predicted class = 3 
kNN for k = 6, data point # 313, predicted class = 1 
kNN for k = 6, data point # 314, predicted class = 4 
kNN for k = 6, data point # 315, predicted class = 1 
kNN for k = 6, data point # 316, predicted class = 1 
kNN for k = 6, data point # 317, predicted class = 7 
kNN for k = 6, data point # 318, predicted class = 0 
kNN for k = 6, data point # 319, predicted class = 1 
kNN for k = 6, data point # 320, predicted class = 5 
kNN for k = 6, data point # 321, predicted class = 2 
kNN for k = 6, data point # 322, predicted class = 1 
kNN for k = 6, data point # 323, predicted class = 2 
kNN for k = 6, data point # 324, predicted class = 8 
kNN for k = 6, data point # 325, predicted class = 7 
kNN for k = 6, data point # 

 70%|███████   | 7/10 [00:05<00:02,  1.24it/s]

kNN for k = 7, data point # 308, predicted class = 0 
kNN for k = 7, data point # 309, predicted class = 5 
kNN for k = 7, data point # 310, predicted class = 2 
kNN for k = 7, data point # 311, predicted class = 1 
kNN for k = 7, data point # 312, predicted class = 3 
kNN for k = 7, data point # 313, predicted class = 1 
kNN for k = 7, data point # 314, predicted class = 4 
kNN for k = 7, data point # 315, predicted class = 1 
kNN for k = 7, data point # 316, predicted class = 1 
kNN for k = 7, data point # 317, predicted class = 7 
kNN for k = 7, data point # 318, predicted class = 0 
kNN for k = 7, data point # 319, predicted class = 1 
kNN for k = 7, data point # 320, predicted class = 5 
kNN for k = 7, data point # 321, predicted class = 2 
kNN for k = 7, data point # 322, predicted class = 1 
kNN for k = 7, data point # 323, predicted class = 2 
kNN for k = 7, data point # 324, predicted class = 8 
kNN for k = 7, data point # 325, predicted class = 7 
kNN for k = 7, data point # 

 80%|████████  | 8/10 [00:06<00:01,  1.23it/s]

kNN for k = 8, data point # 310, predicted class = 2 
kNN for k = 8, data point # 311, predicted class = 1 
kNN for k = 8, data point # 312, predicted class = 3 
kNN for k = 8, data point # 313, predicted class = 1 
kNN for k = 8, data point # 314, predicted class = 4 
kNN for k = 8, data point # 315, predicted class = 1 
kNN for k = 8, data point # 316, predicted class = 1 
kNN for k = 8, data point # 317, predicted class = 7 
kNN for k = 8, data point # 318, predicted class = 0 
kNN for k = 8, data point # 319, predicted class = 1 
kNN for k = 8, data point # 320, predicted class = 5 
kNN for k = 8, data point # 321, predicted class = 2 
kNN for k = 8, data point # 322, predicted class = 1 
kNN for k = 8, data point # 323, predicted class = 2 
kNN for k = 8, data point # 324, predicted class = 8 
kNN for k = 8, data point # 325, predicted class = 7 
kNN for k = 8, data point # 326, predicted class = 0 
kNN for k = 8, data point # 327, predicted class = 6 
kNN for k = 8, data point # 

 90%|█████████ | 9/10 [00:07<00:00,  1.23it/s]

kNN for k = 9, data point # 322, predicted class = 1 
kNN for k = 9, data point # 323, predicted class = 2 
kNN for k = 9, data point # 324, predicted class = 8 
kNN for k = 9, data point # 325, predicted class = 7 
kNN for k = 9, data point # 326, predicted class = 0 
kNN for k = 9, data point # 327, predicted class = 6 
kNN for k = 9, data point # 328, predicted class = 4 
kNN for k = 9, data point # 329, predicted class = 8 
kNN for k = 9, data point # 330, predicted class = 8 
kNN for k = 9, data point # 331, predicted class = 5 
kNN for k = 9, data point # 332, predicted class = 1 
kNN for k = 9, data point # 333, predicted class = 8 
kNN for k = 9, data point # 334, predicted class = 4 
kNN for k = 9, data point # 335, predicted class = 5 
kNN for k = 9, data point # 336, predicted class = 8 
kNN for k = 9, data point # 337, predicted class = 7 
kNN for k = 9, data point # 338, predicted class = 9 
kNN for k = 9, data point # 339, predicted class = 8 
kNN for k = 9, data point # 

100%|██████████| 10/10 [00:08<00:00,  1.24it/s]

kNN for k = 10, data point # 333, predicted class = 8 
kNN for k = 10, data point # 334, predicted class = 4 
kNN for k = 10, data point # 335, predicted class = 5 
kNN for k = 10, data point # 336, predicted class = 8 
kNN for k = 10, data point # 337, predicted class = 7 
kNN for k = 10, data point # 338, predicted class = 9 
kNN for k = 10, data point # 339, predicted class = 8 
kNN for k = 10, data point # 340, predicted class = 6 
kNN for k = 10, data point # 341, predicted class = 0 
kNN for k = 10, data point # 342, predicted class = 6 
kNN for k = 10, data point # 343, predicted class = 2 
kNN for k = 10, data point # 344, predicted class = 0 
kNN for k = 10, data point # 345, predicted class = 7 
kNN for k = 10, data point # 346, predicted class = 9 
kNN for k = 10, data point # 347, predicted class = 1 
kNN for k = 10, data point # 348, predicted class = 9 
kNN for k = 10, data point # 349, predicted class = 5 
kNN for k = 10, data point # 350, predicted class = 2 
kNN for k 




kNN for k = 6, data point # 65, predicted class = 4 
kNN for k = 6, data point # 66, predicted class = 5 
kNN for k = 6, data point # 67, predicted class = 7 
kNN for k = 6, data point # 68, predicted class = 0 
kNN for k = 6, data point # 69, predicted class = 7 
kNN for k = 6, data point # 70, predicted class = 5 
kNN for k = 6, data point # 71, predicted class = 9 
kNN for k = 6, data point # 72, predicted class = 5 
kNN for k = 6, data point # 73, predicted class = 5 
kNN for k = 6, data point # 74, predicted class = 4 
kNN for k = 6, data point # 75, predicted class = 7 
kNN for k = 6, data point # 76, predicted class = 0 
kNN for k = 6, data point # 77, predicted class = 4 
kNN for k = 6, data point # 78, predicted class = 5 
kNN for k = 6, data point # 79, predicted class = 5 
kNN for k = 6, data point # 80, predicted class = 9 
kNN for k = 6, data point # 81, predicted class = 9 
kNN for k = 6, data point # 82, predicted class = 0 
kNN for k = 6, data point # 83, predicted clas

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Initialize KNNClassifier
knn_classifier = KNeighborsClassifier()

# Fit the model
knn_classifier.fit(X_train, y_train)

# Predict on the test set
test_preds = knn_classifier.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, test_preds)
print('Test accuracy:', test_accuracy)

# Generate classification report
print('\nClassification Report:')
print(classification_report(y_test, test_preds))

Test accuracy: 0.9861111111111112

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      1.00      1.00        34
           4       0.98      1.00      0.99        46
           5       0.98      0.96      0.97        47
           6       0.97      1.00      0.99        35
           7       1.00      0.97      0.99        34
           8       1.00      1.00      1.00        30
           9       0.95      0.95      0.95        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360

