# IT-542 PRML Assignment - 3

## Implement k-NN classifier and use it for IRIS data with k = 1, 3, 5 and 11. Repeat the experiment 10 times and calculate the average accuracy

In [0]:
# Import and load iris dataset
from math import sqrt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
print(iris)

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     

In [0]:
# Evaluate KNN algorithm
def evaluate_algorithm(dataset, labels, k):
    train_set_data, test_set_data, \
    train_set_labels, test_set_labels = train_test_split(dataset, labels, test_size=0.2, stratify=labels)
    
    # Predict classes using our implementation
    train_set = np.concatenate([train_set_data, train_set_labels.reshape(train_set_labels.shape[0], 1)], axis=1).tolist()
    test_set = np.concatenate([test_set_data, test_set_labels.reshape(test_set_labels.shape[0], 1)], axis=1).tolist()
    predicted = k_nearest_neighbors(train_set, test_set, k)

    # Using Sklearn object to predict classes
    kNN_classifier = KNeighborsClassifier(k)
    kNN_classifier.fit(train_set_data, train_set_labels)
    sk_predicted = kNN_classifier.predict(test_set_data)

    # Calculating accuracy for both implementations
    accuracy = accuracy_score(test_set_labels, predicted)
    sk_accuracy = accuracy_score(test_set_labels, sk_predicted)

    return predicted, sk_predicted, accuracy, sk_accuracy
 
# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)
 
# Locate the most similar neighbors
def get_neighbors(train, test_row, k):
	distances = list()
	for train_row in train:
		dist = euclidean_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(k):
		neighbors.append(distances[i][0])
	return neighbors
 
# Make a prediction with neighbors
def predict_classification(train, test_row, k):
	neighbors = get_neighbors(train, test_row, k)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction
 
# kNN Algorithm
def k_nearest_neighbors(train, test, k):
	predictions = list()
	for row in test:
		output = predict_classification(train, row, k)
		predictions.append(output)
	return predictions

In [0]:
# Run KNN algorithm
epochs = 10
k_list = [1, 3, 5, 11]
scores, sk_scores = [], []
for k in k_list:
    print(f"k = {k}:")
    for i in range(epochs):
        print(f"Iteration {i+1}:")
        pred, sk_pred, acc, sk_acc = evaluate_algorithm(iris.data, iris.target, k)
        acc, sk_acc = float('%.3f' % acc), float('%.3f' % sk_acc)
        scores.append(acc)
        sk_scores.append(sk_acc)

        print(f"Custom predictions: {pred}")
        print(f"sklearn predictions: {sk_pred}")
        print(f"Accuracy: {acc * 100.0}%, sklearn Accuracy: {sk_acc * 100.0}%")
        print()
    
    avg_accuracy = (sum(scores) / float(len(scores)))
    sk_avg_accuracy = (sum(sk_scores) / float(len(sk_scores)))
    print()
    print(f"Average Accuracy for k = {k}: {avg_accuracy * 100.0:.3f}%")
    print(f"sklearn Average Accuracy for k = {k}: {sk_avg_accuracy * 100.0:.3f}%")
    print("----------------------------------------------------------------------")
    print("\n")

k = 1:
Iteration 1:
Custom predictions: [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 0.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 2.0, 0.0]
sklearn predictions: [1 0 1 0 1 0 1 0 1 2 0 1 2 1 2 2 0 1 2 1 2 2 1 0 2 0 0 2 2 0]
Accuracy: 100.0%, sklearn Accuracy: 100.0%

Iteration 2:
Custom predictions: [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 2.0, 2.0, 1.0, 0.0, 1.0]
sklearn predictions: [0 1 0 1 0 0 2 0 2 1 2 1 2 1 2 1 0 2 1 2 0 1 0 2 0 2 2 1 0 1]
Accuracy: 93.30000000000001%, sklearn Accuracy: 93.30000000000001%

Iteration 3:
Custom predictions: [1.0, 0.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 2.0, 1.0, 1.0, 2.0, 1.0]
sklearn predictions: [1 0 0 2 1 2 1 2 2 1 1 2 0 2 0 0 1 0 2 0 1 0 2 0 0 2 1 1 2 1]
Accuracy: 93.30000000000001%, sklearn Accuracy: 93.30000000000001%

Iteration 4: