In [113]:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from scipy.special import expit as sigmoid
import math
import heapq as hp


iris = datasets.load_iris()
iris_x = iris.data  # data
iris_y = iris.target  # classes

In [114]:
def distance(A,x):
    '''A is a NxD matrix where N is the number of samples and D is
       the dimension of the feature set
    '''
    N = A.shape[0]
    d = np.zeros((N,1))
    
    md = np.inf
    
    for i in range(N):
        e = A[i,:]- x
        d[i] = np.sum(e*e.T)
        if d[i]<md:
            md = d[i]
            min_idx = i
    
    return d, min_idx



In [115]:
iris_x.shape

(150, 4)

In [116]:
# Define an object and overload custom comparison operators
class tup:
    def __init__(self, val, idx):
        self.val = val
        self.idx = idx
        
    def __lt__(self, other):
        '''Redefine for max-heap'''
        return self.val > other.val
    
    def __le__(self, other):
        return self.val <= other.val
 
    def __eq__(self, other):
        return self.val == other.val
    
    def __ne__(self, other):
        return self.val != other.val

    def __gt__(self, other):
        return self.val > other.val

    def __ge__(self, other):
        return self.val >= other.val

    def __str__(self):
        return '{:.3},{:d}'.format(self.val,self.idx)

In [117]:
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

In [118]:
def kNN(k, A, test, classes):
    heapSize = k
    heap = []
    N = A.shape[0]
    
    for k in range(heapSize):
        hp.heappush(heap, tup(np.inf, -1))

    for i in range(N):
        e = A[i,:] - test
        e = e.reshape((4, 1))
        tp = tup(float(np.dot(e.T, e)), i)
        if tp <= heap[0]:
            hp.heapreplace(heap, tp)
    
    # Find maximum occurence
    categories = []
    for j in range(len(heap)):
        h = hp.heappop(heap)
        categories.append(int(iris_y[h.idx]))
    return max(set(categories), key=categories.count)

In [119]:
def predict(k, A, category):   
    N = A.shape[0]
    false_positive = 0
    false_negative = 0
    true_positive = 0
    true_negative = 0
        
    for i in range(N):
        result = kNN(k, A, A[i,:], category);
        if(result == int(iris_y[i]) and int(iris_y[i]) == category):
            true_positive += 1
        if(result == int(iris_y[i]) and int(iris_y[i]) != category):
            true_negative += 1
        if(result != int(iris_y[i]) and int(iris_y[i]) == category):
            false_negative += 1
        if(result != int(iris_y[i]) and result == category):
            false_positive += 1
            
    precision = true_positive / (true_positive + false_positive) * 100
    recall = true_positive / (true_positive + false_negative) * 100
    accuracy = (true_positive + true_negative) / (true_positive + true_negative + false_positive + false_negative) * 100
        
    return (true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy)

In [120]:
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predict(20,iris_x,0)
print("CLASS 0")
print("true_positive: ", true_positive)
print("true_negative: ", true_negative)
print("false_positive: ", false_positive)
print("false_negative: ", false_negative)
print("precision: ", precision)
print("recall: ", recall)
print("accuracy: ", accuracy)


CLASS 0
true_positive:  50
true_negative:  97
false_positive:  0
false_negative:  0
precision:  100.0
recall:  100.0
accuracy:  100.0


In [121]:
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predict(20,iris_x,1)
print("CLASS 1")
print("true_positive: ", true_positive)
print("true_negative: ", true_negative)
print("false_positive: ", false_positive)
print("false_negative: ", false_negative)
print("precision: ", precision)
print("recall: ", recall)
print("accuracy: ", accuracy)


CLASS 1
true_positive:  48
true_negative:  99
false_positive:  1
false_negative:  2
precision:  97.95918367346938
recall:  96.0
accuracy:  98.0


In [122]:
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predict(20,iris_x,2)
print("CLASS 2")
print("true_positive: ", true_positive)
print("true_negative: ", true_negative)
print("false_positive: ", false_positive)
print("false_negative: ", false_negative)
print("precision: ", precision)
print("recall: ", recall)
print("accuracy: ", accuracy)


CLASS 2
true_positive:  49
true_negative:  98
false_positive:  2
false_negative:  1
precision:  96.07843137254902
recall:  98.0
accuracy:  98.0
