In [13]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import heapq as hp

In [14]:
# Read the data
df = pd.read_csv(u'data/iris.txt', sep=' ')

A = np.hstack([np.matrix(df.sl).T, np.matrix(df.sw).T, np.matrix(df.pl).T, np.matrix(df.pw).T])
c = np.matrix(df.c).T

In [15]:
#Object definition
class tup:
    def __init__(self, val, idx):
        self.val = val
        self.idx = idx
        
    def __lt__(self, other):
        '''Redefine for max-heap'''
        return self.val > other.val
    
    def __le__(self, other):
        return self.val <= other.val
 
    def __eq__(self, other):
        return self.val == other.val
    
    def __ne__(self, other):
        return self.val != other.val

    def __gt__(self, other):
        return self.val > other.val

    def __ge__(self, other):
        return self.val >= other.val

    def __str__(self):
        return '{:.3},{:d}'.format(self.val, self.idx)

In [16]:
def maxOccurrence(heap, c):
    categories = []
    for t in range(len(heap)):
        h = hp.heappop(heap)
        categories.append(int(c[h.idx]))
    return max(set(categories), key=categories.count)

def categoryPredictionK_NN(K, A, test, c):
    heap = []
    N = A.shape[0]   
    
    # Nodes(dummy)
    for i in range(K):
        hp.heappush(heap, tup(np.inf, -1))
    
    # K nearest items
    for i in range(N):
        e = A[i, :] - test
        e = e.reshape((4, 1))
        tp = tup(float(e.T * e), i)
        if tp <= heap[0]:
            hp.heapreplace(heap, tp)
            
    return maxOccurrence(heap, c)

In [26]:
def categoryPredictionFor2(K, A, c, test_category):
    t_p = 0
    t_n = 0
    f_n = 0
    f_p = 0
    
    for i in range(50 * (test_category - 1), 100 * (test_category - 1)):
        predicted_category = categoryPredictionK_NN(K, np.delete(A, i, axis=0), A[i, :], c)
            
        if predicted_category == int(c[i]):
            if int(c[i]) == test_category:
                t_p += 1
            else:
                t_n += 1
        else:
            if int(c[i]) == test_category:
                f_n += 1
            else:
                f_p += 1
    
    accuracy = (100. * (t_p + t_n)) / (t_p + t_n + f_p + f_n)
    precision = (100. * t_p) / (t_p + f_p)
    recall = (100. * t_p) / (t_p + f_n)
    
    print ("Accuracy %f, Precision %f, Recall %f" % (accuracy, precision, recall))
    
K = 3
test = np.mat([1.8, 2.1, 1.3, 1.2])
result = categoryPredictionK_NN(K, A, test, c)
print("\n")
print("------ FINAL RESULT ------")
print("Category: %d" % result)

categoryPredictionFor2(K, A, c, 2)



------ FINAL RESULT ------
Category: 1
Accuracy 94.000000, Precision 100.000000, Recall 94.000000
