In [3]:
import numpy as np
import pandas as pd
import heapq as hp

## Load the iris table and build the feature matrix A and label column c
df_iris = pd.read_csv(u'data/iris.txt', sep=' ')

# One column matrix per measurement; the individual names stay defined at
# module level in case other cells read them.
sl, sw, pl, pw = (
    np.matrix(df_iris[column]).T for column in ('sl', 'sw', 'pl', 'pw')
)

# Features side by side (one row per sample), labels as a matching column.
A = np.hstack([sl, sw, pl, pw])
c = np.matrix(df_iris['c']).T

In [4]:
class tup:
    """A (value, index) pair ordered by value, for use as a heap entry.

    Only ``<`` is reversed (it returns True when this value is the *larger*
    one), so ``heapq``, which is a min-heap built on ``<``, keeps the entry
    with the largest value at position 0. All other comparison operators use
    the natural ordering of ``val``; note that this makes ``<`` and ``>``
    return the same result. ``idx`` never takes part in comparisons.
    """

    def __init__(self, val, idx):
        self.val, self.idx = val, idx

    def __lt__(self, rhs):
        # Deliberately reversed: this is what turns heapq into a max-heap.
        return self.val > rhs.val

    def __le__(self, rhs):
        return self.val <= rhs.val

    def __eq__(self, rhs):
        return self.val == rhs.val

    def __ne__(self, rhs):
        return self.val != rhs.val

    def __gt__(self, rhs):
        return self.val > rhs.val

    def __ge__(self, rhs):
        return self.val >= rhs.val

    def __str__(self):
        # "<value to 3 significant digits>,<index>"
        return format(self.val, '.3') + ',' + format(self.idx, 'd')

In [5]:
def kthNearestNeighbour(k,A,test,c,verbose):
    """Classify ``test`` by majority vote among its ``k`` nearest rows of ``A``.

    Nearness is the squared Euclidean distance (no square root is taken); that
    squared value is also what the verbose output prints as "Distance".

    Args:
        k: number of neighbours to keep; must be at least 1.
        A: 2-D collection of samples, one row per sample (np.matrix or array).
        test: the query sample, with as many values as ``A`` has columns.
        c: one label per row of ``A``; each label is converted with ``int``.
        verbose: when true, print the retained neighbours (farthest first)
            and the resulting category.

    Returns:
        The category (int) that occurs most often among the retained
        neighbours.

    Raises:
        ValueError: if ``k`` < 1, if the shapes of ``A`` and ``test`` do not
            agree, or if no neighbour could be retained (e.g. ``A`` is empty).
    """
    if k < 1:
        raise ValueError('k must be at least 1, got {0}'.format(k))

    # Work on plain ndarrays so the arithmetic does not rely on np.matrix
    # semantics and no 1x1 matrix has to be converted to a scalar.
    samples = np.asarray(A, dtype=float)
    query = np.asarray(test, dtype=float).ravel()
    labels = np.asarray(c).ravel()
    if samples.ndim != 2 or samples.shape[1] != query.shape[0]:
        raise ValueError('A must be 2-D with one column per value in test')
    N = samples.shape[0]

    # Initialize heap with infinite distance values (index -1 marks a sentinel)
    heap = []
    for _ in range(k):
        hp.heappush(heap, tup(np.inf, -1))

    # Populate the heap with k nearest items. heap[0] is the farthest entry
    # kept so far; any sample at least as close replaces it.
    for i in range(N):
        e = samples[i] - query
        tp = tup(float(np.dot(e, e)), i)
        if tp <= heap[0]:
            hp.heapreplace(heap, tp)

    # Sentinels survive when fewer than k samples were accepted; they must not
    # vote (index -1 would silently pick the last label).
    a = [int(labels[x.idx]) for x in heap if x.idx >= 0]
    if not a:
        raise ValueError('no neighbours found: A holds no usable samples')
    classifiedCategory = max(set(a), key=a.count)

    if verbose:
        # Print the retained neighbours, farthest first
        t = 0
        while heap:
            h = hp.heappop(heap)
            if h.idx < 0:
                continue
            category = int(labels[h.idx])
            print('neighbour {0} - Distance: {1}, Category:{2}'.format(t,h.val,category))
            t += 1
        print('Classified as type {0}'.format(classifiedCategory))
    return classifiedCategory

In [6]:
# Query point: the second sample shifted by +1 in every feature.
# np.matrix is used instead of the np.mat alias, which NumPy 2.0 removed.
test = A[1,:] + np.matrix([1,1,1,1])
kthNearestNeighbour(5,A,test,c,True)

neighbour 0 - Distance: 2.06, Category:1
neighbour 1 - Distance: 1.65, Category:1
neighbour 2 - Distance: 1.57, Category:1
neighbour 3 - Distance: 1.39, Category:1
neighbour 4 - Distance: 1.38, Category:1
Classified as type 1


1

In [7]:
##Classifying Type 2 against 1,3 using KNN
def getConditionValues(A,testCategory,labels=None,k=5,classifier=None):
    """Count one-vs-rest confusion-matrix entries for ``testCategory``.

    Every row of ``A`` is classified against the whole of ``A`` (the row itself
    is among the candidates), and the prediction is compared with the row's
    true label. ``testCategory`` is the positive class; every other label is
    treated as negative.

    Args:
        A: 2-D samples, one row per sample.
        testCategory: the label treated as the positive class.
        labels: true label per row of ``A``. Defaults to the notebook-level
            label vector ``c``.
        k: number of neighbours handed to the classifier (default 5).
        classifier: callable ``(k, A, sample, labels, verbose) -> category``.
            Defaults to ``kthNearestNeighbour``.

    Returns:
        Tuple ``(truePositive, trueNegative, falseNegative, falsePositive)``.

    Raises:
        ValueError: if the number of labels differs from the number of rows.
    """
    if labels is None:
        labels = c
    if classifier is None:
        classifier = kthNearestNeighbour

    actualLabels = np.asarray(labels).ravel()
    N = A.shape[0]
    if actualLabels.shape[0] != N:
        raise ValueError('labels must hold exactly one entry per row of A')

    truePositive = 0
    trueNegative = 0
    falseNegative = 0
    falsePositive = 0

    for i in range(N):
        actual = int(actualLabels[i])
        estimated = classifier(k, A, A[i,:], actualLabels, False)
        actualIsPositive = actual == testCategory
        predictedIsPositive = estimated == testCategory

        if actualIsPositive and predictedIsPositive:
            truePositive += 1
        elif actualIsPositive:
            falseNegative += 1
        elif predictedIsPositive:
            falsePositive += 1
        else:
            # Neither the truth nor the prediction is the target class. This
            # is a true negative even when the two non-target labels differ.
            trueNegative += 1

    return truePositive, trueNegative, falseNegative, falsePositive

# Evaluate category 2 against the other categories and report summary metrics.
truePostive, trueNegative, falseNegative, falsePositive = getConditionValues(A, 2)

predictedPositive = truePostive + falsePositive
actualPositive = truePostive + falseNegative
totalCounted = falsePositive + trueNegative + truePostive + falseNegative

print('Precision is {:.3}'.format(truePostive / float(predictedPositive)))
print('Recall is {:.3}'.format(truePostive / float(actualPositive)))
print('Acurracy is {:.3}'.format((truePostive + trueNegative) / float(totalCounted)))

Precision is 0.959
Recall is 0.94
Acurracy is 0.967
