In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import heapq as hp

In [2]:
df_iris = pd.read_csv(u'data/iris.txt',sep=' ')

In [3]:
sl = np.matrix(df_iris.sl[:]).T
sw = np.matrix(df_iris.sw[:]).T
pl = np.matrix(df_iris.pl[:]).T
pw = np.matrix(df_iris.pw[:]).T
A = np.hstack((sl,sw,pl,pw))
c = np.matrix(df_iris.c).T

In [4]:
class tup:
    def __init__(self, val, idx):
        self.val = val
        self.idx = idx
        
    def __lt__(self, other):        
        return self.val > other.val
    
    def __le__(self, other):
        return self.val <= other.val
 
    def __eq__(self, other):
        return self.val == other.val
    
    def __ne__(self, other):
        return self.val != other.val

    def __gt__(self, other):
        return self.val > other.val

    def __ge__(self, other):
        return self.val >= other.val

    def __str__(self):
        return '{:.3},{:d}'.format(self.val, self.idx)

In [5]:
def kNN(k, A, x, c):
    heap = []
    N = A.shape[0]   
    
    for i in range(k):
        hp.heappush(heap, tup(np.inf, -1))
    
    for i in range(N):
        e = A[i, :] - x
        e = e.reshape((4, 1))
        tp = tup(float(e.T * e), i)
        if tp <= heap[0]:
            hp.heapreplace(heap, tp)
            
    categories = []
    print '=============DISTANCE &  CATEGORY ============================'
    for t in range(len(heap)):
        h = hp.heappop(heap)
        categories.append(int(c[h.idx]))        
        print("DISTANCE = ",'{:.3}'.format(h.val),"TYPE: ",float(c[h.idx]))
    return max(set(categories), key=categories.count)

In [6]:
def predictionforCategories(k, A, c, testCategory):
    truePositive = 0
    trueNegative = 0
    falseNegative = 0
    falsePositive = 0
    
    for i in range(50 * (testCategory - 1), (50 + (50 * (testCategory - 1)))):
        predictedCategory = kNN(k, np.delete(A, i, axis=0), A[i,:], c)
            
        if predictedCategory == int(c[i]):
            if int(c[i]) == testCategory:
                truePositive += 1
            else:
                trueNegative += 1
        else:
            if int(c[i]) == testCategory:
                falseNegative += 1
            else:
                falsePositive += 1
    
    accuracy = (100. * (truePositive + trueNegative)) / (truePositive + trueNegative + falsePositive + falseNegative)
    precision = (100. * truePositive) / (truePositive + falsePositive)
    recall = (100. * truePositive) / (truePositive + falseNegative)
    
    print ("Accuracy %f, Precision %f, Recall %f" % (accuracy, precision, recall))

In [7]:
k = 3
test = A[1,:] + 3*np.random.randn(1,4)

print("\n")
print '=============TEST DATA ============================'
print test

result = kNN(k, A, test, c)
print("\n")
print '=============TEST DATA CATEGORY===================='
print(result)

print '===================================================================================================================='
print("\n")
print '=============TYPE 1 versus OTHERs=================='
predictionforCategories(k, A, c, 1)

print("\n")
print '=============TYPE 2 versus OTHERs=================='
predictionforCategories(k, A, c, 2)

print("\n")
print '=============TYPE 3 versus OTHERs=================='
predictionforCategories(k, A, c, 3)

print '===================================================================================================================='



[[ 5.4178171   9.37416453  1.83813851 -1.39075186]]
('DISTANCE = ', '30.2', 'TYPE: ', 1.0)
('DISTANCE = ', '29.5', 'TYPE: ', 1.0)
('DISTANCE = ', '28.1', 'TYPE: ', 1.0)


1


('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.01', 'TYPE: ', 1.0)
('DISTANCE = ', '0.03', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.07', 'TYPE: ', 1.0)
('DISTANCE = ', '0.06', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.05', 'TYPE: ', 1.0)
('DISTANCE = ', '0.03', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.03', 'TYPE: ', 1.0)
('DISTANCE = ', '0.03', 'TYPE: ', 1.0)
('DISTANCE = ', '0.02', 'TYPE: ', 1.0)
('DISTANCE = ', '0.13', 'TYPE: ', 1.0)
('DISTANCE = ', '0.12', 'TYPE: ', 1.0)
('DISTANCE = ', '0.11', 'TYPE: ', 1.0)
('DISTANCE = ', '0.09', 'TYPE: ', 1.0)
('DISTANCE = ', '0.07', 'TYPE: ', 1.0)
('DISTANCE = ', '0.05', 'TYPE: ', 1.0)
('DI