In [1]:
import numpy as np
import sklearn.datasets as dt
import matplotlib.pyplot as plt


In [2]:
# Load the iris dataset through scikit-learn's datasets module (imported as `dt`).
iris = dt.load_iris()

In [4]:
# Pull the feature matrix and the class labels out of the loaded dataset.
iris_data, iris_labels = iris.data, iris.target

# Show a few (features, label) pairs picked from different parts of the dataset.
for sample_idx in (0, 79, 100):
    print(iris_data[sample_idx], iris_labels[sample_idx])

[5.1 3.5 1.4 0.2] 0
[5.7 2.6 3.5 1. ] 1
[6.3 3.3 6.  2.5] 2


In [7]:
# Create the learning set and the held-out test set.
# A fixed seed keeps the shuffle (and therefore the split) reproducible.
np.random.seed(40)
indices = np.random.permutation(len(iris_data))

# NOTE: despite its name, this is the number of samples held out for TESTING;
# everything before the last `No_of_training_data` shuffled indices is used for learning.
No_of_training_data = 12
learn_idx = indices[:-No_of_training_data]
test_idx = indices[-No_of_training_data:]

learnset_data, learnset_labels = iris_data[learn_idx], iris_labels[learn_idx]
testData, testLabel = iris_data[test_idx], iris_labels[test_idx]

# Peek at the first four rows of each split.
print(learnset_data[:4], learnset_labels[:4])
print(testData[:4],testLabel[:4])

[[4.4 3.  1.3 0.2]
 [5.6 3.  4.5 1.5]
 [6.4 3.2 5.3 2.3]
 [7.7 3.8 6.7 2.2]] [0 1 2 2]
[[5.  3.3 1.4 0.2]
 [6.1 2.8 4.7 1.2]
 [6.1 3.  4.6 1.4]
 [5.7 2.8 4.5 1.3]] [0 1 1 1]


In [14]:
# %matplotlib inline
# from mpl_toolkits.mplot3d import Axes3D as a3d
# colors = ['r','b']
# X=[]
# for iclass in range(3):
#     X.append([[],[],[]])
#     for i in range(len(learnset_data)):
#         if learnset_labels[i] == iclass:
#             X[iclass][0].append(learnset_data[i][0])
#             X[iclass][1].append(learnset_data[i][1])
#             X[iclass][2].append(sum(learnset_data[i][2:]))
# colors = ('r','g','y')
# fig = plt.figure()
# ax=fig.add_subplot(111,projection='3d')
# for iclass in range(3):
#     ax.scatter(X[iclass][0],X[iclass][1],X[iclass][2],c=colors[iclass])
# plt.show()


In [11]:
def dist(location1,location2):
    """Return the norm of the difference between two points.

    Both arguments are converted to NumPy arrays first, so plain lists or
    tuples of numbers are accepted as well as arrays. For 1-D inputs this is
    the Euclidean distance.
    """
    difference = np.array(location1) - np.array(location2)
    return np.linalg.norm(difference)

In [16]:
def get_neighs(training_set,labels,Test_instance,k,distance=None):
    """Return the k training samples closest to ``Test_instance``.

    Parameters
    ----------
    training_set : indexable sequence of feature vectors.
    labels : indexable sequence of class labels; ``labels[i]`` belongs to
        ``training_set[i]``.
    Test_instance : feature vector whose neighbours are wanted.
    k : number of neighbours to return. If the training set holds fewer than
        ``k`` samples, all of them are returned.
    distance : optional callable ``distance(a, b)`` returning a sortable number.
        When omitted, the Euclidean distance is used.

    Returns
    -------
    list of ``(instance, distance, label)`` tuples, nearest first.
    """
    if distance is None:
        # Resolved at call time so the function can be defined without
        # depending on any other name in the notebook.
        def distance(a, b):
            return np.linalg.norm(np.asarray(a) - np.asarray(b))

    scored = []
    for index in range(len(training_set)):
        candidate = training_set[index]
        # `gap` (not `dist`) so the notebook-level dist() function is not shadowed.
        gap = distance(Test_instance, candidate)
        scored.append((candidate, gap, labels[index]))

    # Stable sort on the distance only: equally distant samples keep their
    # training-set order, and the feature vectors themselves are never compared.
    scored.sort(key=lambda entry: entry[1])
    return scored[:k]

In [20]:
# Quick check: look up the 3 nearest neighbours of the first five test samples.
# `neighbours` is overwritten on every pass; uncomment the print to inspect it.
for i in range(5):
    query = testData[i]
    neighbours = get_neighs(
        learnset_data,
        learnset_labels,
        query,
        3,
        distance=dist,
    )
    # print('Index=',i,' testData=',testData[i],testLabel[i],neighbours)

In [26]:
from collections import Counter
def vote(neighs):
    """Return the label that occurs most often among the neighbours.

    Each element of ``neighs`` must carry its class label at index 2.
    When several labels share the top count, the one encountered first wins.
    """
    tally = Counter(entry[2] for entry in neighs)
    (winning_label, _count), = tally.most_common(1)[:1] or [tally.most_common(1)[0]]
    return winning_label

In [31]:
# Classify every held-out sample with k=3 and print the majority vote
# next to the true label.
for i in range(No_of_training_data):
    neighbours = get_neighs(
        learnset_data,
        learnset_labels,
        testData[i],
        3,
        distance=dist,
    )
    predicted = vote(neighbours)
    print('Index= ', i,
          ' result of vote:', predicted,
          'Label : ', testLabel[i],
          ' data=', testData[i])

Index=  0  result of vote: 0 Label :  0  data= [5.  3.3 1.4 0.2]
Index=  1  result of vote: 1 Label :  1  data= [6.1 2.8 4.7 1.2]
Index=  2  result of vote: 1 Label :  1  data= [6.1 3.  4.6 1.4]
Index=  3  result of vote: 1 Label :  1  data= [5.7 2.8 4.5 1.3]
Index=  4  result of vote: 1 Label :  1  data= [6.4 2.9 4.3 1.3]
Index=  5  result of vote: 0 Label :  0  data= [5.1 3.8 1.5 0.3]
Index=  6  result of vote: 2 Label :  2  data= [5.8 2.8 5.1 2.4]
Index=  7  result of vote: 1 Label :  1  data= [6.1 2.8 4.  1.3]
Index=  8  result of vote: 0 Label :  0  data= [4.8 3.  1.4 0.1]
Index=  9  result of vote: 1 Label :  1  data= [7.  3.2 4.7 1.4]
Index=  10  result of vote: 0 Label :  0  data= [5.  3.4 1.5 0.2]
Index=  11  result of vote: 2 Label :  1  data= [5.9 3.2 4.8 1.8]


In [37]:
def vote_prob(neighs):
    """Return the majority label together with its share of the votes.

    Parameters
    ----------
    neighs : iterable of neighbour records whose class label sits at index 2.

    Returns
    -------
    (winner, fraction) where ``winner`` is the most frequent label (the first
    one encountered wins a tie) and ``fraction`` is its vote count divided by
    the total number of neighbours.

    Raises
    ------
    ValueError
        If ``neighs`` is empty, since there is nothing to vote on.
    """
    class_counter = Counter(neighbour[2] for neighbour in neighs)
    # Rank once and reuse the result instead of calling most_common() repeatedly.
    ranking = class_counter.most_common()
    if not ranking:
        raise ValueError("vote_prob() needs at least one neighbour")
    winner, votes4winner = ranking[0]
    total_votes = sum(count for _label, count in ranking)
    return winner, votes4winner/total_votes

In [41]:
# Repeat the evaluation with k=5, this time also reporting what fraction
# of the neighbours agreed with the winning label.
for i in range(No_of_training_data):
    neighbors = get_neighs(learnset_data, learnset_labels, testData[i], 5, distance=dist)
    outcome = vote_prob(neighbors)
    print("index: ", i, ", vote_prob: ", outcome, ", label: ", testLabel[i], ", data: ", testData[i])

index:  0 , vote_prob:  (0, 1.0) , label:  0 , data:  [5.  3.3 1.4 0.2]
index:  1 , vote_prob:  (1, 1.0) , label:  1 , data:  [6.1 2.8 4.7 1.2]
index:  2 , vote_prob:  (1, 1.0) , label:  1 , data:  [6.1 3.  4.6 1.4]
index:  3 , vote_prob:  (1, 1.0) , label:  1 , data:  [5.7 2.8 4.5 1.3]
index:  4 , vote_prob:  (1, 1.0) , label:  1 , data:  [6.4 2.9 4.3 1.3]
index:  5 , vote_prob:  (0, 1.0) , label:  0 , data:  [5.1 3.8 1.5 0.3]
index:  6 , vote_prob:  (2, 1.0) , label:  2 , data:  [5.8 2.8 5.1 2.4]
index:  7 , vote_prob:  (1, 1.0) , label:  1 , data:  [6.1 2.8 4.  1.3]
index:  8 , vote_prob:  (0, 1.0) , label:  0 , data:  [4.8 3.  1.4 0.1]
index:  9 , vote_prob:  (1, 1.0) , label:  1 , data:  [7.  3.2 4.7 1.4]
index:  10 , vote_prob:  (0, 1.0) , label:  0 , data:  [5.  3.4 1.5 0.2]
index:  11 , vote_prob:  (2, 0.6) , label:  1 , data:  [5.9 3.2 4.8 1.8]
