In [1]:
%matplotlib inline
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
iris_data = iris.data
iris_labels = iris.target
print(iris_data[0], iris_data[79], iris_data[100])
print(iris_labels[0], iris_labels[79], iris_labels[100])
iris_data[1:10]


[5.1 3.5 1.4 0.2] [5.7 2.6 3.5 1. ] [6.3 3.3 6.  2.5]
0 1 2


array([[4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

In [2]:
np.random.seed(42)
indices = np.random.permutation(len(iris_data))
n_training_samples = 12
learnset_data = iris_data[indices[:-n_training_samples]]
learnset_labels = iris_labels[indices[:-n_training_samples]]
testset_data = iris_data[indices[-n_training_samples:]]
testset_labels = iris_labels[indices[-n_training_samples:]]
print(learnset_data[:4], learnset_labels[:4])
print(testset_data[:4], testset_labels[:4])

[[6.1 2.8 4.7 1.2]
 [5.7 3.8 1.7 0.3]
 [7.7 2.6 6.9 2.3]
 [6.  2.9 4.5 1.5]] [1 0 2 1]
[[5.7 2.8 4.1 1.3]
 [6.5 3.  5.5 1.8]
 [6.3 2.3 4.4 1.3]
 [6.4 2.9 4.3 1.3]] [1 2 1 1]


In [3]:
def distance(instance1, instance2):
    # just in case, if the instances are lists or tuples:
    instance1 = np.array(instance1)
    instance2 = np.array(instance2)

    return np.linalg.norm(instance1 - instance2)
print(distance([3, 5], [1, 1]))
print(distance(learnset_data[3], learnset_data[44]))

4.47213595499958
3.4190641994557516


In [4]:
def get_neighbors(training_set,
                  labels,
                  test_instance,
                  k,
                  distance=distance):
    """
    get_neighors calculates a list of the k nearest neighbors
    of an instance 'test_instance'.
    The list neighbors contains 3-tuples with
    (index, dist, label)
    where
    index    is the index from the training_set,
    dist     is the distance between the test_instance and the
             instance training_set[index]
    distance is a reference to a function used to calculate the
             distances
    """
    distances = []
    for index in range(len(training_set)):
        dist = distance(test_instance, training_set[index])
        distances.append((training_set[index], dist, labels[index]))
    distances.sort(key=lambda x: x[1])
    neighbors = distances[:k]
    return neighbors

In [5]:
for i in range(5):
    neighbors = get_neighbors(learnset_data,
                              learnset_labels,
                              testset_data[i],
                              3,
                              distance=distance)
    print(i,
          testset_data[i],
          testset_labels[i],
          neighbors)

0 [5.7 2.8 4.1 1.3] 1 [(array([5.7, 2.9, 4.2, 1.3]), 0.14142135623730995, 1), (array([5.6, 2.7, 4.2, 1.3]), 0.17320508075688815, 1), (array([5.6, 3. , 4.1, 1.3]), 0.22360679774997935, 1)]
1 [6.5 3.  5.5 1.8] 2 [(array([6.4, 3.1, 5.5, 1.8]), 0.1414213562373093, 2), (array([6.3, 2.9, 5.6, 1.8]), 0.24494897427831783, 2), (array([6.5, 3. , 5.2, 2. ]), 0.3605551275463988, 2)]
2 [6.3 2.3 4.4 1.3] 1 [(array([6.2, 2.2, 4.5, 1.5]), 0.2645751311064586, 1), (array([6.3, 2.5, 4.9, 1.5]), 0.574456264653803, 1), (array([6. , 2.2, 4. , 1. ]), 0.5916079783099617, 1)]
3 [6.4 2.9 4.3 1.3] 1 [(array([6.2, 2.9, 4.3, 1.3]), 0.20000000000000018, 1), (array([6.6, 3. , 4.4, 1.4]), 0.2645751311064587, 1), (array([6.6, 2.9, 4.6, 1.3]), 0.3605551275463984, 1)]
4 [5.6 2.8 4.9 2. ] 2 [(array([5.8, 2.7, 5.1, 1.9]), 0.3162277660168375, 2), (array([5.8, 2.7, 5.1, 1.9]), 0.3162277660168375, 2), (array([5.7, 2.5, 5. , 2. ]), 0.33166247903553986, 2)]


In [6]:
testset_data_a = ([4.8, 2.5, 5.3, 2.4])
neighborsu = get_neighbors(learnset_data,
                              learnset_labels,
                              testset_data_a,
                              10,
                              distance=distance)
print(testset_data_a,neighborsu)

[4.8, 2.5, 5.3, 2.4] [(array([5.7, 2.5, 5. , 2. ]), 1.0295630140987002, 2), (array([5.8, 2.8, 5.1, 2.4]), 1.0630145812734648, 2), (array([5.8, 2.7, 5.1, 1.9]), 1.1532562594670797, 2), (array([5.8, 2.7, 5.1, 1.9]), 1.1532562594670797, 2), (array([5.4, 3. , 4.5, 1.5]), 1.4352700094407325, 1), (array([6. , 2.7, 5.1, 1.6]), 1.469693845669907, 1), (array([6. , 3. , 4.8, 1.8]), 1.5165750888103102, 2), (array([5.9, 3.2, 4.8, 1.8]), 1.5198684153570667, 1), (array([5.6, 3. , 4.5, 1.5]), 1.5297058540778352, 1), (array([6. , 2.2, 5. , 1.5]), 1.5588457268119895, 2)]


In [7]:
from collections import Counter
def vote(neighbors):
    class_counter = Counter()
    for neighbor in neighbors:
        class_counter[neighbor[2]] += 1
    return class_counter.most_common(1)[0][0]

In [8]:

print("index: ", 10,
          ", result of vote: ", vote(neighborsu),
          ", label: ",
          ", data: ", testset_data_a)

index:  10 , result of vote:  2 , label:  , data:  [4.8, 2.5, 5.3, 2.4]


In [9]:
for i in range(n_training_samples):
    neighbors = get_neighbors(learnset_data,
                              learnset_labels,
                              testset_data_a,
                              3,
                              distance=distance)
    print("index: ", i,
          ", result of vote: ", vote(neighborsu),
          ", data: ", testset_data_a)

index:  0 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  1 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  2 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  3 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  4 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  5 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  6 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  7 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  8 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  9 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  10 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]
index:  11 , result of vote:  2 , data:  [4.8, 2.5, 5.3, 2.4]


result of vote:  2  -> Klasse 2 bedeutet es ist eine Iris Virginica !


