In [2]:
%matplotlib inline
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html
# Load the iris dataset, then pull out its feature rows and class labels.
iris = datasets.load_iris()
iris_data, iris_labels = iris.data, iris.target
# Last expression of the cell: show which fields the loaded object exposes.
list(iris.keys())

['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename']

In [3]:
# Seed the global NumPy RNG so the shuffled split is the same on every run.
np.random.seed(37)
indices = np.random.permutation(len(iris_data))

# NOTE: despite its name, n_training_samples is the number of samples held
# out for the TEST set: the last n entries of the permutation go to the test
# set and everything before them forms the learning set.
n_training_samples = 12
learn_indices = indices[:-n_training_samples]
test_indices = indices[-n_training_samples:]

learnset_data, learnset_labels = iris_data[learn_indices], iris_labels[learn_indices]
testset_data, testset_labels = iris_data[test_indices], iris_labels[test_indices]

# Peek at the first four rows of each subset as a sanity check.
print(learnset_data[:4], learnset_labels[:4])
print(testset_data[:4], testset_labels[:4])

[[6.4 2.9 4.3 1.3]
 [5.2 3.5 1.5 0.2]
 [6.5 3.  5.8 2.2]
 [5.8 2.7 5.1 1.9]] [1 0 2 2]
[[7.9 3.8 6.4 2. ]
 [4.4 2.9 1.4 0.2]
 [6.  2.2 4.  1. ]
 [5.6 3.  4.1 1.3]] [2 0 1 1]


In [4]:
# Euclidean distance
def distance(instance1, instance2):
    """
    Return the Euclidean (L2) distance between two instances.

    instance1, instance2 : array-likes of real numbers whose shapes are
                           equal or broadcastable against each other

    Both inputs are converted to floating point before they are
    subtracted. Without that, unsigned-integer inputs wrap around on
    subtraction (e.g. 1 - 2 becomes 255 for uint8) and the resulting
    distance is wrong.
    """
    # asarray avoids a copy when the input is already a float array.
    instance1 = np.asarray(instance1, dtype=float)
    instance2 = np.asarray(instance2, dtype=float)

    return np.linalg.norm(instance1 - instance2)

In [5]:
def get_neighbors(training_set, 
                  labels, 
                  test_instance, 
                  k, 
                  distance=distance):
    """
    get_neighbors returns the k instances of 'training_set' that are
    closest to 'test_instance'.

    training_set  : sequence of training instances
    labels        : labels[i] is the label of training_set[i]
    test_instance : the instance whose neighbors are wanted
    k             : number of neighbors to return; must be >= 0
    distance      : a reference to the function used to calculate
                    the distance between two instances

    The returned list contains at most k 3-tuples
    (instance, dist, label), ordered by ascending dist, where
    instance is the element of training_set itself (not its index),
    dist     is distance(test_instance, instance),
    label    is the entry of labels belonging to that instance.
    Instances with equal dist keep their training_set order, because
    the sort is stable.

    Raises ValueError if k is negative.
    """
    if k < 0:
        # A negative k would slice from the end of the sorted list and
        # silently drop the farthest instances instead of selecting the
        # nearest ones.
        raise ValueError(f"k must be non-negative, got {k}")
    candidates = [
        (instance, distance(test_instance, instance), labels[index])
        for index, instance in enumerate(training_set)
    ]
    return sorted(candidates, key=lambda candidate: candidate[1])[:k]

In [6]:
from collections import Counter
def vote(neighbors):
    """
    Return the class label that occurs most often among 'neighbors'.

    Each neighbor is a tuple whose element at position 2 is its class
    label; the other elements are ignored. An empty 'neighbors' raises
    IndexError, since there is no winner to pick.
    """
    label_counts = Counter(entry[2] for entry in neighbors)
    top_label, _ = label_counts.most_common(1)[0]
    return top_label

In [7]:
# Four measurements of a flower whose species we want to predict.
unknown_flower = [4.8, 2.5, 5.3, 2.4]

# Classify it by majority vote among its 3 nearest learning-set neighbors.
neighbors = get_neighbors(
    learnset_data,
    learnset_labels,
    unknown_flower,
    k=3,
    distance=distance,
)
classification = vote(neighbors)
print(f"result of vote: {classification}")
    

result of vote: 2


In [8]:
# Result is an Iris-Virginica!