In [None]:
'''
We'll code up a simple replacement for scikit-learn's KNeighborsClassifier
using a scrappy version of k-Nearest Neighbors (with k fixed at 1).
'''

In [1]:
from scipy.spatial import distance # Euclidean formula
from sklearn import datasets # Iris
from sklearn.metrics import accuracy_score # Accuracy

In [2]:
# Load the Iris dataset bundled with scikit-learn, then keep its
# measurements in x and the matching class labels in y.
iris = datasets.load_iris()
x, y = iris.data, iris.target

In [3]:
from sklearn.model_selection import train_test_split

# test_size=0.5 holds out a randomly chosen half of the samples for testing
# and keeps the other half for training.
# random_state pins the shuffle so that Restart & Run All reproduces the
# same split (and therefore the same predictions and accuracy) every time.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.5, random_state=42)

In [4]:
def euclid(a, b):
    """Return the Euclidean (straight-line) distance between points a and b.

    This is the Pythagorean theorem generalised to any number of
    dimensions: the length of the hypotenuse joining the two points.
    The computation is delegated to scipy's distance.euclidean.
    """
    length = distance.euclidean(a, b)
    return length

In [5]:
# Nearest-neighbour classifier from scratch (k is fixed at 1)
class KNN():
    """Minimal nearest-neighbour classifier.

    Each prediction is the label of the single training row that lies
    closest to the query row, as measured by euclid(). No voting among
    several neighbours is done, i.e. this behaves like k-NN with k = 1.
    """

    def fit(self, x_train, y_train):
        """Memorise the training data.

        Parameters:
            x_train: sequence of training rows (one feature vector per row).
            y_train: sequence of labels; y_train[i] is the label of x_train[i].

        Returns:
            self, so that calls can be chained.

        Raises:
            ValueError: if the two sequences differ in length, or are empty.
        """
        # Fail fast: a length mismatch would otherwise either silently ignore
        # the extra rows or blow up with an IndexError during prediction.
        if len(x_train) != len(y_train):
            raise ValueError(
                "x_train and y_train must have the same length "
                "(got %d and %d)" % (len(x_train), len(y_train)))
        if len(x_train) == 0:
            raise ValueError("cannot fit on an empty training set")

        self.x_train = x_train
        self.y_train = y_train
        return self

    def closest(self, row_x):
        """Return the label of the training row nearest to row_x.

        When several training rows are equally close, the one with the
        lowest index wins (min() keeps the first minimum it encounters).
        """
        nearest_index = min(
            range(len(self.y_train)),
            key=lambda index: euclid(row_x, self.x_train[index]),
        )
        return self.y_train[nearest_index]

    def predict(self, x_test):
        """Predict a label for every row of x_test.

        Parameters:
            x_test: iterable of rows, each laid out like the training rows.

        Returns:
            A list with one predicted label per row of x_test, in order.
        """
        return [self.closest(row) for row in x_test]

In [6]:
my_classifier = KNN()

# To compare against scikit-learn's own implementation, swap in:
# from sklearn.neighbors import KNeighborsClassifier
# my_classifier = KNeighborsClassifier()
# 0.96.. accuracy (may vary because of the dataset randomness)

my_classifier.fit(x_train, y_train)
predictions = my_classifier.predict(x_test)

# Report every test example next to its true label; mismatches are
# highlighted (bold red on white) through ANSI escape codes.
for i, (predicted, actual) in enumerate(zip(predictions, y_test)):
    print("Example %d: " % i, end='')
    if predicted != actual:
        print("\033[1;31;47mwrong (pred=%d - test=%d)\033[0m"
              % (predicted, actual))
    else:
        print("correct (pred=%d - test=%d)"
              % (predicted, actual))


Example 0: correct (pred=0 - test=0)
Example 1: correct (pred=0 - test=0)
Example 2: correct (pred=0 - test=0)
Example 3: correct (pred=2 - test=2)
Example 4: correct (pred=2 - test=2)
Example 5: correct (pred=2 - test=2)
Example 6: correct (pred=1 - test=1)
Example 7: correct (pred=1 - test=1)
Example 8: correct (pred=0 - test=0)
Example 9: correct (pred=0 - test=0)
Example 10: correct (pred=0 - test=0)
Example 11: correct (pred=1 - test=1)
Example 12: correct (pred=0 - test=0)
Example 13: [1;31;47mwrong (pred=2 - test=1)[0m
Example 14: correct (pred=1 - test=1)
Example 15: correct (pred=2 - test=2)
Example 16: correct (pred=0 - test=0)
Example 17: correct (pred=1 - test=1)
Example 18: correct (pred=2 - test=2)
Example 19: [1;31;47mwrong (pred=1 - test=2)[0m
Example 20: correct (pred=1 - test=1)
Example 21: correct (pred=1 - test=1)
Example 22: correct (pred=1 - test=1)
Example 23: correct (pred=1 - test=1)
Example 24: correct (pred=0 - test=0)
Example 25: correct (pred=2 - test=2

In [7]:

# accuracy_score returns a fraction in [0, 1]; scale it to a percentage
# in [0%, 100%] before printing.
accuracy_percent = accuracy_score(y_test, predictions) * 100
print("%f%% of success" % accuracy_percent)

97.333333% of success
