In [1]:
#Inspired by Google Developers - Machine Learning Recipes with Josh Gordon
#(https://youtu.be/AoeEHqVSNOw?list=PLOU2XLYxmsIIuiBfYad6rFYQU_jL2ryal)
#Written in Python3

from scipy.spatial import distance

def euc(a,b):
    """Return the Euclidean distance between the points ``a`` and ``b``.

    Thin wrapper around ``scipy.spatial.distance.euclidean``.
    """
    gap = distance.euclidean(a, b)
    return gap

class ScrappyKNN:
    """Bare-bones 1-nearest-neighbour classifier with ``fit``/``predict``.

    ``fit`` memorises the training data. ``predict`` gives every test row
    the label of the training row with the smallest ``euc`` distance to it.
    """

    def fit(self, X_train, y_train):
        """Store the training rows and their labels.

        Args:
            X_train: Sized, indexable collection of feature rows.
            y_train: Labels; ``y_train[i]`` is the label of ``X_train[i]``.

        Raises:
            ValueError: If there are no training rows, or the number of
                rows and labels differ.
        """
        # Validate before assigning so a rejected call does not clobber the
        # data stored by an earlier successful fit.
        n_rows = len(X_train)
        n_labels = len(y_train)
        if n_rows == 0:
            raise ValueError("X_train must contain at least one sample")
        if n_rows != n_labels:
            raise ValueError(
                "X_train and y_train have inconsistent lengths: "
                "%d rows vs %d labels" % (n_rows, n_labels)
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with one predicted label per row of ``X_test``."""
        return [self.closest(row) for row in X_test]

    def closest(self, row):
        """Return the label of the training row nearest to ``row``.

        On equal distances the earliest training row wins, because ``min``
        keeps the first minimal element it encounters.
        """
        best_index = min(
            range(len(self.X_train)),
            key=lambda i: euc(row, self.X_train[i]),
        )
        return self.y_train[best_index]
            

In [2]:
#load the Iris dataset through scikit-learn's datasets module
from sklearn import datasets
iris = datasets.load_iris()

In [3]:
#X holds the features, y holds the labels
X, y = iris.data, iris.target

In [4]:
#split the data into a training half and a test half
#sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
#0.20; train_test_split now lives in sklearn.model_selection
try:
    from sklearn.model_selection import train_test_split
except ImportError:  #scikit-learn older than 0.18
    from sklearn.cross_validation import train_test_split
#NOTE: no random_state is passed, so the split differs on every run;
#pass random_state=<int> to make the results reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .5)

In [5]:
#This is the place to choose the classifier. By replacing these lines
#it is possible to use another classifier.

#OPTION 1
#from sklearn import tree
#my_classifier = tree.DecisionTreeClassifier()

#OPTION 2
#from sklearn.neighbors import KNeighborsClassifier
#my_classifier = KNeighborsClassifier()

#OPTION 3 Writing your own classifier
my_classifier = ScrappyKNN()


In [6]:
#train the chosen classifier on the training split
my_classifier.fit(X_train, y_train)

In [7]:
#predict one label for every row of the test split and show them
predictions = my_classifier.predict(X_test)
print(predictions)

[1, 2, 1, 1, 2, 0, 0, 2, 2, 0, 1, 2, 1, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 1, 1, 0, 2, 1, 0, 2, 1, 1, 2, 1, 2, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 2, 0, 2, 0, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 1, 1, 2, 2, 1, 1, 2, 0, 1, 1, 2, 0, 0, 2, 2, 1]


In [8]:
#compare the predicted labels with the true test labels
from sklearn.metrics import accuracy_score
print(accuracy_score(y_true=y_test, y_pred=predictions))

0.933333333333
