In [287]:
import random
import math
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import operator
from collections import Counter
from sklearn.metrics import classification_report, accuracy_score

In [288]:
iris = load_iris()

In [289]:
# split data, %30->test %70->train
irisDataTrain, irisDataTest, irisTargetTrain, irisTargetTest = train_test_split(iris.data, iris.target, test_size=0.3)
trainData = [ list(x) for x in zip(irisDataTrain, irisTargetTrain)]
testData = [ list(x) for x in zip(irisDataTest,irisTargetTest)]

In [321]:
def calcEuclideanDistance(point1, point2):
    """
        \brief Calculate Euclidean Distance between two point
        \return Euclidean Distance
    """
    dist = 0
    # zip items and walk them on the same time, same order
    for item1, item2 in zip(point1,point2):
        dist += pow((item1-item2),2)
    return math.sqrt(dist)

In [325]:
def calcManhattanDistance(point1, point2):
    """
        \brief Calculate Manhattan Distance between two point
        \return Manhattan Distance
    """
    dist = 0
    for p1, p2 in zip(point1,point2):
        dist += abs(p1-p2)
    return dist

In [327]:
def getNeighbours(train_data, test_data, k, distFunc):
    dists = [ (data, distFunc(test_data, data[0])) for data in train_data]
    dists.sort(key=operator.itemgetter(1))
    return dists[:k]

In [342]:
def getAccuracy(testSet, predictions):
    correct = 0
    for i, predict in zip(testSet,predictions):
        if i[1] == predict:
            correct += 1
    return (correct/float(len(testSet))) * 100.0

In [332]:
def getMaxOccurancedClass(neighbours):
    classes = []
    for item in neighbours:
        classes.append(item[0][1])
    return Counter(classes).most_common()[0][0]

In [345]:
def developerTests():
    #Print example 1 train data
    print("TrainData[0]:",trainData[0])
    #Print example 1 test data
    print("TestData[0]:",testData[0])
    
    # Test Euclidean Distance
    distance = calcEuclideanDistance(trainData[0][0],testData[0][0])
    print("EuclideanDistance:",distance)
    # Test Manhattan Distance
    distance = calcManhattanDistance(trainData[0][0],testData[0][0])
    print("ManhattanDistance:",distance)
    
    neighboursEuc = getNeighbours(trainData, testData[0][0],5,calcEuclideanDistance)
    print("First Neighbour(Euclidean):",neighboursEuc[0])
    neighboursMan = getNeighbours(trainData, testData[0][0],5,calcManhattanDistance)
    print("First Neighbour(Manhattan):",neighboursMan[0])
        
    ## TEST KNN-5 IrisData - Euclidead Dist
    k=5
    predictions = []

    for _tdata in testData:
        # get neighbours for all test data
        neighbours = getNeighbours(trainData, _tdata[0],k, calcEuclideanDistance) 
        predictions.append(getMaxOccurancedClass(neighbours))
        
    print("\n->Report for EuclideanDistance KNN:5 on iris data")
    print("->Accuracy:",getAccuracy(testData, predictions))
    report = classification_report(irisTargetTest,predictions,target_names = iris.target_names )
    print(report)

    ## TEST KNN-5 IrisData - Manhattan Dist
    k=5
    predictions = []

    for _tdata in testData:
        # get neighbours for all test data
        neighbours = getNeighbours(trainData, _tdata[0],k, calcManhattanDistance) 
        predictions.append(getMaxOccurancedClass(neighbours))
    
    print("\n->Report for ManhattanDistance KNN:5 on iris data")
    print("->Accuracy:",getAccuracy(testData, predictions))
    report = classification_report(irisTargetTest,predictions,target_names = iris.target_names )
    print(report)
    
developerTests()

TrainData[0]: [array([6.4, 3.2, 4.5, 1.5]), 1]
TestData[0]: [array([7.9, 3.8, 6.4, 2. ]), 2]
EuclideanDistance: 2.543619468395381
ManhattanDistance: 4.5
First Neighbour(Euclidean): ([array([7.7, 3.8, 6.7, 2.2]), 2], 0.4123105625617661)
First Neighbour(Manhattan): ([array([7.7, 3.8, 6.7, 2.2]), 2], 0.7000000000000002)

->Report for EuclideanDistance KNN:5 on iris data
->Accuracy: 93.33333333333333
             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        15
 versicolor       0.94      0.89      0.91        18
  virginica       0.85      0.92      0.88        12

avg / total       0.94      0.93      0.93        45


->Report for ManhattanDistance KNN:5 on iris data
->Accuracy: 95.55555555555556
             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        15
 versicolor       0.94      0.94      0.94        18
  virginica       0.92      0.92      0.92        12

avg / total       0.96      0.96     