# Código do KNN

## Imports

In [113]:
from functools import reduce
import numpy as np
import numpy.linalg as LA
import math

## Classe KNN

In [116]:
class KNN:
    @staticmethod
    def BLOCK_DISTANCE(a, b):
        '''Or distance of Manhattan. Considers only movements in axis directions.'''
        return np.sum(np.absolute(a-b))

    @staticmethod
    def EUCLIDEAN_DISTANCE(a, b):
        '''The shortest route between two points.'''
        return LA.norm(a-b)

    @staticmethod
    def NO_WEIGHT(acc, i):
        '''Just counts the frequency of every label.'''
        (label, _) = i
        if (label in acc):
            acc[label] += 1
        else:
            acc[label] = 1
        return acc

    @staticmethod
    def WEIGHTED(acc, i):
        '''Weights the values by their distance.'''
        (label, distance) = i
        if (label in acc):
            acc[label] += 1 / (distance ** 2)
        else:
            acc[label] = 1 / (distance ** 2)
        return acc

    def __init__(self, distance = None, k = 3, weight = None, adaptive = False):
        self.samples = []
        self.distance = KNN.EUCLIDEAN_DISTANCE if distance == None else distance
        self.k = k
        self.weight = KNN.NO_WEIGHT if weight == None else weight
        self.adaptive = adaptive
    
    def calculateRadius(self, sample):
        '''Calculates the radius for the adaptive distance.
        If the distance for the classification is changed this function should be called again.'''
        distances = [(s[-1], self.distance(sample[0:-1], s[0:-1])) for s in self.samples]
        distances.sort(key = lambda i: i[1])
        for i in distances:
            if i[0] != sample[-1]:
                return i[1]
        return math.inf

    def train(self, training):
        '''Trains the kNN.'''
        self.samples = training
        self.sampleRadius = [self.calculateRadius(i) for i in self.samples]
        print(self.sampleRadius)

    def classify(self, sample):
        '''Classifies a single sample.'''
        closest = self.find_closest(sample)
        votes = reduce(self.weight, closest, dict())
        chosen = max(votes.keys(), key = lambda i: votes[i])
        return chosen

    def test(self, testSamples):
        '''Tests the kNN efficiency in classifying its samples.'''
        realResults = [i[-1] for i in testSamples]
        print(realResults)
        testResults = [self.classify(i) for i in testSamples]
        print(testResults)
        matched = [a == b for (a, b) in zip(realResults, testResults)]
        return {
            "hitRate": matched.count(True) / len(realResults),
            "missRate": matched.count(False) / len(realResults)
        }

    def find_closest(self, sample):
        '''Finds the closest k samples.'''
        if self.adaptive:
            distances = [(s[-1], self.distance(sample[0:-1], s[0:-1]) / self.sampleRadius[i])\
                        for s in self.samples]
        else:
            distances = [(s[-1], self.distance(sample[0:-1], s[0:-1])) for s in self.samples]
        distances.sort(key = lambda i: i[1])
        return distances[0:self.k]

a = KNN()
a.train(np.array([[0, 1, 0], [1, 9, 1], [4, 8, 0], [2, 4, 1], [6, 3, 0]]))
a.test(np.array([[0, 1, 0], [4, 3, 1], [9, 4, 0]]))

[0, 1, 0]
[0, 0, 0]


{'hitRate': 0.6666666666666666, 'missRate': 0.3333333333333333}

# Avaliações

In [108]:
from scipy.io import arff
import numpy as np
import pandas as pd
from time import process_time

## Base de Dados 1 - 

In [94]:
data = arff.loadarff('Datasets/cm1.arff')
dataFrame = pd.DataFrame(data[0])
dataFrame.defects = [1 if i == b'true' else 0 for i in dataFrame.defects]
rawData = dataFrame.values
rawData

array([[   1.1,    1.4,    1.4, ...,    1.2,    1.4,    0. ],
       [   1. ,    1. ,    1. , ...,    1. ,    1. ,    1. ],
       [  24. ,    5. ,    1. , ...,   19. ,    9. ,    0. ],
       ..., 
       [  82. ,   11. ,    3. , ...,  190. ,   21. ,    1. ],
       [  10. ,    2. ,    1. , ...,   13. ,    3. ,    1. ],
       [  28. ,    6. ,    5. , ...,   37. ,   11. ,    1. ]])

In [95]:
# Separating samples by class
classes = {i[-1]: rawData[np.where(rawData[:,-1] == i[-1])] for i in rawData}
classes

{0.0: array([[  1.1,   1.4,   1.4, ...,   1.2,   1.4,   0. ],
        [ 24. ,   5. ,   1. , ...,  19. ,   9. ,   0. ],
        [ 20. ,   4. ,   4. , ...,  16. ,   7. ,   0. ],
        ..., 
        [  4. ,   1. ,   1. , ...,   6. ,   1. ,   0. ],
        [  3. ,   1. ,   1. , ...,   2. ,   1. ,   0. ],
        [ 12. ,   3. ,   1. , ...,  25. ,   5. ,   0. ]]),
 1.0: array([[   1.,    1.,    1., ...,    1.,    1.,    1.],
        [  31.,    4.,    1., ...,   51.,    7.,    1.],
        [  29.,    5.,    1., ...,   37.,    9.,    1.],
        ..., 
        [  82.,   11.,    3., ...,  190.,   21.,    1.],
        [  10.,    2.,    1., ...,   13.,    3.,    1.],
        [  28.,    6.,    5., ...,   37.,   11.,    1.]])}

In [98]:
# k-fold
k = 10
def k_fold(sepSamples, k):
    folds = [[] for _ in range(k)]
    for i in sepSamples:
        splitClass = np.array_split(sepSamples[i], k)
        [a.extend(b) for (a, b) in zip(folds, splitClass)]
    return folds
    
folds = k_fold(classes, k)

In [118]:
def cross_validation(knn, folds):
    results = []
    trainTimes = []
    testTimes = []
    for i in range(len(folds)):
        print("Testing on fold: " + str(i))
        train = [s for j, fold in enumerate(folds) if i != j for s in fold]
        test = folds[i]

        trainTimes.append(process_time())
        knn.train(train)
        trainTimes[-1] = process_time() - trainTimes[-1]

        testTimes.append(process_time())
        results.append(knn.test(test))
        testTimes[-1] = process_time() - testTimes[-1]
    return {'results': results, 'trainTimes': trainTimes, 'testTimes': testTimes}
        
knn = KNN()
cross_validation(knn, folds)

Testing on fold: 0
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Testing on fold: 1
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

{'results': [{'hitRate': 0.78, 'missRate': 0.22},
  {'hitRate': 0.9, 'missRate': 0.1},
  {'hitRate': 0.88, 'missRate': 0.12},
  {'hitRate': 0.78, 'missRate': 0.22},
  {'hitRate': 0.84, 'missRate': 0.16},
  {'hitRate': 0.88, 'missRate': 0.12},
  {'hitRate': 0.86, 'missRate': 0.14},
  {'hitRate': 0.84, 'missRate': 0.16},
  {'hitRate': 0.9, 'missRate': 0.1},
  {'hitRate': 0.8958333333333334, 'missRate': 0.10416666666666667}],
 'testTimes': [0.140625,
  0.109375,
  0.140625,
  0.140625,
  0.15625,
  0.140625,
  0.140625,
  0.125,
  0.140625,
  0.125],
 'trainTimes': [1.28125,
  1.25,
  1.5625,
  1.296875,
  1.234375,
  1.28125,
  1.25,
  1.28125,
  1.25,
  1.234375]}

### KNN sem peso

In [66]:
knn = KNN()
knn.train(rawData[0:400])
knn.test(rawData[400:-1])

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


{'hitRate': 0.5154639175257731, 'missRate': 0.4845360824742268}

### KNN com peso

### KNN adaptativo sem peso

## Base de Dados 2 - 

### KNN sem peso

### KNN com peso

### KNN adaptativo sem peso

# Conclusão