# 1. Importações

In [None]:
import numpy as np
import pandas as pd
from math import log, pi, sqrt

import statistics
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")

# 2. Definição da classe

In [None]:
class KNN():
    """k-nearest-neighbours classifier with a pluggable distance function.

    Fitting only stores the training data. Each prediction measures the
    distance from the query point to every stored training point, keeps the
    ``k`` closest ones and returns the most common label among them.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest neighbours.

        Args:
            k: Number of neighbours consulted for each prediction.
        """
        self.k = k

    def distance_euclidian(self, a, b):
        """Return the Euclidean distance between the points ``a`` and ``b``.

        Args:
            a: Sequence or array of numeric coordinates.
            b: Sequence or array of numeric coordinates with the same shape
                as ``a``.

        Returns:
            The distance as a Python ``float``.
        """
        # Vectorised difference instead of a Python-level loop over pairs.
        diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
        return sqrt(np.sum(diff ** 2))

    def get_classes(self, y):
        """Return the distinct labels found in ``y``.

        Args:
            y: Sequence or array of labels.

        Returns:
            The result of ``pandas.array(y).unique()``.
        """
        return pd.array(y).unique()

    def fit(self, x_train, y_train, function = None):
        """Store the training data and select the distance function.

        Args:
            x_train: Training points, indexable by position (``x_train[i]``).
            y_train: Training labels; ``y_train[i]`` is the label of
                ``x_train[i]``.
            function: Optional callable ``function(a, b)`` returning the
                distance between two points. When ``None`` the Euclidean
                distance (``distance_euclidian``) is used.
        """
        # TODO: perform a grid search (noted by the original author, not implemented).
        self.x_train = x_train
        self.y_train = y_train
        # Assign in both cases so that a caller-supplied metric is actually
        # the one used by predict_1nn, and a refit can switch metrics.
        self.function = self.distance_euclidian if function is None else function

    def predict_1nn(self, x_test):
        """Predict the label of a single point.

        Args:
            x_test: Coordinates of the point to classify.

        Returns:
            The most common label among the ``k`` nearest training points,
            as selected by ``statistics.mode``.
        """
        # Sort [distance, label] pairs; pairs with equal distance are ordered
        # by their label because lists compare element by element.
        ranked = sorted(
            [self.function(self.x_train[i], x_test), self.y_train[i]]
            for i in range(len(self.x_train))
        )
        k_neighbors = [label for _, label in ranked[:self.k]]
        return statistics.mode(k_neighbors)

    def predict(self, x_test):
        """Predict a label for every point in ``x_test``.

        Args:
            x_test: Iterable of points to classify.

        Returns:
            A list with one predicted label per point, in input order.
        """
        return [self.predict_1nn(xi_test) for xi_test in x_test]

    

# 3. Leitura dos dados

In [None]:
# Read the comma-separated data set. The file has no header row, so pandas
# labels the columns 0, 1 and 2.
# NOTE(review): absolute path, presumably a mounted Google Drive — confirm before running elsewhere.
data = pd.read_csv(
    "/content/drive/My Drive/[2020.1] APRENDIZADO DE MÁQUINA/TRABALHO/03. Códigos/03 - KNN/data.txt",
    header=None,
)
# Report the number of rows (patterns), then preview the first ones.
print("Quantidade de padrões: ", data.shape[0])
data.head()

Quantidade de padrões:  118


Unnamed: 0,0,1,2
0,0.051267,0.69956,1
1,-0.092742,0.68494,1
2,-0.21371,0.69225,1
3,-0.375,0.50219,1
4,-0.51325,0.46564,1


In [None]:
# Number of leading rows used for training; the remaining rows are held out.
N_TRAIN = 100

# Slice by position with `.iloc` (end-exclusive) so the two parts are disjoint.
# Label-based `.loc[:100]` / `.loc[100:]` include both endpoints, which would
# put row 100 in the training split and in the test split at the same time.
x_train = data[[0, 1]].iloc[:N_TRAIN].values
y_train = data[2].iloc[:N_TRAIN].values

x_test = data[[0, 1]].iloc[N_TRAIN:].values
y_test = data[2].iloc[N_TRAIN:].values

# Every row must end up in exactly one of the two splits.
assert len(x_train) + len(x_test) == len(data)

# NOTE(review): this is a sequential (unshuffled) split; if the file is ordered
# by class the held-out rows may cover a single class only — consider a
# shuffled or stratified split.

In [None]:
# Display the training feature matrix (columns 0 and 1 of `data`).
x_train

array([[ 0.051267 ,  0.69956  ],
       [-0.092742 ,  0.68494  ],
       [-0.21371  ,  0.69225  ],
       [-0.375    ,  0.50219  ],
       [-0.51325  ,  0.46564  ],
       [-0.52477  ,  0.2098   ],
       [-0.39804  ,  0.034357 ],
       [-0.30588  , -0.19225  ],
       [ 0.016705 , -0.40424  ],
       [ 0.13191  , -0.51389  ],
       [ 0.38537  , -0.56506  ],
       [ 0.52938  , -0.5212   ],
       [ 0.63882  , -0.24342  ],
       [ 0.73675  , -0.18494  ],
       [ 0.54666  ,  0.48757  ],
       [ 0.322    ,  0.5826   ],
       [ 0.16647  ,  0.53874  ],
       [-0.046659 ,  0.81652  ],
       [-0.17339  ,  0.69956  ],
       [-0.47869  ,  0.63377  ],
       [-0.60541  ,  0.59722  ],
       [-0.62846  ,  0.33406  ],
       [-0.59389  ,  0.005117 ],
       [-0.42108  , -0.27266  ],
       [-0.11578  , -0.39693  ],
       [ 0.20104  , -0.60161  ],
       [ 0.46601  , -0.53582  ],
       [ 0.67339  , -0.53582  ],
       [-0.13882  ,  0.54605  ],
       [-0.29435  ,  0.77997  ],
       [-0

In [None]:
# Shapes of the train/test feature matrices and label vectors; the row counts
# of the two splits are expected to add up to len(data).
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((101, 2), (18, 2), (101,), (18,))

In [None]:
# Display the training feature matrix again (same expression as an earlier cell).
x_train

array([[ 0.051267 ,  0.69956  ],
       [-0.092742 ,  0.68494  ],
       [-0.21371  ,  0.69225  ],
       [-0.375    ,  0.50219  ],
       [-0.51325  ,  0.46564  ],
       [-0.52477  ,  0.2098   ],
       [-0.39804  ,  0.034357 ],
       [-0.30588  , -0.19225  ],
       [ 0.016705 , -0.40424  ],
       [ 0.13191  , -0.51389  ],
       [ 0.38537  , -0.56506  ],
       [ 0.52938  , -0.5212   ],
       [ 0.63882  , -0.24342  ],
       [ 0.73675  , -0.18494  ],
       [ 0.54666  ,  0.48757  ],
       [ 0.322    ,  0.5826   ],
       [ 0.16647  ,  0.53874  ],
       [-0.046659 ,  0.81652  ],
       [-0.17339  ,  0.69956  ],
       [-0.47869  ,  0.63377  ],
       [-0.60541  ,  0.59722  ],
       [-0.62846  ,  0.33406  ],
       [-0.59389  ,  0.005117 ],
       [-0.42108  , -0.27266  ],
       [-0.11578  , -0.39693  ],
       [ 0.20104  , -0.60161  ],
       [ 0.46601  , -0.53582  ],
       [ 0.67339  , -0.53582  ],
       [-0.13882  ,  0.54605  ],
       [-0.29435  ,  0.77997  ],
       [-0

# 4. Exemplo de uso

In [None]:
# Build a classifier with the default k (3), store the training split, and
# classify every held-out point; the last expression displays the predictions.
knn = KNN()
knn.fit(x_train, y_train)
knn.predict(x_test)

[0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1]

In [None]:
# Display the true labels of the held-out rows for comparison with the predictions.
y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])