# Developing KNN Algorithms

Reference paper on KNN: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.2.815&rep=rep1&type=pdf

In [2]:
# Imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Column labels applied, in order, to the columns of the CSV (passed as `names` below)
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'classe']
# Read the iris data file, labelling its columns with the list above
iris_data = pd.read_csv('dados/iris.data', names = names)
# Show the first rows as a quick sanity check of the load
iris_data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,classe
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Splitting the predictor variables from the target variable
# X: values of the first four columns; y: the fifth column (still a pandas Series at this point)
X = iris_data.iloc[:,:4].values
y = iris_data.iloc[:,4]

# Labels of the target variable
# get_dummies builds one indicator column per distinct value of 'classe',
# so its column labels are the distinct class names.
target_class = pd.get_dummies(iris_data['classe']).columns
target_names = np.array(target_class)


In [5]:
# Converting the class names to their corresponding numeric codes.
# The code of a class is its position in target_names, so any number of classes
# is handled (not just three). The labels are read straight from iris_data rather
# than by reassigning y from itself, which keeps this cell safe to re-run: the
# previous version failed on a second run because y had already become an ndarray.
# Series.map also avoids relying on Series.replace silently downcasting object -> int.
y = iris_data.iloc[:, 4].map(
    {nome: codigo for codigo, nome in enumerate(target_names)}
).to_numpy()

In [6]:
# Splitting the data into training and test sets
# (30% held out for testing; the fixed random_state makes the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=33,
)
print(X_train.shape, y_train.shape)

(105, 4) (105,)


In [7]:
# Function that computes the Euclidean distance
def distancia_euclidiana(att1, att2):
    """Return the Euclidean (L2) distance between two numeric feature vectors.

    Parameters
    ----------
    att1, att2 : array-like
        Vectors with the same shape (lists, tuples or NumPy arrays).

    Returns
    -------
    numpy.float64
        ``sqrt(sum((att1 - att2) ** 2))``; ``0.0`` for two empty vectors.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape. The previous element-wise
        loop silently ignored trailing components of ``att2`` when it was longer
        and raised a bare ``IndexError`` when it was shorter.
    """
    a = np.asarray(att1, dtype=float)
    b = np.asarray(att2, dtype=float)
    if a.shape != b.shape:
        raise ValueError(
            "att1 and att2 must have the same shape, got {} and {}".format(a.shape, b.shape)
        )
    diff = a - b
    # Reduce over the first axis only, mirroring the original loop over range(len(att1)).
    return np.sqrt(np.sum(diff * diff, axis=0))

In [9]:
# KNN algorithm
def KNN(array, k, X_treino=None, y_treino=None, distancia=None):
    """Classify each point of ``array`` by majority vote among its k nearest training points.

    Parameters
    ----------
    array : sequence of feature vectors
        Points to classify.
    k : int
        Number of neighbours that vote; must satisfy ``1 <= k <= len(X_treino)``.
    X_treino, y_treino : sequences, optional
        Training feature vectors and their labels. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used, so existing ``KNN(array, k)`` calls
        behave as before.
    distancia : callable, optional
        ``distancia(a, b) -> number``. Defaults to ``distancia_euclidiana``.

    Returns
    -------
    list
        One predicted label per entry of ``array``, in the same order.

    Raises
    ------
    ValueError
        If ``k`` is outside ``1..len(X_treino)`` or the training features and
        labels have different lengths.
    """
    # Resolve the defaults at call time so the function sees the current notebook state.
    if X_treino is None:
        X_treino = X_train
    if y_treino is None:
        y_treino = y_train
    if distancia is None:
        distancia = distancia_euclidiana

    n_treino = len(X_treino)
    if len(y_treino) != n_treino:
        raise ValueError("X_treino and y_treino must have the same length")
    if not 1 <= k <= n_treino:
        raise ValueError(
            "k must be between 1 and the number of training points ({}), got {}".format(n_treino, k)
        )

    res = []
    for ponto in array:
        # (distance, label) pairs sort by distance first; equal distances fall back to the label.
        vizinhos = sorted(
            (distancia(X_treino[i], ponto), y_treino[i]) for i in range(n_treino)
        )
        votos = [rotulo for _, rotulo in vizinhos[:k]]
        # max() keeps the first candidate with the highest count; walking the candidates
        # in sorted order makes a tied vote resolve to the smallest label every time,
        # instead of depending on set iteration order.
        res.append(max(sorted(set(votos)), key=votos.count))
    return res


In [10]:
# Evaluating the model: classify every test sample using its 5 nearest neighbours
y_test_pred = KNN(X_test, k=5)
y_test_prediction = np.array(y_test_pred)


In [11]:
# Computing the accuracy (percentage of test samples predicted correctly)
acc = np.subtract(y_test, y_test_prediction)  # zero wherever prediction and true label agree
err = np.count_nonzero(acc)                   # number of misclassified samples
acuracia = (len(y_test) - err) / len(y_test) * 100
acuracia

95.55555555555556

In [12]:
# Making predictions for 5 new plants with K equal to 3
previsoes = KNN(
    [
        [6.7, 3.1, 4.4, 1.4],
        [4.6, 3.2, 1.4, 0.2],
        [4.6, 3.2, 1.4, 0.2],
        [6.4, 3.1, 5.5, 1.8],
        [6.3, 3.2, 5.6, 1.9],
    ],
    3,
)
previsoes

[1, 0, 0, 2, 2]

In [13]:
# Making predictions for 5 new plants with K equal to 5
previsoes = KNN(
    [
        [6.7, 3.1, 4.4, 1.4],
        [4.6, 3.2, 1.4, 0.2],
        [4.6, 3.2, 1.4, 0.2],
        [6.4, 3.1, 5.5, 1.8],
        [6.3, 3.2, 5.6, 1.9],
    ],
    5,
)
previsoes

[1, 0, 0, 2, 2]