# 💡 Criação do Algoritmo KNN do zero

## O objetivo é entender como o algoritmo funciona por dentro, implementando-o passo a passo.

# Classificação

In [1]:
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

class KNNClassifier:
    """k-nearest-neighbours classifier written from scratch with NumPy.

    Each query point receives the label that wins a majority vote among the
    ``k`` training samples closest to it in Euclidean distance.

    Args:
        k: Number of neighbours that vote. Must be an integer >= 1. If it
            exceeds the number of training samples, every sample votes.
        normalize: When True, features are standardised (z-score) with the
            mean and standard deviation of the training data, and the same
            transformation is applied to query points in ``predict``.
    """

    def __init__(self, k=3, normalize=True):
        self.k = k
        self.normalize = normalize

    def _check_k(self):
        """Raise ValueError unless ``self.k`` is an integer >= 1."""
        if not isinstance(self.k, (int, np.integer)) or self.k < 1:
            raise ValueError(f"k must be an integer >= 1, got {self.k!r}")

    def fit(self, X_train, y_train):
        """Store the training data, standardising it when requested.

        Args:
            X_train: Array-like of training samples (one sample per row).
            y_train: Array-like of labels, one per training sample.

        Raises:
            ValueError: If the training set is empty or the number of
                samples and labels differ.
        """
        X = np.array(X_train)
        y = np.array(y_train)

        if X.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError(
                "X_train and y_train must have the same number of samples"
            )

        if self.normalize:
            self.mean = X.mean(axis=0)
            self.std = X.std(axis=0) + 1e-8  # avoid division by zero
            X = (X - self.mean) / self.std

        self.X_train = X
        self.y_train = y

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X_test):
        """Classify every sample in ``X_test``.

        Args:
            X_test: Array-like of query samples (one sample per row).

        Returns:
            A tuple ``(predictions, all_k_indices)``: ``predictions`` is a
            list with one predicted label per query sample, and
            ``all_k_indices`` is a list with, for each query sample, the
            array of indices (into the training set) of its nearest
            neighbours, ordered from nearest to farthest.

        Raises:
            ValueError: If ``k`` is not an integer >= 1.
        """
        self._check_k()

        X_test = np.array(X_test)
        if self.normalize:
            # Query points must live in the same scaled space as the
            # training data, so reuse the training mean/std.
            X_test = (X_test - self.mean) / self.std

        predictions = []
        all_k_indices = []
        for x in X_test:
            distances = [
                self._euclidean_distance(x, x_train) for x_train in self.X_train
            ]
            # A stable sort makes the choice among equidistant neighbours
            # deterministic: the sample that comes first in the training
            # set wins.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            # Labels with equal counts are ordered by first appearance, so a
            # tied vote goes to the label of the nearer neighbour.
            majority_vote = Counter(k_nearest_labels).most_common(1)[0][0]
            predictions.append(majority_vote)
            all_k_indices.append(k_indices)
        return predictions, all_k_indices


# Toy dataset: two features on very different scales, two classes.
X_train, y_train = [[1, 100], [2, 200], [3, 300], [4, 1000]], [0, 0, 1, 1]

# A single query point to classify.
X_test = [[2.5, 250]]

knn = KNNClassifier(normalize=True, k=3)
knn.fit(X_train, y_train)

# predict() returns a tuple: (predicted labels, neighbour indices per query).
pred = knn.predict(X_test)
predicted_labels, _neighbour_indices = pred

# Prints the list of predicted labels (one entry per query point).
print("Classe prevista:", predicted_labels)

Classe prevista: [0]


# Regressão

In [2]:
class KNNRegressor:
    """k-nearest-neighbours regressor written from scratch with NumPy.

    The prediction for a query point is the mean target value of the ``k``
    training samples closest to it in Euclidean distance.

    Args:
        k: Number of neighbours to average. Must be an integer >= 1. If it
            exceeds the number of training samples, every sample is used.
        normalize: When True, features are standardised (z-score) with the
            mean and standard deviation of the training data, and the same
            transformation is applied to query points in ``predict``.
            Defaults to False, which keeps distances in the raw feature
            space.
    """

    def __init__(self, k=3, normalize=False):
        self.k = k
        self.normalize = normalize

    def _check_k(self):
        """Raise ValueError unless ``self.k`` is an integer >= 1."""
        if not isinstance(self.k, (int, np.integer)) or self.k < 1:
            raise ValueError(f"k must be an integer >= 1, got {self.k!r}")

    def fit(self, X_train, y_train):
        """Store the training data, standardising it when requested.

        Args:
            X_train: Array-like of training samples (one sample per row).
            y_train: Array-like of target values, one per training sample.

        Raises:
            ValueError: If the training set is empty or the number of
                samples and targets differ.
        """
        X = np.array(X_train)
        y = np.array(y_train)

        if X.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError(
                "X_train and y_train must have the same number of samples"
            )

        if self.normalize:
            self.mean = X.mean(axis=0)
            self.std = X.std(axis=0) + 1e-8  # avoid division by zero
            X = (X - self.mean) / self.std

        self.X_train = X
        self.y_train = y

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        return np.linalg.norm(x1 - x2)

    def predict(self, X_test):
        """Predict a target value for every sample in ``X_test``.

        Args:
            X_test: Array-like of query samples (one sample per row).

        Returns:
            A list with one predicted value per query sample.

        Raises:
            ValueError: If ``k`` is not an integer >= 1.
        """
        self._check_k()

        X_test = np.array(X_test)
        if self.normalize:
            # Query points must live in the same scaled space as the
            # training data, so reuse the training mean/std.
            X_test = (X_test - self.mean) / self.std

        predictions = []
        for x in X_test:
            distances = [
                self._euclidean_distance(x, x_train) for x_train in self.X_train
            ]
            # A stable sort makes the choice among equidistant neighbours
            # deterministic: the sample that comes first in the training
            # set wins.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            values = [self.y_train[i] for i in k_indices]
            predictions.append(np.mean(values))  # mean of neighbours' targets
        return predictions

# Synthetic 1-D regression problem: y = 2x + Gaussian noise.
np.random.seed(42)
X = 10 * np.random.rand(100, 1)  # 100 samples with values between 0 and 10
noise = 2 * np.random.randn(100)
y = X.squeeze() * 2 + noise

# Hold out 20% of the samples for evaluation.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the from-scratch regressor on the training split.
knn_reg = KNNRegressor(k=5)
knn_reg.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = knn_reg.predict(X_test)

# Evaluate the model on the held-out samples.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R²:", r2)


MSE: 3.540243924525496
R²: 0.9035054752689758
