In [1]:
from collections import Counter

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
def distance_euclidienne(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        accepted; the shapes must be equal or broadcastable.

    Returns
    -------
    numpy.float64
        Square root of the sum of the squared coordinate differences.
    """
    # Convert to float arrays before subtracting: plain Python lists do not
    # support `-`, and unsigned-integer arrays wrap around on negative
    # differences, which would silently corrupt the distance.
    ecart = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(ecart ** 2))

In [3]:
def voisins_proches(X_train, y_train, x_test, k):
    """Return the labels of the k training samples closest to ``x_test``.

    Closeness is measured with the Euclidean distance.

    Parameters
    ----------
    X_train : array-like, one entry per training sample
        Training features. Each sample is flattened to a vector.
    y_train : array-like, same length as ``X_train``
        Label of each training sample. Indexed by position, so pandas
        objects with a shuffled index are handled correctly.
    x_test : array-like
        The query point; must have as many features as a training sample.
    k : int
        Number of neighbours to return; must be at least 1.

    Returns
    -------
    list
        Labels of the nearest samples, closest first. Samples at equal
        distance keep their training-set order. The list is shorter than
        ``k`` when there are fewer than ``k`` training samples.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or if ``X_train`` and ``y_train`` do not
        have the same length.
    """
    # A negative k would make the final slice drop elements from the end
    # instead of selecting the nearest ones, so reject it up front.
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))

    features = np.asarray(X_train, dtype=float)
    labels = np.asarray(y_train)
    n_samples = len(features)
    if len(labels) != n_samples:
        raise ValueError(
            "X_train and y_train must have the same length (%d != %d)"
            % (n_samples, len(labels))
        )
    if n_samples == 0:
        return []

    # One row per training sample, whatever the per-sample shape was.
    features = features.reshape(n_samples, -1)
    query = np.asarray(x_test, dtype=float).reshape(-1)

    # Distance from the query to every training sample in a single pass.
    distances = np.sqrt(np.sum((features - query) ** 2, axis=1))

    # A stable sort keeps equidistant samples in training-set order.
    plus_proches = np.argsort(distances, kind="stable")[:k]
    return [labels[i] for i in plus_proches]

In [4]:
def prediction_knn(X_train, y_train, X_test, k):
    """Predict a label for every sample of ``X_test`` by k-nearest-neighbour vote.

    Parameters
    ----------
    X_train : array-like
        Training features, one entry per sample.
    y_train : array-like
        Training labels, aligned with ``X_train``. Labels must be hashable.
    X_test : iterable of array-like
        Samples to classify.
    k : int
        Number of neighbours taking part in each vote.

    Returns
    -------
    numpy.ndarray
        One predicted label per sample of ``X_test``, in the same order.

    Raises
    ------
    ValueError
        If no neighbour is available for a sample (for example an empty
        training set), because a vote is then impossible.
    """
    predictions = []

    for x in X_test:
        voisins = voisins_proches(X_train, y_train, x, k)
        if len(voisins) == 0:
            raise ValueError("cannot vote: no neighbours were returned")

        # Majority vote. Counter remembers labels in first-seen order and
        # most_common() keeps that order among equal counts, so a tie goes to
        # the label of the nearest neighbour. Iterating over a set instead
        # would make the winner depend on hash order.
        gagnant, _ = Counter(voisins).most_common(1)[0]
        predictions.append(gagnant)

    return np.array(predictions)


In [5]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    numpy.float64
        Share of positions where both inputs agree, between 0 and 1
        (NaN for empty inputs, as ``np.mean`` of an empty array).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Coerce first: on plain lists `==` yields a single bool, which would
    # collapse the score to 0.0 or 1.0 instead of a per-sample comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this check, (n,) against (n, 1) would broadcast to an (n, n)
    # comparison and produce a meaningless score.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got %s and %s"
            % (y_true.shape, y_pred.shape)
        )

    return np.mean(y_true == y_pred)

In [6]:
# Load the diabetes dataset shipped with scikit-learn.
data = load_diabetes()
# Features are used as-is; the target is binarised for classification:
# 1 when the value is strictly above 140, 0 otherwise.
X, y = data.data, (data.target > 140).astype(int)

In [7]:
# Hold out 20 % of the samples for evaluation; the fixed seed makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits, so the test data never influences the fit.
# Note: X_train and X_test are overwritten by their scaled versions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Number of neighbours taking part in each vote.
k = 5
# Classify every test sample with the from-scratch KNN.
y_pred = prediction_knn(X_train=X_train, y_train=y_train, X_test=X_test, k=k)

In [10]:
# Share of test samples whose predicted class equals the true class.
score = accuracy(y_test, y_pred)
print("Accuracy du KNN (from scratch) :", score)

Accuracy du KNN (from scratch) : 0.6853932584269663
