# Plus Proche Voisin


## Question 01 - 02:

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
from sklearn.metrics.pairwise import euclidean_distances

def PPV(X, Y):
    """Leave-one-out 1-nearest-neighbour classification.

    Every sample is classified with the label of its closest *other* sample
    (Euclidean distance), and the predictions are compared with the true
    labels.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature vectors.
    Y : array-like of shape (n_samples,)
        Class labels. Any dtype is accepted (integers, strings, ...).

    Returns
    -------
    predictions : numpy.ndarray
        Predicted label for each sample, with the same dtype as ``Y``.
    accuracy : float
        Fraction of samples whose prediction equals the true label.

    Raises
    ------
    ValueError
        If fewer than two samples are given (no neighbour exists) or if
        ``X`` and ``Y`` do not have the same length.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    num_samples = X.shape[0]

    if num_samples < 2:
        raise ValueError("PPV needs at least two samples to find a neighbour.")
    if len(Y) != num_samples:
        raise ValueError("X and Y must contain the same number of samples.")

    # Treat 1-D input as n_samples single-feature vectors.
    X = X.reshape(num_samples, -1)

    # Keep the label dtype instead of forcing integers.
    predictions = np.empty_like(Y)

    for i in range(num_samples):
        # Euclidean distance from sample i to every sample.
        distances = np.linalg.norm(X - X[i], axis=1)

        # A sample must not be its own neighbour.
        distances[i] = np.inf

        predictions[i] = Y[np.argmin(distances)]

    accuracy = np.mean(predictions == Y)
    return predictions, accuracy

## Question 03 :

In [3]:
# Load the Iris dataset: X holds the feature matrix, Y the class labels.
iris = datasets.load_iris()
X = iris.data
Y = iris.target

# Run the hand-written nearest-neighbour classifier on the whole dataset
# and print its accuracy as a percentage.
predictions, accuracy = PPV(X, Y)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.00%


## Question 04 :

In [4]:
from sklearn import neighbors
from sklearn.model_selection import LeaveOneOut

def PPV_KNeighnors(X, Y, k):
    """Leave-one-out evaluation of scikit-learn's k-nearest-neighbours classifier.

    Each sample is held out in turn; a ``KNeighborsClassifier`` with
    ``n_neighbors=k`` is fitted on all remaining samples and used to
    classify the held-out one.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by the split's integer index arrays.
    Y : numpy.ndarray
        Class labels, indexed the same way.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.

    Returns
    -------
    predictions : numpy.ndarray
        Predicted label for every sample, in split order.
    accuracy : float
        Fraction of predictions equal to the corresponding entry of ``Y``.
    """
    splitter = LeaveOneOut()
    held_out_labels = []

    for fit_idx, eval_idx in splitter.split(X):
        # Fit on everything except the single held-out sample.
        model = neighbors.KNeighborsClassifier(n_neighbors=k)
        model.fit(X[fit_idx], Y[fit_idx])

        # The test fold has exactly one sample, so keep its only prediction.
        held_out_labels.append(model.predict(X[eval_idx])[0])

    predicted = np.array(held_out_labels)
    return predicted, np.mean(predicted == Y)

## k = 1

In [5]:
# Leave-one-out accuracy of the scikit-learn k-NN classifier with k=1.
predictions, accuracy = PPV_KNeighnors(X, Y, k=1)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.00%


## k = 2

In [6]:
# Leave-one-out accuracy of the scikit-learn k-NN classifier with k=2.
predictions, accuracy = PPV_KNeighnors(X, Y, k=2)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 94.67%


## k = 5

In [7]:
# Leave-one-out accuracy of the scikit-learn k-NN classifier with k=5.
predictions, accuracy = PPV_KNeighnors(X, Y, k=5)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.67%


## k = 10

In [8]:
# Leave-one-out accuracy of the scikit-learn k-NN classifier with k=10.
predictions, accuracy = PPV_KNeighnors(X, Y, k=10)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 97.33%


## k = 20

In [9]:
# Leave-one-out accuracy of the scikit-learn k-NN classifier with k=20.
predictions, accuracy = PPV_KNeighnors(X, Y, k=20)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 98.00%


## Question 05

In [10]:
def PPV_k(X, Y, K):
    """Leave-one-out K-nearest-neighbours classification by majority vote.

    Every sample is classified with the most frequent label among its ``K``
    closest *other* samples (Euclidean distance). When several labels are
    tied, the smallest one (in sorted order) wins.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature vectors.
    Y : array-like of shape (n_samples,)
        Class labels. Any sortable dtype is accepted (negative integers,
        strings, ...).
    K : int
        Number of neighbours that vote. Values larger than
        ``n_samples - 1`` are capped so that a sample never votes for
        itself.

    Returns
    -------
    predictions : numpy.ndarray
        Predicted label for each sample, with the same dtype as ``Y``.
    accuracy : float
        Fraction of samples whose prediction equals the true label.

    Raises
    ------
    ValueError
        If fewer than two samples are given, if ``X`` and ``Y`` differ in
        length, or if ``K`` is smaller than 1.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    num_samples = X.shape[0]

    if num_samples < 2:
        raise ValueError("PPV_k needs at least two samples to find neighbours.")
    if len(Y) != num_samples:
        raise ValueError("X and Y must contain the same number of samples.")
    if K < 1:
        raise ValueError("K must be at least 1.")

    # Treat 1-D input as n_samples single-feature vectors.
    X = X.reshape(num_samples, -1)

    # Only n_samples - 1 real neighbours exist; taking more would pull the
    # held-out sample (whose distance is set to inf) into its own vote.
    num_neighbors = min(K, num_samples - 1)

    # Encode labels as 0..n_classes-1 so that bincount works for any dtype.
    classes, encoded = np.unique(Y, return_inverse=True)
    encoded = encoded.ravel()

    predictions = np.empty_like(Y)

    for i in range(num_samples):
        # Euclidean distance from sample i to every sample.
        distances = np.linalg.norm(X - X[i], axis=1)

        # A sample must not be its own neighbour.
        distances[i] = np.inf

        # Stable sort: equidistant neighbours are taken in index order,
        # which makes the result deterministic.
        nearest = np.argsort(distances, kind="stable")[:num_neighbors]

        votes = np.bincount(encoded[nearest], minlength=len(classes))
        predictions[i] = classes[np.argmax(votes)]

    accuracy = np.mean(predictions == Y)
    return predictions, accuracy

In [11]:
# Accuracy of the hand-written K-nearest-neighbours classifier with K=5.
predictions, accuracy = PPV_k(X, Y, 5)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.67%


# Classifieur Bayésien Naïf

## Question 01 - 02 : 

In [12]:
def CBN(X, Y):
    """Classify samples with a barycentre-based Bayesian rule.

    For each class the barycentre (mean feature vector) and the prior
    probability (class frequency) are estimated from the data. A sample is
    assigned to the class maximizing ``P(class) * exp(-d) / sum(exp(-d))``,
    where ``d`` is the Euclidean distance to the class barycentre. The
    likelihood is therefore a distance-based surrogate, not a product of
    per-feature probabilities.

    The score is evaluated in log space (``log P(class) - d``), which has
    the same arg-max but does not underflow when distances are large.

    The classifier is evaluated on the same samples it was fitted on.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature vectors.
    Y : array-like of shape (n_samples,)
        Class labels; they need not be the integers 0..n_classes-1.

    Returns
    -------
    predictions : numpy.ndarray
        Predicted label for each sample, with the same dtype as ``Y``.
    accuracy : float
        Fraction of samples whose prediction equals the true label.

    Raises
    ------
    ValueError
        If no sample is given or if ``X`` and ``Y`` differ in length.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    num_samples = X.shape[0]

    if num_samples == 0:
        raise ValueError("CBN needs at least one sample.")
    if len(Y) != num_samples:
        raise ValueError("X and Y must contain the same number of samples.")

    # Treat 1-D input as n_samples single-feature vectors.
    X = X.reshape(num_samples, -1)

    # Use the labels actually present rather than assuming 0..n_classes-1.
    classes, counts = np.unique(Y, return_counts=True)

    # Barycentre of each class.
    barycenters = np.array([X[Y == label].mean(axis=0) for label in classes])

    # Log prior probabilities log P(class).
    log_priors = np.log(counts / num_samples)

    # distances[i, k] = Euclidean distance from sample i to barycentre k.
    distances = np.linalg.norm(X[:, None, :] - barycenters[None, :, :], axis=2)

    # The softmax normalizer is shared by all classes of a sample, so it
    # does not affect the arg-max and is dropped.
    scores = log_priors - distances

    predictions = classes[np.argmax(scores, axis=1)]
    accuracy = np.mean(predictions == Y)
    return predictions, accuracy

In [13]:
# Accuracy of the barycentre-based Bayesian classifier on the dataset.
predictions, accuracy = CBN(X, Y)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 92.67%


## Question 03 :

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
from sklearn.metrics import accuracy_score

# Split the dataset into training and test sets (20% held out for testing,
# fixed random_state so the split is reproducible).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Create a Gaussian naive Bayes classifier.
gnb = GaussianNB()

# Train the model on the training set.
gnb.fit(X_train, Y_train)

# Predict the labels of the test set.
predictions = gnb.predict(X_test)

# Compute and print the accuracy on the test set.
accuracy = accuracy_score(Y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 100.00%


## Question 04 :

In [15]:
from scipy.stats import norm

def CBN_dist(X, Y):
    """Gaussian naive Bayes classification evaluated on the fitting data.

    For every class and feature a normal distribution is fitted (mean and
    population standard deviation). A sample is assigned to the class that
    maximizes ``P(class) * prod_j N(x_j; mean_kj, std_kj)``. The product is
    evaluated as a sum of log-densities so that it does not underflow when
    there are many features.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature vectors.
    Y : array-like of shape (n_samples,)
        Class labels; they need not be the integers 0..n_classes-1.

    Returns
    -------
    predictions : numpy.ndarray
        Predicted label for each sample, with the same dtype as ``Y``.
    accuracy : float
        Fraction of samples whose prediction equals the true label.

    Raises
    ------
    ValueError
        If no sample is given or if ``X`` and ``Y`` differ in length.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    num_samples = X.shape[0]

    if num_samples == 0:
        raise ValueError("CBN_dist needs at least one sample.")
    if len(Y) != num_samples:
        raise ValueError("X and Y must contain the same number of samples.")

    # Treat 1-D input as n_samples single-feature vectors.
    X = X.reshape(num_samples, -1)

    # Use the labels actually present rather than assuming 0..n_classes-1.
    classes, counts = np.unique(Y, return_counts=True)

    # The parameters do not depend on the sample being classified, so they
    # are estimated once instead of once per sample.
    means = np.array([X[Y == label].mean(axis=0) for label in classes])
    stds = np.array([X[Y == label].std(axis=0) for label in classes])

    # A feature that is constant within a class has std 0, for which the
    # normal density is undefined (NaN); use a tiny positive scale instead.
    stds = np.maximum(stds, np.finfo(float).eps)

    log_priors = np.log(counts / num_samples)

    # log_likelihoods[i, k] = sum_j log N(X[i, j]; means[k, j], stds[k, j])
    log_likelihoods = norm.logpdf(
        X[:, None, :], loc=means[None, :, :], scale=stds[None, :, :]
    ).sum(axis=2)

    # Pick the class with the highest log posterior (up to a constant).
    predictions = classes[np.argmax(log_priors + log_likelihoods, axis=1)]

    accuracy = np.mean(predictions == Y)
    return predictions, accuracy

In [16]:
# Accuracy of the hand-written Gaussian naive Bayes classifier on the dataset.
predictions, accuracy = CBN_dist(X, Y)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.00%
