In [20]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split, cross_val_score, validation_curve
import matplotlib.pyplot as plt
from umap import UMAP
from sklearn.metrics import accuracy_score

In [2]:
# Load the training table from disk, then preview its first row as a quick
# sanity check of the column layout.
train_csv_path = '../data/train.csv'
df_data = pd.read_csv(train_csv_path)
df_data.head(1)

Unnamed: 0,id,species,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
0,1,Acer_Opalus,0.007812,0.023438,0.023438,0.003906,0.011719,0.009766,0.027344,0.0,...,0.007812,0.0,0.00293,0.00293,0.035156,0.0,0.0,0.004883,0.0,0.025391


In [4]:
# Feature matrix: every column except the row identifier and the label.
# `drop` already returns a new frame, so no defensive copy of df_data is needed.
X = df_data.drop(['id', 'species'], axis='columns').values

# Class names in order of first appearance; a name's position in this array
# is the integer label used for that class in `t`.
species = df_data['species'].unique()

# Integer-encode the targets (these are class indices, NOT one-hot vectors):
# t[k] is the index in `species` of the species of row k.
# pd.factorize numbers values by order of first appearance, which is the same
# order `unique()` uses, and yields integer codes rather than floats.
# A missing species is encoded as -1 instead of being silently put in class 0.
t = pd.factorize(df_data['species'])[0]

In [5]:
# Hold out 30% of the samples for the final evaluation; the fixed seed keeps
# the split identical from one run to the next.
X_train, X_test, t_train, t_test = train_test_split(
    X,
    t,
    test_size=0.3,
    random_state=0,
)

In [6]:
# Baseline hyper-parameters: L2 penalty strength, iteration cap, and the
# constant by which each update is multiplied (passed as eta0).
alpha, learning_rate = 0.001, 0.001
max_iter = 10000

# Baseline perceptron fitted on the training split. For a multi-class target
# scikit-learn trains one binary classifier per class (one-vs-rest).
perceptron_kwargs = dict(penalty='l2', alpha=alpha, max_iter=max_iter, eta0=learning_rate)
classifieur = Perceptron(**perceptron_kwargs)
classifieur.fit(X_train, t_train)

In [7]:
# Accuracy of the baseline perceptron on the data it was trained on
# (`score` returns the mean accuracy for classifiers).
classifieur.score(X_train, t_train)

0.45165945165945165

In [30]:
# Exhaustive grid search over (eta0, alpha): 10 log-spaced values of each
# between 1e-9 and 1e1, i.e. 100 combinations, each scored by nb_cv-fold
# cross-validation on the training split.
all_values_lr = np.logspace(-9, 1, 10)
all_values_alpha = np.logspace(-9, 1, 10)
nb_cv = 3

# One (unfitted) estimator per combination, learning rate varying slowest,
# so that models[k] corresponds to results[k].
models = [
    Perceptron(penalty='l2', alpha=val_alpha, max_iter=max_iter, eta0=val_lr)
    for val_lr in all_values_lr
    for val_alpha in all_values_alpha
]

# results[k, j] is the accuracy of combination k on fold j.
# The reshape keeps the (n_combinations, nb_cv) layout even for an empty grid.
results = np.array(
    [cross_val_score(candidate, X_train, t_train, cv=nb_cv) for candidate in models]
).reshape(-1, nb_cv)

In [31]:
# Display the raw cross-validation scores: one row per hyper-parameter
# combination, one column per fold.
results

array([[0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.17748918, 0.6017316 , 0.23809524],
       [0.

In [32]:
# Model selection: rank each hyper-parameter combination by its MEAN accuracy
# over the folds. Taking the argmax of the flattened score matrix would instead
# pick whichever combination had the single luckiest fold, which is unreliable
# when fold scores vary a lot for the same configuration.
mean_scores = results.mean(axis=1)
best_row = int(np.argmax(mean_scores))

# Best individual fold of that winning combination. It is kept only so that
# max_index stays a (row, fold) pair: max_index[0] indexes `models`, and
# results[max_index] is a single score.
best_fold = int(np.argmax(results[best_row]))

max_index = (best_row, best_fold)
max_index

(93, 1)

In [33]:
# Report which entry of the score matrix was selected and the score stored there.
best_score = results[max_index]
print("Indice du résultat le plus élevé :", max_index)
print("Valeur du résultat le plus élevé :", best_score)

Indice du résultat le plus élevé : (93, 1)
Valeur du résultat le plus élevé : 0.8181818181818182


In [34]:
# The first component of max_index is the row in `results`, and `models` was
# filled in the same order, so it also identifies the selected estimator.
best_model_row = max_index[0]
best = models[best_model_row]
best

In [35]:
# Fit the selected estimator on the whole training split (`fit` returns the
# estimator itself) and report its accuracy on that same data.
best.fit(X_train, t_train).score(X_train, t_train)

0.9278499278499278

In [36]:
# Accuracy of the selected model on the held-out test split.
best.score(X_test, t_test)

0.7744107744107744

# Visualisation de notre apprentissage

In [18]:
# Project the training features to 2-D with UMAP, ONCE. UMAP is stochastic, so
# two separate unseeded fits give two different layouts and the panels could
# not be compared point by point. Fitting once (with a fixed seed) puts every
# sample at the same position in both panels and halves the cost.
umap_model_labels = UMAP(n_components=2, random_state=0)
X_train_umap_labels = umap_model_labels.fit_transform(X_train)

# The prediction panel reuses the same model and embedding; the original
# variable names are kept as aliases.
umap_model_predictions = umap_model_labels
X_train_umap_predictions = X_train_umap_labels

# Predict the classes of the training samples and report the accuracy.
t_pred = best.predict(X_train)
accuracy = accuracy_score(t_train, t_pred)
print(f"Précision du modèle sur l'ensemble d'entraînement : {accuracy:.2f}")

# Shared colour limits: without them each scatter autoscales to its own
# min/max, so a given class could get different colours in the two panels
# whenever the predictions do not span the same range of class indices.
class_min = min(t_train.min(), t_pred.min())
class_max = max(t_train.max(), t_pred.max())

fig, (ax_labels, ax_predictions) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: samples coloured by their true class.
scatter_umap_labels = ax_labels.scatter(
    X_train_umap_labels[:, 0], X_train_umap_labels[:, 1],
    c=t_train, cmap='viridis', vmin=class_min, vmax=class_max,
)
ax_labels.set_title('UMAP des étiquettes connues')
ax_labels.set_xlabel('UMAP Composante 1')
ax_labels.set_ylabel('UMAP Composante 2')
fig.colorbar(scatter_umap_labels, ax=ax_labels, label='Classes réelles')

# Right panel: the same points coloured by the class the model predicts.
scatter_umap_predictions = ax_predictions.scatter(
    X_train_umap_predictions[:, 0], X_train_umap_predictions[:, 1],
    c=t_pred, cmap='viridis', vmin=class_min, vmax=class_max,
)
ax_predictions.set_title('UMAP des prédictions du modèle')
ax_predictions.set_xlabel('UMAP Composante 1')
ax_predictions.set_ylabel('UMAP Composante 2')
fig.colorbar(scatter_umap_predictions, ax=ax_predictions, label='Classes prédites')

fig.tight_layout()
plt.show()

NameError: name 'accuracy_score' is not defined