In [4]:
import pandas as pd
import numpy as np
import joblib
from collections import Counter
from joblib import load

# --------------------
# 1. Load the models
# --------------------
# NOTE: joblib.load unpickles arbitrary Python objects; only point this at
# model files that come from a trusted source.
DOSSIER_MODELES = r"C:\Users\fgrol\Documents\stages\models"
encoder = joblib.load(rf"{DOSSIER_MODELES}\label_encoder.pkl")
knn_model = joblib.load(rf"{DOSSIER_MODELES}\knn_model.pkl")
rf_model = joblib.load(rf"{DOSSIER_MODELES}\rf_model.pkl")
catboost_model = joblib.load(rf"{DOSSIER_MODELES}\catboost_model.pkl")
svm_model = joblib.load(rf"{DOSSIER_MODELES}\svm_model.pkl")

# --------------------
# 2. Feature columns
# --------------------
# Column order matters: it is the order in which features are handed to the models.
# presumably identical to the order used at training time — TODO confirm.
colonnes_features = [
    'x', 'y', 'altitude', 'hauteur', 'vitesse',
    'vitesse_calculee', 'delta_t', 'vz',
    'distance', 'temps_total', 'distance_totale',
    'longitude_depart'
]

# --------------------
# 3. Load the CSV file
# --------------------
NB_LIGNES_MAX = 10000  # cap on the number of rows read from the CSV
fichier = r"C:\Users\fgrol\Documents\stages\csv_test.csv"
df = pd.read_csv(fichier, sep=',', quotechar='"', engine='python', nrows=NB_LIGNES_MAX)

# Fail early, with an explicit message, when the file lacks a feature column
# (plain column selection would raise a less readable KeyError).
colonnes_manquantes = [col for col in colonnes_features if col not in df.columns]
if colonnes_manquantes:
    raise KeyError(f"Colonnes absentes de {fichier}: {colonnes_manquantes}")

# --------------------
# 4. Clean the DataFrame
# --------------------
# Non-numeric cells and +/-inf become NaN, then every row holding a NaN is dropped.
X = df[colonnes_features].apply(pd.to_numeric, errors='coerce')
X = X.replace([np.inf, -np.inf], np.nan)
print("NaNs par colonne avant suppression :")
print(X.isna().sum())
X = X.dropna()
print(f"Lignes après nettoyage: {len(X)}")
if X.empty:
    raise ValueError("Aucune ligne exploitable après nettoyage : prédiction impossible.")

# --------------------
# 5. Predictions
# --------------------
# np.ravel guarantees a 1-D array of encoded labels before decoding. The recorded
# output shows a `column_or_1d` warning, presumably because one model (CatBoost
# is the likely candidate) returns a column vector of shape (n, 1) — TODO confirm.
pred_knn = encoder.inverse_transform(np.ravel(knn_model.predict(X)))
pred_rf = encoder.inverse_transform(np.ravel(rf_model.predict(X)))
pred_cb = encoder.inverse_transform(np.ravel(catboost_model.predict(X)))
# NOTE(review): the recorded run reports 0.00% accuracy for the SVM, i.e. it never
# matches the true label. Presumably a preprocessing or label-encoding mismatch
# with training (e.g. missing feature scaling) — verify before trusting its vote.
pred_svm = encoder.inverse_transform(np.ravel(svm_model.predict(X)))

# --------------------
# 6. Attach predictions to the DataFrame
# --------------------
# Keep only the rows that survived cleaning so predictions line up with their source rows.
df_predictions = df.loc[X.index].copy()
df_predictions['KNN'] = pred_knn
df_predictions['RandomForest'] = pred_rf
df_predictions['CatBoost'] = pred_cb
df_predictions['SVM'] = pred_svm

# --------------------
# 7. Calcul vote majoritaire
# --------------------
def vote_majoritaire(row, colonnes=('KNN', 'RandomForest', 'CatBoost', 'SVM')):
    """Return the label chosen by the largest number of models for one row.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row[name]`` for each name in ``colonnes``, e.g. the
        Series passed by ``DataFrame.apply(..., axis=1)`` or a plain dict.
    colonnes : sequence of str, optional
        Names of the prediction columns taking part in the vote, in priority
        order. Defaults to the four model columns used in this notebook.

    Returns
    -------
    The most frequent label among the selected columns. On a tie, the label
    encountered first in ``colonnes`` order wins, so with the defaults the KNN
    prediction breaks a 2-2 tie or a four-way split.

    Raises
    ------
    ValueError
        If ``colonnes`` is empty (there is nothing to vote on).
    """
    votes = [row[nom] for nom in colonnes]
    if not votes:
        raise ValueError("vote_majoritaire: at least one prediction column is required")
    # Counter keeps insertion order, and most_common lists equal counts in the
    # order first encountered, which is what makes the tie-break deterministic.
    (gagnant, _), = Counter(votes).most_common(1)
    return gagnant

# Majority label per row; only the four model prediction columns are handed to
# the voting function, since those are the only values it reads.
df_predictions['majorité'] = (
    df_predictions[['KNN', 'RandomForest', 'CatBoost', 'SVM']]
    .apply(vote_majoritaire, axis=1)
)

# --------------------
# 8. Calcul % de justesse
# --------------------
def accuracy(y_true, y_pred):
    """Return the percentage (0-100) of positions where both label sequences agree.

    Parameters
    ----------
    y_true, y_pred : array-like
        Sequences of labels of identical shape (lists, numpy arrays, pandas
        Series...). They are compared position by position; pandas index labels
        are ignored.

    Returns
    -------
    float
        Share of matching positions multiplied by 100, or NaN for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    vrais = np.asarray(y_true)
    predits = np.asarray(y_pred)
    if vrais.shape != predits.shape:
        raise ValueError(
            f"accuracy: shape mismatch between y_true {vrais.shape} and y_pred {predits.shape}"
        )
    if vrais.size == 0:
        # Nothing to score: return NaN explicitly instead of averaging an empty array.
        return float('nan')
    return (vrais == predits).mean() * 100

# Score every model column, then the ensemble, against the ground-truth labels.
y_vrai = df_predictions['modele_standardise']
for libelle, colonne in [
    ('KNN', 'KNN'),
    ('RandomForest', 'RandomForest'),
    ('CatBoost', 'CatBoost'),
    ('SVM', 'SVM'),
    ('Vote majoritaire', 'majorité'),
]:
    print(f"Accuracy {libelle}: {accuracy(y_vrai, df_predictions[colonne]):.2f}%")


# --------------------
# 9. Show rows where the majority vote differs from the true label
# --------------------
differences = df_predictions[df_predictions['majorité'] != y_vrai]
print(f"\nNombre de lignes où le vote majoritaire diffère de la vraie étiquette: {len(differences)}")
print("Exemples :")
print(differences.head(10))

# --------------------
# 10. Export the results
# --------------------
# The file name is defined once so the written path and the confirmation
# message cannot drift apart.
DOSSIER_EXPORT = r"C:\Users\fgrol\Documents\stages"
NOM_EXPORT = "csv_predictions_output2.csv"
colonnes_export = ['modele_standardise', 'KNN', 'RandomForest', 'CatBoost', 'SVM', 'majorité']
df_predictions[colonnes_export].to_csv(rf"{DOSSIER_EXPORT}\{NOM_EXPORT}", index=False)
print(f"\n✅ Exporté avec vote majoritaire dans : {NOM_EXPORT}")

# Final recap: rows read from the CSV versus rows kept after cleaning.
print("Lignes chargées:", len(df))
print("Lignes après nettoyage:", len(X))


NaNs par colonne avant suppression :
x                     0
y                     0
altitude              0
hauteur              77
vitesse               0
vitesse_calculee      0
delta_t              28
vz                  230
distance              0
temps_total           0
distance_totale       0
longitude_depart     39
dtype: int64
Lignes après nettoyage: 9630


  y = column_or_1d(y, warn=True)


Accuracy KNN: 97.19%
Accuracy RandomForest: 99.70%
Accuracy CatBoost: 99.18%
Accuracy SVM: 0.00%
Accuracy Vote majoritaire: 99.33%

Nombre de lignes où le vote majoritaire diffère de la vraie étiquette: 65
Exemples :
                           drone_id constructeur              modele  \
1    000MCT000000000000409151886510           3W          Extra 330S   
497             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
498             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
499             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
500             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
501             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
502             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
503             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
504             1581E11VKF4D00201NS          DJI  Phantom 4 Pro V2.0   
505             1581E11VKF4D00201NS          DJI  Phantom 4 Pro

In [3]:
# Report how many rows the DataFrame currently held by the kernel contains.
# NOTE(review): this cell is numbered In [3] yet appears after In [4], and its
# recorded output (1000 rows) does not fit the 9630 cleaned rows reported by the
# cell above — presumably stale output from an earlier run; re-run cells in order.
nb_lignes = df.shape[0]
print(f"Nombre de lignes : {nb_lignes}")


Nombre de lignes : 1000
