In [2]:
# === Import des librairies ===
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from imblearn.over_sampling import SMOTE
from itertools import cycle
import joblib


In [None]:
# === Data loading ===
# The CSV file uses ';' as its field separator.
csv_path = "KASS.csv"
df = pd.read_csv(csv_path, sep=';')

# Strip leading/trailing whitespace from the header names so later
# column look-ups by name are not broken by stray spaces.
df.columns = df.columns.str.strip()
df.head()


In [None]:
# === Data cleaning ===
# Discard every row that contains at least one missing value.
df.dropna(inplace=True)

# Original header -> snake_case name used in the rest of the notebook.
column_aliases = {
    'Temperature': 'temperature',
    'Humidity': 'humidity',
    'Tension': 'tension',
    'Energy Consumption': 'energy_consumption',
    'External Temp': 'external_temp',
}
df.rename(columns=column_aliases, inplace=True)

# Numeric conversion: swap decimal commas for decimal points, then cast to float.
for sensor_col in column_aliases.values():
    df[sensor_col] = df[sensor_col].replace(',', '.', regex=True).astype(float)

# Target encoding: every failure label gets a fixed integer class id.
failure_type_codes = {
    'Fonctionnement normal': 0,
    'Temperature trop basse': 1,
    'Panne compresseur': 2,
    'Panne ventilateur': 3,
    'Arret fonctionnement': 4,
    'Temperature trop haute': 5,
    'Humidite trop basse': 6
}

# Trim the labels so values that differ only by surrounding whitespace still match.
failure_labels = df['failure_type'].str.strip()

# Fail loudly on labels missing from the mapping: Series.replace would have left
# them in the column as strings, silently producing extra, bogus classes.
unmapped_labels = failure_labels[~failure_labels.isin(list(failure_type_codes))].unique().tolist()
if unmapped_labels:
    raise ValueError(f"Unmapped failure_type values: {unmapped_labels}")

# Series.map + explicit cast gives a real integer column without relying on the
# silent object -> int downcast of Series.replace (deprecated in pandas 2.2).
df['failure_type_encoded'] = failure_labels.map(failure_type_codes).astype(int)

df.head()


In [None]:
# === Standardisation ===
# The five sensor columns are rescaled in place with a StandardScaler;
# the fitted scaler stays available as `scaler`.
# NOTE(review): the scaler is fitted on every row, i.e. before any train/test
# split, so test rows influence the scaling statistics — confirm this is intended.
feature_cols = ['temperature', 'humidity', 'tension', 'energy_consumption', 'external_temp']

scaler = StandardScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])
df.head()


In [None]:
# === Feature / label separation, train/test split and SMOTE ===
def data_preparation(df, target_name):
    """Build the train/test sets, oversampling the training set only.

    The data is split first and SMOTE is applied to the training fold
    afterwards. Resampling before the split would place synthetic points,
    interpolated from training rows, into the test set and inflate every
    metric computed on it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column named by ``target_name`` and a
        ``'failure_type_encoded'`` column holding the class ids.
    target_name : str
        Name of the raw label column; it is dropped from the features.

    Returns
    -------
    tuple
        ``(X, y, X_train, X_test, y_train, y_test)`` where ``X`` / ``y`` are
        the full, un-resampled features and labels, ``X_train`` / ``y_train``
        are the SMOTE-balanced 70 % training split and ``X_test`` / ``y_test``
        are the untouched 30 % hold-out split.

    Raises
    ------
    ValueError
        Propagated from ``train_test_split`` or ``SMOTE`` when a class has too
        few samples to be stratified or resampled.
    """
    X = df.drop(columns=[target_name, 'failure_type_encoded'])
    y = df['failure_type_encoded']

    # Split on the real observations so the test set keeps the true class mix.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.7, stratify=y, random_state=0
    )

    # SMOTE needs k_neighbors + 1 samples in the smallest class; keep the
    # library default of 5 whenever possible and shrink it only for rare classes.
    smallest_class = int(y_train.value_counts().min())
    k_neighbors = max(1, min(5, smallest_class - 1))

    # Rebalance the classes of the training fold only.
    smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    return X, y, X_train, X_test, y_train, y_test

# Raw label column; data_preparation removes it from the feature matrix.
target_name = 'failure_type'

# Unpack the six values returned by data_preparation.
prepared = data_preparation(df, target_name)
X, y, X_train, X_test, y_train, y_test = prepared

# Sanity check on the sizes of both splits.
print("Dimensions entraînement :", X_train.shape, y_train.shape)
print("Dimensions test :", X_test.shape, y_test.shape)


In [None]:
# === Model definitions ===
# Gradient-boosted trees over the 7 failure classes, with L2 (reg_lambda) and
# L1 (reg_alpha) regularisation.
# 'multi:softprob' is used instead of 'multi:softmax' because the ROC cell calls
# xgb_model.predict_proba: softprob is the objective that outputs per-class
# probabilities, whereas softmax outputs only the winning class.
# predict() still returns class labels.
xgb_model = XGBClassifier(
    objective='multi:softprob',
    num_class=7,
    random_state=0,
    reg_lambda=1.0,
    reg_alpha=0.5
)

# Random forest: 30 shallow trees built without bootstrap sampling, with class
# weights set to "balanced".
rf_params = dict(
    n_estimators=30,
    max_depth=6,
    max_features=0.4,
    min_samples_split=11,
    min_samples_leaf=7,
    bootstrap=False,
    class_weight="balanced",
    random_state=0,
)
rf_model = RandomForestClassifier(**rf_params)


In [1]:
# === Training ===
# Requires the model-definition cell to have been run first in this kernel.
# fit() returns the estimator itself, so fitting and predicting can be chained.
y_pred_xgb = xgb_model.fit(X_train, y_train).predict(X_test)
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)

# Share of test samples whose predicted class matches the true class.
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")
print(f"Random Forest Accuracy: {accuracy_rf:.4f}")


NameError: name 'xgb_model' is not defined

In [None]:
# === Classification reports ===
# Per-class precision / recall / F1 as dictionaries ...
report_xgb = classification_report(y_test, y_pred_xgb, output_dict=True)
report_rf = classification_report(y_test, y_pred_rf, output_dict=True)

# ... turned into tables with one row per class, rounded to 4 decimals.
df_xgb = pd.DataFrame(report_xgb).T.round(4)
df_rf = pd.DataFrame(report_rf).T.round(4)

report_tables = (
    ("\n--- XGBoost Classification Report ---\n", df_xgb),
    ("\n--- Random Forest Classification Report ---\n", df_rf),
)
for heading, table in report_tables:
    print(heading)
    display(table)


In [None]:
# === Confusion matrices ===
# Rows are the actual classes, columns the predicted classes.
conf_matrix_xgb = confusion_matrix(y_test, y_pred_xgb)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# One heatmap per model, side by side: (axis, matrix, colour map, title).
heatmap_panels = [
    (axes[0], conf_matrix_xgb, 'Blues', f'XGBoost (Acc: {accuracy_xgb:.4f})'),
    (axes[1], conf_matrix_rf, 'Greens', f'Random Forest (Acc: {accuracy_rf:.4f})'),
]
for panel_ax, matrix, cmap_name, panel_title in heatmap_panels:
    sns.heatmap(matrix, annot=True, fmt='d', cmap=cmap_name, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()


In [None]:
# === Multi-class ROC (one-vs-rest) ===
# One indicator column per class id 0..6, so each class gets its own ROC curve.
y_test_bin = label_binarize(y_test, classes=list(range(7)))
n_classes = y_test_bin.shape[1]

# Per-class scores produced by each model on the test set.
y_score_xgb = xgb_model.predict_proba(X_test)
y_score_rf = rf_model.predict_proba(X_test)


def _roc_per_class(scores):
    """Return (fpr, tpr, auc) dictionaries keyed by class index for one model."""
    fpr, tpr, roc_auc = dict(), dict(), dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], scores[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    return fpr, tpr, roc_auc


def _draw_roc_panel(position, fpr, tpr, roc_auc, title):
    """Draw every class curve plus the chance diagonal in subplot `position`."""
    # A fresh colour cycle per panel keeps class colours identical across models.
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'red', 'green', 'purple', 'brown'])
    plt.subplot(1, 2, position)
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, label=f'Classe {i} (AUC = {roc_auc[i]:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.title(title)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')


fpr_xgb, tpr_xgb, roc_auc_xgb = _roc_per_class(y_score_xgb)
fpr_rf, tpr_rf, roc_auc_rf = _roc_per_class(y_score_rf)

plt.figure(figsize=(12, 6))

# Left panel: XGBoost, right panel: Random Forest.
_draw_roc_panel(1, fpr_xgb, tpr_xgb, roc_auc_xgb, 'ROC - XGBoost')
_draw_roc_panel(2, fpr_rf, tpr_rf, roc_auc_rf, 'ROC - Random Forest')

plt.tight_layout()
plt.show()


In [None]:
# === Model persistence ===
# Serialise both fitted classifiers to the current working directory.
joblib.dump(xgb_model, "xgb_model.pkl")
joblib.dump(rf_model, "rf_model.pkl")

# Both models were trained on features transformed by `scaler`; without the
# fitted scaler the pickled models cannot be applied to new raw measurements,
# so it is saved alongside them.
joblib.dump(scaler, "scaler.pkl")

print(" Modèles sauvegardés.")
