In [92]:
pip install pandas numpy matplotlib seaborn ipython scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [93]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Markdown
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [94]:
# Charger le fichier CSV
df = pd.read_csv('signal_features_dataset_acceleration_vert.csv')

display(Markdown("### Dataset charg√© avec succ√®s"))
display(Markdown(f"Nombre de lignes : {len(df)}"))

### Dataset charg√© avec succ√®s

Nombre de lignes : 1322

In [95]:
# Informations sur le dataset
display(Markdown("### Informations sur le dataset :"))
# Note : df.info() affiche du texte brut, on contourne avec un r√©sum√©
df_info = pd.DataFrame({
    'Column': df.columns,
    'Type': [str(t) for t in df.dtypes],
    'Non-Null Count': df.notnull().sum()
})
display(df_info)

# Statistiques descriptives
display(Markdown("### Statistiques descriptives :"))
display(df.describe())

# V√©rifier les valeurs manquantes
display(Markdown("### Valeurs manquantes par colonne :"))
display(df.isnull().sum())

### Informations sur le dataset :

Unnamed: 0,Column,Type,Non-Null Count
RMS,RMS,float64,1322
Skewness,Skewness,float64,1322
Kurtosis,Kurtosis,float64,1322
Mean,Mean,float64,1322
Max,Max,float64,1322
Label,Label,int64,1322
Signal Name,Signal Name,object,1322


### Statistiques descriptives :

Unnamed: 0,RMS,Skewness,Kurtosis,Mean,Max,Label
count,1322.0,1322.0,1322.0,1322.0,1322.0,1322.0
mean,0.14453,-0.154118,4.89123,-0.003627,1.10798,0.497731
std,0.116298,0.10163,4.704754,0.000106,1.017348,0.500184
min,0.028013,-0.481582,0.139822,-0.00394,0.086725,0.0
25%,0.028847,-0.182506,0.263598,-0.003708,0.097757,0.0
50%,0.029687,-0.134279,0.373488,-0.003667,0.126841,0.0
75%,0.261068,-0.117895,9.415309,-0.003552,2.117914,1.0
max,0.274812,0.156689,12.240314,-0.003259,2.629188,1.0


### Valeurs manquantes par colonne :

RMS            0
Skewness       0
Kurtosis       0
Mean           0
Max            0
Label          0
Signal Name    0
dtype: int64

In [96]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from IPython.display import display, Markdown

# S√©lectionner les colonnes num√©riques
numeric_cols = df.select_dtypes(include=[np.number]).columns
df_numeric = df[numeric_cols]
corr_matrix = df_numeric.corr()

# Cr√©er une heatmap avec Plotly
fig = go.Figure(data=go.Heatmap(
    z=corr_matrix.values,  # Valeurs de la matrice de corr√©lation
    x=corr_matrix.columns,  # Noms des colonnes (axe x)
    y=corr_matrix.columns,  # Noms des lignes (axe y)
    colorscale='RdBu',  # √âquivalent de 'coolwarm' (rouge-bleu)
    zmin=-1, zmax=1,  # Limites pour l'√©chelle de couleur
    text=corr_matrix.values,  # Valeurs √† afficher dans les cellules
    texttemplate="%{text:.2f}",  # Format des valeurs (2 d√©cimales)
    textfont={"size": 10},  # Taille de la police des annotations
    colorbar=dict(title="Corr√©lation"),  # Titre de la barre de couleur
))

# Mettre √† jour la mise en page
fig.update_layout(
    title="Matrice de corr√©lation des caract√©ristiques num√©riques",
    width=800,  # Largeur de la figure
    height=600,  # Hauteur de la figure
    xaxis_title="Caract√©ristiques",
    yaxis_title="Caract√©ristiques",
    xaxis=dict(tickangle=45),  # Rotation des √©tiquettes sur l'axe x
)

# Afficher la figure
fig.show()

In [97]:
threshold_high = 0.7
high_corr_pairs = []
for i in range(len(corr_matrix.columns)):
    for j in range(i + 1, len(corr_matrix.columns)):
        if abs(corr_matrix.iloc[i, j]) > threshold_high:
            high_corr_pairs.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_matrix.iloc[i, j]))

display(Markdown("### Variables tr√®s corr√©l√©es entre elles (|corr| > 0.7) :"))
if high_corr_pairs:
    display(pd.DataFrame(high_corr_pairs, columns=['Variable 1', 'Variable 2', 'Corr√©lation']))
else:
    display(Markdown("- Aucune d√©tect√©e"))

### Variables tr√®s corr√©l√©es entre elles (|corr| > 0.7) :

Unnamed: 0,Variable 1,Variable 2,Corr√©lation
0,RMS,Kurtosis,0.987257
1,RMS,Mean,0.732509
2,RMS,Max,0.996607
3,RMS,Label,0.999472
4,Kurtosis,Mean,0.717781
5,Kurtosis,Max,0.987931
6,Kurtosis,Label,0.988318
7,Mean,Max,0.732663
8,Mean,Label,0.732538
9,Max,Label,0.996353


In [98]:
threshold_low = 0.3
label_corr = corr_matrix['Label'].drop('Label')
low_corr_vars = label_corr[abs(label_corr) < threshold_low]

display(Markdown("### Variables peu corr√©l√©es avec Label (|corr| < 0.3) :"))
if not low_corr_vars.empty:
    display(pd.DataFrame(low_corr_vars, columns=['Corr√©lation']))
else:
    display(Markdown("- Aucune d√©tect√©e"))

### Variables peu corr√©l√©es avec Label (|corr| < 0.3) :

Unnamed: 0,Corr√©lation


In [99]:
# --- Partie 2 : S√©lection des features (suite) ---
# Liste des features initiales (exclure 'Label')
y = df['Label']
features = [col for col in df.select_dtypes(include=[np.number]).columns if col != 'Label']

# Importance des caract√©ristiques (calcul√©e dynamiquement)
feature_importance = []
for feature in features:
    mean_0 = df[df['Label'] == 0][feature].mean()
    mean_1 = df[df['Label'] == 1][feature].mean()
    std_feature = df[feature].std()
    if std_feature != 0:  # √âviter la division par z√©ro
        importance = abs(mean_1 - mean_0) / std_feature
    else:
        importance = 0
    feature_importance.append(importance)

# Cr√©er un DataFrame pour l'importance des features
feature_importance = pd.DataFrame({
    'Feature': features,
    'Importance': feature_importance
})

# Trier par importance d√©croissante
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)

# Afficher l'importance des features
display(Markdown("## S√©lection des features bas√©e sur leur importance üîç"))
display(Markdown("### Importance des features (bas√©e sur la diff√©rence normalis√©e des moyennes) :"))
display(feature_importance)

# Seuil pour garder les features (ajustable)
threshold = 0  # On garde toutes les features avec une importance > 0
features_to_keep = feature_importance[feature_importance['Importance'] >= threshold]['Feature'].tolist()

# Mettre √† jour X avec les features retenues
X = df[features_to_keep]

# Affichage pour v√©rification
display(Markdown("### Features conserv√©es apr√®s √©limination :"))
display(pd.Series(features_to_keep))
display(Markdown(f"### Nouvelle forme de X : {X.shape}"))

## S√©lection des features bas√©e sur leur importance üîç

### Importance des features (bas√©e sur la diff√©rence normalis√©e des moyennes) :

Unnamed: 0,Feature,Importance
0,RMS,1.998209
4,Max,1.991973
2,Kurtosis,1.975908
3,Mean,1.464537
1,Skewness,0.435244


### Features conserv√©es apr√®s √©limination :

0         RMS
1         Max
2    Kurtosis
3        Mean
4    Skewness
dtype: object

### Nouvelle forme de X : (1322, 5)

In [100]:
from IPython.display import display, Markdown
import numpy as np

# Calculer la matrice de corr√©lation
numeric_cols = X.select_dtypes(include=[np.number]).columns
corr_matrix = X[numeric_cols].corr()

# Seuil pour les corr√©lations fortes
threshold = 0.7

# Identifier les paires de variables tr√®s corr√©l√©es
cols_to_drop = set()  # Utiliser un set pour √©viter les doublons
for i in range(len(corr_matrix.columns)):
    for j in range(i + 1, len(corr_matrix.columns)):
        if abs(corr_matrix.iloc[i, j]) > threshold:
            # Ajouter la deuxi√®me variable de la paire √† la liste des colonnes √† supprimer
            col_to_drop = corr_matrix.columns[j]
            cols_to_drop.add(col_to_drop)

# Convertir en liste pour affichage
cols_to_drop = list(cols_to_drop)

# Afficher les colonnes √† supprimer
display(Markdown("### Variables tr√®s corr√©l√©es √† supprimer (|corr| > 0.7) :"))
if cols_to_drop:
    display(pd.Series(cols_to_drop))
else:
    display(Markdown("- Aucune variable tr√®s corr√©l√©e d√©tect√©e"))

# Supprimer les colonnes de X
X = X.drop(columns=cols_to_drop, errors='ignore')
# Afficher les features restantes
display(Markdown("### Features apr√®s suppression des variables corr√©l√©es :"))
display(pd.Series(X.columns))
display(Markdown(f"### Nouvelle forme de X : {X.shape}"))

### Variables tr√®s corr√©l√©es √† supprimer (|corr| > 0.7) :

0        Mean
1         Max
2    Kurtosis
dtype: object

### Features apr√®s suppression des variables corr√©l√©es :

0         RMS
1    Skewness
dtype: object

### Nouvelle forme de X : (1322, 2)

In [101]:
X = df[['Skewness' , 'Mean']]

In [102]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
display(Markdown(f"### Dimensions des ensembles :"))
display(Markdown(f"- Train : {X_train.shape}"))
display(Markdown(f"- Test : {X_test.shape}"))

### Dimensions des ensembles :

- Train : (1057, 2)

- Test : (265, 2)

# **Random Forest**

In [103]:
# D√©finir la pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Normalisation des features
    ('classifier', RandomForestClassifier(random_state=42))
])

# Entra√Æner la pipeline
pipeline.fit(X_train, y_train)

display(Markdown("### Pipeline entra√Æn√©e avec succ√®s"))

### Pipeline entra√Æn√©e avec succ√®s

In [104]:
import numpy as np
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

def evaluate_model(model, X_train, y_train, X_test, y_test):
    
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    if hasattr(model, "predict_proba"): 
        y_train_proba = model.predict_proba(X_train)[:, 1]
        y_test_proba = model.predict_proba(X_test)[:, 1]
    else:
        y_train_proba = y_train_pred
        y_test_proba = y_test_pred

    # Cr√©ation des matrices de confusion
    cm_train = confusion_matrix(y_train, y_train_pred)
    cm_test = confusion_matrix(y_test, y_test_pred)
    labels_true = ["True 0", "True 1"]
    labels_pred = ["Pr√©diction 0", "Pr√©diction 1"]
    
    
    # Cr√©ation d'une figure avec sous-graphiques
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Matrice de confusion - Train", "Matrice de confusion - Test"),
                        horizontal_spacing=0.2)

    # Ajout des heatmaps
    fig.add_trace(go.Heatmap(z=cm_train, x=labels_pred, y=labels_true, colorscale="blues", showscale=True), row=1, col=1)
    fig.add_trace(go.Heatmap(z=cm_test, x=labels_pred, y=labels_true, colorscale="blues", showscale=True), row=1, col=2)

    # Ajouter manuellement les annotations
    for i in range(cm_train.shape[0]):
        for j in range(cm_train.shape[1]):
            fig.add_annotation(text=str(cm_train[i, j]), x=labels_pred[j], y=labels_true[i],
                               xref=f"x1", yref=f"y1", showarrow=False, font=dict(color="black"))

    for i in range(cm_test.shape[0]):
        for j in range(cm_test.shape[1]):
            fig.add_annotation(text=str(cm_test[i, j]), x=labels_pred[j], y=labels_true[i],
                               xref=f"x2", yref=f"y2", showarrow=False, font=dict(color="black"))

    fig.update_layout(title_text="Matrices de confusion - Train et Test", height=500, width=1000)
    fig.show()
    # Calcul des courbes ROC
    fpr_train, tpr_train, _ = roc_curve(y_train, y_train_proba)
    fpr_test, tpr_test, _ = roc_curve(y_test, y_test_proba)
    auc_train = auc(fpr_train, tpr_train)
    auc_test = auc(fpr_test, tpr_test)
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=fpr_train, y=tpr_train, mode="lines", name=f"Train ROC (AUC={auc_train:.2f})", line=dict(color="blue")))
    fig.add_trace(go.Scatter(x=fpr_test, y=tpr_test, mode="lines", name=f"Test ROC (AUC={auc_test:.2f})", line=dict(color="red")))
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode="lines", name="Random", line=dict(dash="dash", color="gray")))
    
    fig.update_layout(title="Courbe ROC", xaxis_title="Taux de faux positifs (FPR)", yaxis_title="Taux de vrais positifs (TPR)")
    fig.show()
    
    
    print("\n=== Rapport de classification (Train) ===")
    print(classification_report(y_train, y_train_pred))
    
    print("\n=== Rapport de classification (Test) ===")
    print(classification_report(y_test, y_test_pred))

In [105]:
pip install nbformat

Note: you may need to restart the kernel to use updated packages.


In [106]:
# √âvaluer le mod√®le
display(Markdown("### √âvaluation du mod√®le"))
evaluate_model(pipeline, X_train, y_train, X_test, y_test)

### √âvaluation du mod√®le


=== Rapport de classification (Train) ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       518
           1       1.00      1.00      1.00       539

    accuracy                           1.00      1057
   macro avg       1.00      1.00      1.00      1057
weighted avg       1.00      1.00      1.00      1057


=== Rapport de classification (Test) ===
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       146
           1       1.00      0.98      0.99       119

    accuracy                           0.99       265
   macro avg       0.99      0.99      0.99       265
weighted avg       0.99      0.99      0.99       265



In [107]:
# Extraire l'importance des features
model = pipeline.named_steps['classifier']
feature_importance = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)

display(Markdown("### Importance des caract√©ristiques :"))
display(feature_importance)

### Importance des caract√©ristiques :

Unnamed: 0,Feature,Importance
1,Mean,0.55451
0,Skewness,0.44549


# **Arbre de Decision**

In [108]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from IPython.display import display, Markdown

# D√©finir la pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Normalisation des features
    ('classifier', DecisionTreeClassifier(random_state=42))  # Remplacement par DecisionTreeClassifier
])

# Entra√Æner la pipeline
pipeline.fit(X_train, y_train)

display(Markdown("### Pipeline entra√Æn√©e avec succ√®s"))

### Pipeline entra√Æn√©e avec succ√®s

In [109]:
evaluate_model(pipeline, X_train, y_train, X_test, y_test)



=== Rapport de classification (Train) ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       518
           1       1.00      1.00      1.00       539

    accuracy                           1.00      1057
   macro avg       1.00      1.00      1.00      1057
weighted avg       1.00      1.00      1.00      1057


=== Rapport de classification (Test) ===
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       146
           1       0.99      0.98      0.99       119

    accuracy                           0.99       265
   macro avg       0.99      0.99      0.99       265
weighted avg       0.99      0.99      0.99       265



# **Mod√®le 1 : R√©seau de neurones simple (MLP)**

In [110]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [112]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, ClassifierMixin
from IPython.display import display, Markdown
import plotly.graph_objects as go
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import pandas as pd

# --- Mod√®le 1 : R√©seau de neurones simple (MLP) ---
class KerasClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, epochs=50, batch_size=32):
        self.input_dim = input_dim
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None

    def fit(self, X, y):
        self.model = Sequential([
            Dense(16, input_dim=self.input_dim, activation='relu'),
            Dense(8, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def predict(self, X):
        return (self.model.predict(X) > 0.5).astype(int).flatten()

    def predict_proba(self, X):
        return self.model.predict(X)

# --- Entra√Ænement du mod√®le MLP Simple ---
pipeline_mlp = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', KerasClassifier(input_dim=X_train.shape[1], epochs=50, batch_size=32))
])

# Entra√Æner la pipeline
pipeline_mlp.fit(X_train, y_train)

# Pr√©dictions
y_pred_mlp = pipeline_mlp.predict(X_test)
y_proba_mlp = pipeline_mlp.predict_proba(X_test)

# Calcul des m√©triques
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
report_mlp = classification_report(y_test, y_pred_mlp, output_dict=True)

# --- Visualisation des r√©sultats ---
# 1. Tableau des m√©triques
metrics_data_mlp = {
    'Mod√®le': ['MLP Simple'],
    'Accuracy': [accuracy_mlp],
    'Pr√©cision (Classe 0)': [report_mlp['0']['precision']],
    'Rappel (Classe 0)': [report_mlp['0']['recall']],
    'F1-Score (Classe 0)': [report_mlp['0']['f1-score']],
    'Pr√©cision (Classe 1)': [report_mlp['1']['precision']],
    'Rappel (Classe 1)': [report_mlp['1']['recall']],
    'F1-Score (Classe 1)': [report_mlp['1']['f1-score']]
}

df_metrics_mlp = pd.DataFrame(metrics_data_mlp)

# Afficher le tableau avec Plotly
fig_table_mlp = go.Figure(data=[go.Table(
    header=dict(values=list(df_metrics_mlp.columns),
                fill_color='paleturquoise',
                align='center',
                font=dict(size=12)),
    cells=dict(values=[df_metrics_mlp[col] for col in df_metrics_mlp.columns],
               fill_color='lavender',
               align='center',
               format=['', '.4f', '.4f', '.4f', '.4f', '.4f', '.4f', '.4f'],
               font=dict(size=11)))
])

fig_table_mlp.update_layout(
    title="M√©triques du mod√®le MLP Simple",
    width=1000,
    height=300
)

display(Markdown("## M√©triques du mod√®le MLP Simple üìä"))
fig_table_mlp.show()

# 2. Graphique en barres pour l'accuracy
fig_bar_mlp = go.Figure(data=[
    go.Bar(name='Accuracy', x=['MLP Simple'], y=[accuracy_mlp],
           marker_color='#1f77b4', text=[f"{accuracy_mlp:.4f}"],
           textposition='auto')
])

fig_bar_mlp.update_layout(
    title="Accuracy du mod√®le MLP Simple",
    xaxis_title="Mod√®le",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
    width=600,
    height=400
)

display(Markdown("## Accuracy du mod√®le MLP Simple üìà"))
fig_bar_mlp.show()

# 3. Matrice de confusion sous forme de heatmap
cm_mlp = confusion_matrix(y_test, y_pred_mlp)
fig_cm_mlp = go.Figure(data=go.Heatmap(
    z=cm_mlp,
    x=['Pr√©dit 0', 'Pr√©dit 1'],
    y=['Vrai 0', 'Vrai 1'],
    colorscale='Blues',
    text=cm_mlp,
    texttemplate="%{text}",
    textfont={"size": 12},
    colorbar=dict(title="Nombre")
))

fig_cm_mlp.update_layout(
    title="Matrice de confusion - MLP Simple",
    xaxis_title="Pr√©dictions",
    yaxis_title="Valeurs r√©elles",
    width=500,
    height=500
)

display(Markdown("## Matrice de confusion - MLP Simple üìâ"))
fig_cm_mlp.show()

# 4. Courbe ROC et AUC
fpr_mlp, tpr_mlp, _ = roc_curve(y_test, y_proba_mlp)
roc_auc_mlp = auc(fpr_mlp, tpr_mlp)

fig_roc_mlp = go.Figure()

# Courbe ROC pour MLP Simple
fig_roc_mlp.add_trace(go.Scatter(
    x=fpr_mlp,
    y=tpr_mlp,
    mode='lines',
    name=f'MLP Simple (AUC = {roc_auc_mlp:.2f})',
    line=dict(color='#1f77b4', width=2)
))

# Ajouter la diagonale (mod√®le al√©atoire)
fig_roc_mlp.add_trace(go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    name='Al√©atoire (AUC = 0.50)',
    line=dict(color='gray', dash='dash')
))

# Personnaliser le graphique
fig_roc_mlp.update_layout(
    title="Courbe ROC - MLP Simple",
    xaxis_title="Taux de faux positifs (FPR)",
    yaxis_title="Taux de vrais positifs (TPR)",
    xaxis=dict(range=[0, 1]),
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
    width=600,
    height=600,
    legend=dict(x=0.7, y=0.1)
)

display(Markdown("## Courbe ROC et AUC - MLP Simple üìà"))
fig_roc_mlp.show()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 11ms/step
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 6ms/step 








## M√©triques du mod√®le MLP Simple üìä

## Accuracy du mod√®le MLP Simple üìà

## Matrice de confusion - MLP Simple üìâ

## Courbe ROC et AUC - MLP Simple üìà

In [90]:
pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m129.1/129.1 kB[0m [31m770.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.


# **R√©seau de neurones avec Dropout et recherche d‚Äôhyperparam√®tres**

In [113]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.base import BaseEstimator, ClassifierMixin
from IPython.display import display, Markdown
import keras_tuner as kt
import plotly.graph_objects as go
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import pandas as pd

# --- Mod√®le 2 : R√©seau de neurones avec Dropout et tuning ---
class KerasTunerClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, max_trials=10, epochs=50, batch_size=32):
        self.input_dim = input_dim
        self.max_trials = max_trials
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None

    def build_model(self, hp):
        model = Sequential()
        model.add(Dense(units=hp.Int('units_1', min_value=16, max_value=64, step=16),
                        input_dim=self.input_dim, activation='relu'))
        model.add(Dropout(hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))
        model.add(Dense(units=hp.Int('units_2', min_value=8, max_value=32, step=8),
                        activation='relu'))
        model.add(Dropout(hp.Float('dropout_2', min_value=0.0, max_value=0.5, step=0.1)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def fit(self, X, y):
        tuner = kt.Hyperband(
            self.build_model,
            objective='val_accuracy',
            max_epochs=self.epochs,
            factor=3,
            directory='my_dir',
            project_name='tune_keras_classifier'
        )
        tuner.search(X, y, epochs=self.epochs, batch_size=self.batch_size, validation_split=0.2, verbose=0)
        self.model = tuner.get_best_models(num_models=1)[0]
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=0)
        return self

    def predict(self, X):
        return (self.model.predict(X) > 0.5).astype(int).flatten()

    def predict_proba(self, X):
        return self.model.predict(X)

# --- Entra√Ænement du mod√®le MLP avec Dropout et Tuning ---
pipeline_tuned = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', KerasTunerClassifier(input_dim=X_train.shape[1], max_trials=10, epochs=50, batch_size=32))
])

# Entra√Æner la pipeline
pipeline_tuned.fit(X_train, y_train)

# Pr√©dictions
y_pred_tuned = pipeline_tuned.predict(X_test)
y_proba_tuned = pipeline_tuned.predict_proba(X_test)

# Calcul des m√©triques
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
report_tuned = classification_report(y_test, y_pred_tuned, output_dict=True)

# --- Visualisation des r√©sultats ---
# 1. Tableau des m√©triques
metrics_data_tuned = {
    'Mod√®le': ['MLP avec Tuning'],
    'Accuracy': [accuracy_tuned],
    'Pr√©cision (Classe 0)': [report_tuned['0']['precision']],
    'Rappel (Classe 0)': [report_tuned['0']['recall']],
    'F1-Score (Classe 0)': [report_tuned['0']['f1-score']],
    'Pr√©cision (Classe 1)': [report_tuned['1']['precision']],
    'Rappel (Classe 1)': [report_tuned['1']['recall']],
    'F1-Score (Classe 1)': [report_tuned['1']['f1-score']]
}

df_metrics_tuned = pd.DataFrame(metrics_data_tuned)

# Afficher le tableau avec Plotly
fig_table_tuned = go.Figure(data=[go.Table(
    header=dict(values=list(df_metrics_tuned.columns),
                fill_color='paleturquoise',
                align='center',
                font=dict(size=12)),
    cells=dict(values=[df_metrics_tuned[col] for col in df_metrics_tuned.columns],
               fill_color='lavender',
               align='center',
               format=['', '.4f', '.4f', '.4f', '.4f', '.4f', '.4f', '.4f'],
               font=dict(size=11)))
])

fig_table_tuned.update_layout(
    title="M√©triques du mod√®le MLP avec Tuning",
    width=1000,
    height=300
)

display(Markdown("## M√©triques du mod√®le MLP avec Tuning üìä"))
fig_table_tuned.show()

# 2. Graphique en barres pour l'accuracy
fig_bar_tuned = go.Figure(data=[
    go.Bar(name='Accuracy', x=['MLP avec Tuning'], y=[accuracy_tuned],
           marker_color='#ff7f0e', text=[f"{accuracy_tuned:.4f}"],
           textposition='auto')
])

fig_bar_tuned.update_layout(
    title="Accuracy du mod√®le MLP avec Tuning",
    xaxis_title="Mod√®le",
    yaxis_title="Accuracy",
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
    width=600,
    height=400
)

display(Markdown("## Accuracy du mod√®le MLP avec Tuning üìà"))
fig_bar_tuned.show()

# 3. Matrice de confusion sous forme de heatmap
cm_tuned = confusion_matrix(y_test, y_pred_tuned)
fig_cm_tuned = go.Figure(data=go.Heatmap(
    z=cm_tuned,
    x=['Pr√©dit 0', 'Pr√©dit 1'],
    y=['Vrai 0', 'Vrai 1'],
    colorscale='Blues',
    text=cm_tuned,
    texttemplate="%{text}",
    textfont={"size": 12},
    colorbar=dict(title="Nombre")
))

fig_cm_tuned.update_layout(
    title="Matrice de confusion - MLP avec Tuning",
    xaxis_title="Pr√©dictions",
    yaxis_title="Valeurs r√©elles",
    width=500,
    height=500
)

display(Markdown("## Matrice de confusion - MLP avec Tuning üìâ"))
fig_cm_tuned.show()

# 4. Courbe ROC et AUC
fpr_tuned, tpr_tuned, _ = roc_curve(y_test, y_proba_tuned)
roc_auc_tuned = auc(fpr_tuned, tpr_tuned)

fig_roc_tuned = go.Figure()

# Courbe ROC pour MLP avec Tuning
fig_roc_tuned.add_trace(go.Scatter(
    x=fpr_tuned,
    y=tpr_tuned,
    mode='lines',
    name=f'MLP avec Tuning (AUC = {roc_auc_tuned:.2f})',
    line=dict(color='#ff7f0e', width=2)
))

# Ajouter la diagonale (mod√®le al√©atoire)
fig_roc_tuned.add_trace(go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    name='Al√©atoire (AUC = 0.50)',
    line=dict(color='gray', dash='dash')
))

# Personnaliser le graphique
fig_roc_tuned.update_layout(
    title="Courbe ROC - MLP avec Tuning",
    xaxis_title="Taux de faux positifs (FPR)",
    yaxis_title="Taux de vrais positifs (TPR)",
    xaxis=dict(range=[0, 1]),
    yaxis=dict(range=[0, 1]),
    template="plotly_white",
    width=600,
    height=600,
    legend=dict(x=0.7, y=0.1)
)

display(Markdown("## Courbe ROC et AUC - MLP avec Tuning üìà"))
fig_roc_tuned.show()

Reloading Tuner from my_dir/tune_keras_classifier/tuner0.json



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.


Skipping variable loading for optimizer 'adam', because it has 2 variables whereas the saved optimizer has 14 variables. 



[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 11ms/step
[1m9/9[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step 








## M√©triques du mod√®le MLP avec Tuning üìä

## Accuracy du mod√®le MLP avec Tuning üìà

## Matrice de confusion - MLP avec Tuning üìâ

## Courbe ROC et AUC - MLP avec Tuning üìà