In [9]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
from bdd_script import get_indicateur, get_labels
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron


In [10]:
# === Chargement des données ===
raw_train = get_indicateur(3)
train_df = pd.json_normalize(raw_train)
colonnes_a_supprimer = [col for col in train_df.columns if col.startswith("has_unit.") or col.startswith("ratio_unit.")]
train_df = train_df.drop(columns=colonnes_a_supprimer, errors='ignore') # Utilisez errors='ignore' pour éviter les erreurs si les colonnes n'existent pas
train_df.fillna(train_df.mean(), inplace=True)

raw_test = get_indicateur(1)
val_df = pd.json_normalize(raw_test).drop(columns=colonnes_a_supprimer, errors='ignore')
val_df = val_df.reindex(columns=train_df.columns, fill_value=train_df.mean()) # Utilisez fill_value pour aligner les colonnes et gérer les valeurs manquantes
X_train = train_df
y_train = get_labels(3)
X_val = val_df
y_val = get_labels(1)

# === Normalisation ===
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)


# === Balancement ===
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)

# === ADASYN ===
adasyn = ADASYN(random_state=42)
X_res_adasyn, y_res_adasyn = adasyn.fit_resample(X_train, y_train)


In [16]:
svm_clf = SVC(random_state=42)
# === Paramètres pour la recherche de grille ===
param_grid = {
    'C': [i for i in range(1, 10)],
    'gamma': [0.1, 0.2, 0.3, 0.4, "scale", "auto"],
    'kernel': ['linear', 'rbf',"poly", 'sigmoid'],
    "probability": [True, False],
    "class_weight": [None],
}
scorer = make_scorer(f1_score, average='weighted')
# === Recherche de grille ===
grid_search = GridSearchCV(svm_clf, param_grid, cv=5, scoring=scorer, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
y_pred = grid_search.predict(X_val)
print("Classification report :")
print(classification_report(y_val, y_pred))
print("Matrice de confusion :")
print(confusion_matrix(y_val, y_pred))
print("AUC :")
print(roc_auc_score(y_val, grid_search.predict_proba(X_val)[:, 1])) # AUC pour le meilleur modèle SVM
best_svm = grid_search.best_estimator_
print("Meilleurs paramètres trouvés :", grid_search.best_params_) # Afficher les meilleurs parametres


Fitting 5 folds for each of 432 candidates, totalling 2160 fits
Classification report :
              precision    recall  f1-score   support

           0       0.68      0.72      0.70        39
           1       0.27      0.24      0.25        17

    accuracy                           0.57        56
   macro avg       0.47      0.48      0.47        56
weighted avg       0.56      0.57      0.56        56

Matrice de confusion :
[[28 11]
 [13  4]]
AUC :
0.4524886877828054
Meilleurs paramètres trouvés : {'C': 2, 'class_weight': None, 'gamma': 0.4, 'kernel': 'sigmoid', 'probability': True}


In [4]:
random_search = RandomizedSearchCV(svm_clf, param_grid, n_iter=1000, cv=5, scoring='f1', n_jobs=-1, verbose=2)
random_search.fit(X_train, y_train)
best_svm_random = random_search.best_estimator_
print("Meilleurs paramètres trouvés (RandomizedSearchCV) :", random_search.best_params_) # Afficher les meilleurs parametres

Fitting 5 folds for each of 864 candidates, totalling 4320 fits




Meilleurs paramètres trouvés (RandomizedSearchCV) : {'probability': True, 'kernel': 'sigmoid', 'gamma': 0.1, 'class_weight': 'balanced', 'C': 9}


Non Balancer

In [17]:
best_svm.fit(X_train, y_train)
y_pred = best_svm.predict(X_val)
print("Classification report :")
print(classification_report(y_val, y_pred))
print("Matrice de confusion :")
print(confusion_matrix(y_val, y_pred))
print("AUC :")
print(roc_auc_score(y_val, best_svm.predict_proba(X_val)[:, 1])) # AUC pour le meilleur modèle SVM


Classification report :
              precision    recall  f1-score   support

           0       0.68      0.72      0.70        39
           1       0.27      0.24      0.25        17

    accuracy                           0.57        56
   macro avg       0.47      0.48      0.47        56
weighted avg       0.56      0.57      0.56        56

Matrice de confusion :
[[28 11]
 [13  4]]
AUC :
0.4524886877828054


SMOTE

In [18]:
best_svm.fit(X_res, y_res)
y_pred = best_svm.predict(X_val)
print("Classification report :")
print(classification_report(y_val, y_pred))
print("Matrice de confusion :")
print(confusion_matrix(y_val, y_pred))
print("AUC :")
print(roc_auc_score(y_val, best_svm.predict_proba(X_val)[:, 1])) # AUC pour le meilleur modèle SVM

Classification report :
              precision    recall  f1-score   support

           0       0.68      0.33      0.45        39
           1       0.30      0.65      0.41        17

    accuracy                           0.43        56
   macro avg       0.49      0.49      0.43        56
weighted avg       0.57      0.43      0.44        56

Matrice de confusion :
[[13 26]
 [ 6 11]]
AUC :
0.49924585218702866


ADASYN

In [19]:
best_svm.fit(X_res_adasyn, y_res_adasyn)
y_pred = best_svm.predict(X_val)
print("Classification report :")
print(classification_report(y_val, y_pred))
print("Matrice de confusion :")
print(confusion_matrix(y_val, y_pred))
print("AUC :")
print(roc_auc_score(y_val, best_svm.predict_proba(X_val)[:, 1])) # AUC pour le meilleur modèle SVM


Classification report :
              precision    recall  f1-score   support

           0       0.70      0.36      0.47        39
           1       0.31      0.65      0.42        17

    accuracy                           0.45        56
   macro avg       0.50      0.50      0.44        56
weighted avg       0.58      0.45      0.46        56

Matrice de confusion :
[[14 25]
 [ 6 11]]
AUC :
0.48717948717948717
