In [1]:
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import fbeta_score, make_scorer, classification_report, confusion_matrix, ConfusionMatrixDisplay
from utils.data_loader import load_split, prepare_features_target
from utils.learning_curve import learning_curve_with_resampling
from models.manage_models import save_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.pipeline import Pipeline
import optuna
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# F-beta scorer with beta=2 (F2), which weights recall more heavily than
# precision; used as the model-selection metric during cross-validation.
f2_scorer = make_scorer(
    fbeta_score,
    beta=2,
)

In [3]:
# Load the 'train' split from dataset/splits and unpack it into
# X_train / y_train, using 'Fault' as the target column.
train_df = load_split(
    'train',
    data_dir='dataset/splits',
)
X_train, y_train = prepare_features_target(
    train_df,
    target_col='Fault',
)

In [4]:
def objective(trial):
    """Optuna objective: mean 3-fold cross-validated F2 of a scaled SVC pipeline.

    The trial chooses whether to random-under-sample the training folds
    (``sampling``), plus the SVC ``kernel``, ``C`` (log-uniform in
    [1e-2, 1e2]) and ``gamma``. The pipeline is always
    ``StandardScaler -> [RandomUnderSampler] -> SVC`` and is scored on the
    module-level ``X_train`` / ``y_train`` with ``f2_scorer``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to draw the hyperparameters.

    Returns
    -------
    float
        Mean of the per-fold scores returned by ``cross_val_score``.
    """
    # The suggest calls are kept in a fixed order: sampling, kernel, C, gamma.
    resampling_choice = trial.suggest_categorical("sampling", ["none", "under"])
    svc_kwargs = {
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'sigmoid']),
        'C': trial.suggest_float('C', 1e-2, 1e2, log=True),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
    }

    # The resampling step is optional; an imblearn pipeline is used either way
    # so that a sampler step is accepted when it is present.
    resample_steps = (
        [('resample', RandomUnderSampler(random_state=42))]
        if resampling_choice == "under"
        else []
    )

    classifier = SVC(
        class_weight='balanced',
        cache_size=2000,
        tol=1e-3,
        random_state=42,
        **svc_kwargs,
    )

    pipeline = ImbPipeline(
        [('scaler', StandardScaler()), *resample_steps, ('model', classifier)]
    )

    # Fixed, shuffled stratified folds so every trial is scored on the same splits.
    folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    fold_scores = cross_val_score(
        pipeline, X_train, y_train, scoring=f2_scorer, cv=folds, n_jobs=-1
    )

    return fold_scores.mean()

In [5]:
# Seed the sampler so that a fresh "Restart Kernel -> Run All" proposes the
# same sequence of trials. TPESampler is the sampler create_study uses by
# default for a single-objective study, so only the seed is being fixed here.
study = optuna.create_study(
    direction="maximize",
    study_name='svm_opt',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Trials run sequentially in this process (no n_jobs on optimize), which keeps
# the seeded sampler's suggestions in a deterministic order.
study.optimize(objective, n_trials=30, show_progress_bar=True)

print(f"Mejor score (f2_scorer): {study.best_value}")
print(f"Mejores parámetros: {study.best_params}")

[I 2025-12-22 13:15:43,113] A new study created in memory with name: svm_opt


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-12-22 13:17:50,837] Trial 0 finished with value: 0.8687714818256752 and parameters: {'sampling': 'none', 'kernel': 'rbf', 'C': 69.84841896499474, 'gamma': 'auto'}. Best is trial 0 with value: 0.8687714818256752.
[I 2025-12-22 13:18:34,797] Trial 1 finished with value: 0.8576590732408196 and parameters: {'sampling': 'under', 'kernel': 'rbf', 'C': 11.700725288595471, 'gamma': 'scale'}. Best is trial 0 with value: 0.8687714818256752.
[I 2025-12-22 13:19:47,690] Trial 2 finished with value: 0.609491531274618 and parameters: {'sampling': 'none', 'kernel': 'sigmoid', 'C': 0.4278132126221895, 'gamma': 'scale'}. Best is trial 0 with value: 0.8687714818256752.
[I 2025-12-22 13:20:31,642] Trial 3 finished with value: 0.8524009363395318 and parameters: {'sampling': 'under', 'kernel': 'rbf', 'C': 6.5854009830757185, 'gamma': 'auto'}. Best is trial 0 with value: 0.8687714818256752.
[I 2025-12-22 13:21:49,452] Trial 4 finished with value: 0.6092492229429723 and parameters: {'sampling': 'none

In [6]:
# Export the study's trials as a table for inspection.
df = study.trials_dataframe()

In [7]:
# Preview the first rows of the trials table.
df.head()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_C,params_gamma,params_kernel,params_sampling,state
0,0,0.868771,2025-12-22 13:15:43.121627,2025-12-22 13:17:50.837517,0 days 00:02:07.715890,69.848419,auto,rbf,none,COMPLETE
1,1,0.857659,2025-12-22 13:17:50.840113,2025-12-22 13:18:34.796899,0 days 00:00:43.956786,11.700725,scale,rbf,under,COMPLETE
2,2,0.609492,2025-12-22 13:18:34.801569,2025-12-22 13:19:47.690452,0 days 00:01:12.888883,0.427813,scale,sigmoid,none,COMPLETE
3,3,0.852401,2025-12-22 13:19:47.694454,2025-12-22 13:20:31.642587,0 days 00:00:43.948133,6.585401,auto,rbf,under,COMPLETE
4,4,0.609249,2025-12-22 13:20:31.646265,2025-12-22 13:21:49.452076,0 days 00:01:17.805811,0.789155,scale,sigmoid,none,COMPLETE


In [8]:
# Plot the study's optimization history.
optuna.visualization.plot_optimization_history(study)

In [9]:
# Plot Optuna's hyperparameter-importance estimate for this study.
optuna.visualization.plot_param_importances(study)

In [10]:
# Contour plot of the objective restricted to the kernel and gamma parameters.
optuna.visualization.plot_contour(study, params=["kernel", "gamma"])

In [11]:
# Split the best trial's parameters: 'sampling' describes the pipeline shape,
# everything else is passed to the SVC constructor.
sampling = study.best_params['sampling']
params = {
    name: value
    for name, value in study.best_params.items()
    if name != 'sampling'
}

params

{'kernel': 'rbf', 'C': 69.84841896499474, 'gamma': 'auto'}

In [12]:
# Final classifier: the tuned hyperparameters plus the same fixed settings
# (class weighting, cache size, tolerance, seed) used inside the objective.
final_model = SVC(
    class_weight='balanced',
    cache_size=2000,
    tol=1e-3,
    random_state=42,
    **params,
)

In [13]:
# Rebuild the pipeline with the same shape the best trial was scored with.
# `sampling` was previously popped from best_params and then ignored, so a
# winning "under" trial would have produced a final pipeline that differs
# from the cross-validated one.
if sampling == "under":
    # A sampler step needs the imblearn pipeline, which resamples only in fit.
    final_pipeline = ImbPipeline([
        ('scaler', StandardScaler()),
        ('resample', RandomUnderSampler(random_state=42)),
        ('model', final_model)
    ])
else:
    # No resampling selected: a plain scikit-learn pipeline, as before.
    final_pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', final_model)
    ])