In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.model_selection import learning_curve, StratifiedKFold, cross_val_score
from sklearn.metrics import f1_score, make_scorer, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC
import optuna
from models.manage_models import save_model

In [2]:
# Scorer object wrapping f1_score with macro averaging, so it can be passed
# as the `scoring` argument of scikit-learn model-selection helpers.
f1_macro = make_scorer(
    f1_score,
    average="macro",
)

In [None]:
# Load the dataset (adjust the path / file name to your environment).
df = pd.read_csv("dataset/gas_turbine_fault_detection_simulated3.csv")

# Keep only the rows where "Fault" equals 1; .copy() gives an independent frame.
df_fault = df.loc[df["Fault"] == 1].copy()

# Columns excluded from the feature matrix (adjust to the exact column names).
cols_drop = ["Turbine ID", "TTC", "Fault", "Fault Mode"]

# Multiclass target and the remaining columns as features.
y = df_fault["Fault Mode"]
X = df_fault.drop(columns=cols_drop)

# 80/20 hold-out split, stratified on the target, with a fixed seed so the
# split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
def objective(trial):
    """Optuna objective: cross-validated macro-F1 of a scaler + SVC pipeline.

    The trial chooses the scaler ('standard', 'robust' or 'minmax'), the SVC
    kernel ('rbf' or 'sigmoid'), ``C`` and ``gamma`` (both log-uniform) and,
    for the sigmoid kernel only, ``coef0``.

    The pipeline is evaluated on the notebook-level ``X_train`` / ``y_train``
    with a 5-fold shuffled ``StratifiedKFold`` (seed 42) using the
    notebook-level ``f1_macro`` scorer.

    Parameters
    ----------
    trial : object exposing ``suggest_categorical`` and ``suggest_float``
        (an Optuna trial when driven by a study).

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``.
    """
    # Map each scaler label to its class; only the chosen one is instantiated.
    scaler_classes = {
        'standard': StandardScaler,
        'robust': RobustScaler,
    }
    scaler_choice = trial.suggest_categorical('scaler', ['standard', 'robust', 'minmax'])
    # Any label other than 'standard' / 'robust' falls back to MinMaxScaler.
    chosen_scaler = scaler_classes.get(scaler_choice, MinMaxScaler)()

    # Hyperparameters are requested in a fixed order: kernel, C, gamma, coef0.
    svc_kernel = trial.suggest_categorical('kernel', ['rbf', 'sigmoid'])
    svc_kwargs = dict(
        kernel=svc_kernel,
        C=trial.suggest_float('C', 1e-4, 1e3, log=True),
        gamma=trial.suggest_float('gamma', 1e-5, 10, log=True),
        class_weight='balanced',
        cache_size=2000,
        random_state=42,
    )

    # coef0 is only sampled (and only passed to SVC) for the sigmoid kernel.
    if svc_kernel == 'sigmoid':
        svc_kwargs['coef0'] = trial.suggest_float('coef0', -10.0, 10.0)

    estimator = Pipeline([
        ('scaler', chosen_scaler),
        ('model', SVC(**svc_kwargs)),
    ])

    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = cross_val_score(
        estimator,
        X_train,
        y_train,
        scoring=f1_macro,
        cv=folds,
        n_jobs=-1,
    )

    return fold_scores.mean()