In [1]:
import numpy as np
import random
import pandas as pd
import optuna
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed shared by set_seeds(), the SVC random_state and the Optuna TPESampler.
SEED = 1234
# Metric key read from the "True" entry of the classification report
# inside trend_changes_true(); this is the value the study maximizes.
SCORE = "f1-score"

In [3]:
def set_seeds(seed):
    """Seed Python's ``random`` module and NumPy's legacy global RNG.

    Args:
        seed: Value forwarded unchanged to ``random.seed`` and ``np.random.seed``.
    """
    # Same order as before: stdlib generator first, then NumPy.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

In [4]:
def trend_changes_score(y_test: np.ndarray, y_pred: np.ndarray) -> str:
    """
    Build a text classification report on trend *changes* rather than raw labels.

    Each series is compared with itself shifted one step ahead; a position is
    flagged ``True`` when its label differs from the next one. The flags derived
    from ``y_test`` are treated as ground truth and the flags derived from
    ``y_pred`` as predictions.

    Args:
        y_test (np.ndarray): True labels, in sequence order.
        y_pred (np.ndarray): Predicted labels, aligned with ``y_test``.

    Returns:
        str: The text report produced by ``classification_report`` (4 digits)
        for the ``False`` / ``True`` trend-change classes.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    # The last row has no successor (shift(-1) leaves NaN there), so it is
    # dropped from both series before scoring.
    return classification_report(
        y_df["is_changed_trend_test"][:-1],
        y_df["is_changed_trend_predict"][:-1],
        digits=4,
        # Match trend_changes_true: report 0 instead of warning when a class
        # is never predicted and its precision/recall is undefined.
        zero_division=0,
    )

def trend_changes_true(y_test: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Return a single metric for the positive ("trend changed") class.

    Each series is compared with itself shifted one step ahead; a position is
    flagged ``True`` when its label differs from the next one. A classification
    report is computed on those flags and the metric named by the module-level
    ``SCORE`` constant is read from the ``"True"`` class entry.

    Args:
        y_test (np.ndarray): True labels, in sequence order.
        y_pred (np.ndarray): Predicted labels, aligned with ``y_test``.

    Returns:
        float: The ``SCORE`` metric of the ``"True"`` class, or ``0.0`` when
        neither series contains any trend change (the report then has no
        ``"True"`` entry at all).

    Raises:
        KeyError: If ``SCORE`` is not a metric present in the report entry.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    # The last row has no successor (shift(-1) leaves NaN there), so it is
    # dropped from both series before scoring.
    report = classification_report(
        y_df["is_changed_trend_test"][:-1],
        y_df["is_changed_trend_predict"][:-1],
        output_dict=True,
        zero_division=0
    )
    # Only classes that occur in the true or predicted flags get an entry, so
    # "True" is missing when both series are constant. Treat that as a zero
    # score instead of raising; a misspelled SCORE still raises KeyError.
    positive_class = report.get("True")
    if positive_class is None:
        return 0.0
    return positive_class[SCORE]

In [5]:
# Load the training and validation splits; "date" is parsed as a datetime column.
train = pd.read_csv("../../../data/training_set.csv", parse_dates=["date"])
val = pd.read_csv("../../../data/validation_set.csv", parse_dates=["date"])

# Labels come from "target_trend".
y_train = train["target_trend"].values
y_val = val["target_trend"].values

# Features are every remaining column once "date" and the label are dropped.
X_train = train.drop(columns=["date", "target_trend"]).values
X_val = val.drop(columns=["date", "target_trend"]).values

In [6]:
def objective(trial):
    """Optuna objective: fit a scaled SVC with sampled hyperparameters.

    Samples ``C``, ``kernel`` and ``gamma`` for every trial, and ``degree``
    only when the polynomial kernel is chosen. The pipeline is fitted on
    ``X_train`` / ``y_train`` and scored on ``X_val`` / ``y_val`` with
    ``trend_changes_true``.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        The value returned by ``trend_changes_true`` for the validation
        predictions.
    """
    set_seeds(SEED)

    reg_strength = trial.suggest_float("C", 1e-3, 100, log=True)
    kernel_name = trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"])
    gamma_mode = trial.suggest_categorical("gamma", ["scale", "auto"])
    # Non-polynomial kernels use the fixed value 3 and do not register a
    # "degree" parameter on the trial.
    poly_degree = trial.suggest_int("degree", 2, 5) if kernel_name == "poly" else 3

    classifier = SVC(
        C=reg_strength,
        kernel=kernel_name,
        gamma=gamma_mode,
        degree=poly_degree,
        random_state=SEED,
        decision_function_shape="ovr",
    )
    model = Pipeline([("scaler", StandardScaler()), ("svc", classifier)])
    model.fit(X_train, y_train)
    return trend_changes_true(y_val, model.predict(X_val))

In [None]:
# Seed the global RNGs, then run a seeded TPE search that maximizes the
# value returned by objective() over 150 trials.
set_seeds(SEED)
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=SEED),
    direction="maximize",
)
study.optimize(objective, n_trials=150)

[I 2025-08-17 23:20:38,421] A new study created in memory with name: no-name-c315047b-3dbe-49c0-be21-061cd898e1a6
[I 2025-08-17 23:20:38,462] Trial 0 finished with value: 0.09090909090909091 and parameters: {'C': 0.009069790423538587, 'kernel': 'poly', 'gamma': 'auto', 'degree': 5}. Best is trial 0 with value: 0.09090909090909091.
[I 2025-08-17 23:20:38,496] Trial 1 finished with value: 0.24489795918367346 and parameters: {'C': 61.75850414870778, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 1 with value: 0.24489795918367346.
[I 2025-08-17 23:20:38,526] Trial 2 finished with value: 0.0 and parameters: {'C': 0.6397067456356109, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 1 with value: 0.24489795918367346.
[I 2025-08-17 23:20:38,552] Trial 3 finished with value: 0.0 and parameters: {'C': 0.002381805030702129, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 1 with value: 0.24489795918367346.
[I 2025-08-17 23:20:38,582] Trial 4 finished with value: 0.23529411764705882 and

In [8]:
# Report the best hyperparameters and the best objective value of the study.
print(
    "Mejores hiperparámetros encontrados:",
    study.best_params,
    f"Mejor score de {SCORE}: {study.best_value:.4f}",
    sep="\n",
)

Mejores hiperparámetros encontrados:
{'C': 18.670850184604408, 'kernel': 'linear', 'gamma': 'scale'}
Mejor score de f1-score: 0.3404


In [9]:
# Train the final model with the best hyperparameters found by the study.
# NOTE(review): the report below is computed on the same validation split that
# objective() scored during the search, so it is not an independent estimate.
set_seeds(SEED)
best_params = study.best_params

final_svc = SVC(
    C=best_params["C"],
    kernel=best_params["kernel"],
    gamma=best_params["gamma"],
    # "degree" is only present when the best trial used the "poly" kernel.
    degree=best_params.get("degree", 3),
    random_state=SEED,
    decision_function_shape="ovr",
)
final_model = Pipeline([("scaler", StandardScaler()), ("svc", final_svc)])
final_model.fit(X_train, y_train)

y_pred_final = final_model.predict(X_val)
final_report = trend_changes_score(y_val, y_pred_final)
print("SVM FINAL trend_changes_score:\n", final_report)

SVM FINAL trend_changes_score:
               precision    recall  f1-score   support

       False     0.9091    0.8036    0.8531       112
        True     0.2667    0.4706    0.3404        17

    accuracy                         0.7597       129
   macro avg     0.5879    0.6371    0.5968       129
weighted avg     0.8244    0.7597    0.7855       129

