In [20]:
import numpy as np
import random
import pandas as pd
import optuna
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from optuna.visualization import plot_optimization_history
import os
import json


In [9]:
SEED = 1234
SCORE = "f1-score"

In [10]:
def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)

In [11]:
def get_trend_changes_report_dict(y_test: np.array, y_pred: np.array) -> float:
    """
    Calculate the trend changes score based on the test and predicted values.
    
    Args:
        y_test (np.array): True labels.
        y_pred (np.array): Predicted labels.
        
    Returns:
        float: The trend changes score.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    return classification_report(
        y_df["is_changed_trend_test"][:-1], 
        y_df["is_changed_trend_predict"][:-1], 
        digits=4,
        output_dict=True,
        zero_division=0
    )

def trend_changes_score(y_test: np.array, y_pred: np.array) -> float:
    """
    Calculate the trend changes score based on the test and predicted values.
    
    Args:
        y_test (np.array): True labels.
        y_pred (np.array): Predicted labels.
        
    Returns:
        float: The trend changes score.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    return classification_report(y_df["is_changed_trend_test"][:-1], y_df["is_changed_trend_predict"][:-1], digits=4)

def trend_changes_true(y_test: np.array, y_pred: np.array) -> float:
    """
    Calculate the trend changes score based on the test and predicted values.
    
    Args:
        y_test (np.array): True labels.
        y_pred (np.array): Predicted labels.
        
    Returns:
        float: The trend changes score.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    report = classification_report(
        y_df["is_changed_trend_test"][:-1],
        y_df["is_changed_trend_predict"][:-1],
        output_dict=True,
        zero_division=0
    )
    return report["True"][SCORE]

In [12]:
# Cargar datos
train = pd.read_csv("../../../data/post_cleaning/training_set.csv", parse_dates=["date"])
val = pd.read_csv("../../../data/post_cleaning/validation_set.csv", parse_dates=["date"])
test_set = pd.read_csv("../../../data/post_cleaning/test_set.csv", parse_dates=['date'])
X_train = train.drop(columns=["date", "target_trend"]).values
y_train = train["target_trend"].values
X_val = val.drop(columns=["date", "target_trend"]).values
y_val = val["target_trend"].values
X_test = test_set.drop(columns=["date", "target_trend"]).values
y_test = test_set["target_trend"].values

In [13]:
def objective(trial):
    set_seeds(SEED)
    C = trial.suggest_float("C", 1e-3, 100, log=True)
    kernel = trial.suggest_categorical("kernel", ["rbf", "linear", "poly", "sigmoid"])
    gamma = trial.suggest_categorical("gamma", ["scale", "auto"])
    degree = 3
    if kernel == "poly":
        degree = trial.suggest_int("degree", 2, 5)
    else:
        degree = 3  # default

    model = Pipeline([
        ("scaler", StandardScaler()),
        ("svc", SVC(
            C=C,
            kernel=kernel,
            gamma=gamma,
            degree=degree,
            random_state=SEED,
            decision_function_shape="ovr"
        ))
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    score = trend_changes_true(y_val, y_pred)
    return score

In [14]:
set_seeds(SEED)
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED)
)
study.optimize(objective, n_trials=150)

[I 2025-08-24 15:48:22,341] A new study created in memory with name: no-name-cc32f03b-7af9-41f9-808b-a6822552fe12


[I 2025-08-24 15:48:22,476] Trial 0 finished with value: 0.047619047619047616 and parameters: {'C': 0.009069790423538587, 'kernel': 'poly', 'gamma': 'auto', 'degree': 5}. Best is trial 0 with value: 0.047619047619047616.
[I 2025-08-24 15:48:22,582] Trial 1 finished with value: 0.1651376146788991 and parameters: {'C': 61.75850414870778, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 1 with value: 0.1651376146788991.
[I 2025-08-24 15:48:22,668] Trial 2 finished with value: 0.16216216216216217 and parameters: {'C': 0.6397067456356109, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 1 with value: 0.1651376146788991.
[I 2025-08-24 15:48:22,736] Trial 3 finished with value: 0.18181818181818182 and parameters: {'C': 0.002381805030702129, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 3 with value: 0.18181818181818182.
[I 2025-08-24 15:48:22,839] Trial 4 finished with value: 0.2033898305084746 and parameters: {'C': 0.692617185326897, 'kernel': 'rbf', 'gamma': 'auto'}. Best is tri

In [15]:
# Visualizar el historial de optimización
plot_optimization_history(study)

In [16]:
print("Mejores hiperparámetros encontrados:")
print(study.best_params)
print(f"Mejor score de {SCORE}: {study.best_value:.4f}")

Mejores hiperparámetros encontrados:
{'C': 10.337603131701117, 'kernel': 'linear', 'gamma': 'auto'}
Mejor score de f1-score: 0.3953


In [21]:
# Guardar los mejores hiperparámetros y su valor
history = []
if os.path.exists("best_hyperparams.json"):
    try:
        with open("best_hyperparams.json", "r") as f:
            history = json.load(f)
    except (json.JSONDecodeError, ValueError):
        history = []

# Guardar ambos en un solo diccionario
history.append({
    "params": study.best_params,
    "value": study.best_value
})

with open("best_hyperparams.json", "w") as f:
    json.dump(history, f, indent=2)

CARGAR HIPERPARAMETROS DESDE JSON

In [None]:
# # Cargar historial de hiperparámetros y valores
# with open("best_hyperparams.json", "r") as f:
#     history = json.load(f)

# # Escoger el último (más reciente)
# best_params = history[-1]["params"]
# best_value = history[-1]["value"]

# # Si quieres ver todos:
# for i, entry in enumerate(history):
#     print(f"Hiperparámetros #{i+1}: {entry['params']}, Valor: {entry['value']}")

# # Si quieres escoger uno específico (por índice):
# # best_params = history[indice_que_quieras]["params"]
# # best_value = history[indice_que_quieras]["value"]

In [22]:
# Entrenar modelo final con los mejores hiperparámetros
set_seeds(SEED)
best_params = study.best_params
final_model = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", SVC(
        C=best_params["C"],
        kernel=best_params["kernel"],
        gamma=best_params["gamma"],
        degree=best_params.get("degree", 3),
        random_state=SEED,
        decision_function_shape="ovr"
    ))
])
final_model.fit(X_train, y_train)
y_val_pred = final_model.predict(X_val)
print("SVM FINAL trend_changes_score:\n", trend_changes_score(y_val, y_val_pred))

SVM FINAL trend_changes_score:
               precision    recall  f1-score   support

       False     0.9295    0.8649    0.8960       259
        True     0.3269    0.5000    0.3953        34

    accuracy                         0.8225       293
   macro avg     0.6282    0.6824    0.6457       293
weighted avg     0.8595    0.8225    0.8379       293



In [25]:
# Reporte completo: precisión, recall y F1 por clase
report = classification_report(y_val, y_val_pred, digits=4)
print("SVM Report:\n", report)

SVM Report:
               precision    recall  f1-score   support

          -1     0.8257    0.9574    0.8867        94
           0     0.7241    0.5122    0.6000        41
           1     0.9423    0.9245    0.9333       159

    accuracy                         0.8776       294
   macro avg     0.8307    0.7981    0.8067       294
weighted avg     0.8746    0.8776    0.8719       294



In [26]:
y_pred_test = final_model.predict(X_test)
print("SVM FINAL trend_changes_score:\n", trend_changes_score(y_test, y_pred_test))

SVM FINAL trend_changes_score:
               precision    recall  f1-score   support

       False     0.9037    0.7490    0.8191       263
        True     0.1316    0.3226    0.1869        31

    accuracy                         0.7041       294
   macro avg     0.5176    0.5358    0.5030       294
weighted avg     0.8223    0.7041    0.7525       294



In [27]:
y_pred_train = final_model.predict(X_train)
print("SVM FINAL trend_changes_score:\n", trend_changes_score(y_test, y_pred_train))

SVM FINAL trend_changes_score:
               precision    recall  f1-score   support

       False     0.1976    0.9202    0.3253       263
        True     0.8581    0.1144    0.2019      1110

    accuracy                         0.2688      1373
   macro avg     0.5278    0.5173    0.2636      1373
weighted avg     0.7316    0.2688    0.2255      1373



In [28]:
# --- Exportar y Comparar Métricas de Modelos (Validación) ---

# 1. Definir el nombre del modelo actual y el archivo de salida
model_name = 'SVM'
output_file = '../../../score_models/model_comparison_metrics.csv'

# 2. Calcular el reporte de clasificación estándar
# Usamos y_val_m y y_val_pred que están en la misma escala (0,1,2)
report_dict = classification_report(y_val, y_val_pred, output_dict=True, zero_division=0)
precision = report_dict['macro avg']['precision']
recall = report_dict['macro avg']['recall']
f1_score = report_dict['macro avg']['f1-score']


# 3. Calcular el reporte de cambio de tendencia
report = get_trend_changes_report_dict(y_val, y_val_pred)
trend_change_precision = report['True']['precision']
trend_change_recall = report['True']['recall']
trend_change_f1_score = report['True']['f1-score']

# 4. Organizar las nuevas métricas
new_metrics = {
    'precision': precision,
    'recall': recall,
    'f1_score': f1_score,
    'trend_change_precision': trend_change_precision,
    'trend_change_recall': trend_change_recall,
    'trend_change_f1_score': trend_change_f1_score
}

# 5. Cargar, actualizar y guardar el DataFrame de comparación
try:
    # Intentar cargar el archivo existente
    comparison_df = pd.read_csv(output_file, index_col='model')
    # Si existe, actualizar o añadir la fila para el modelo actual
    comparison_df.loc[model_name] = new_metrics
except FileNotFoundError:
    # Si no existe, crear un DataFrame nuevo directamente con los datos actuales
    comparison_df = pd.DataFrame([new_metrics], index=[model_name])

# Guardar el DataFrame actualizado en el CSV
comparison_df.to_csv(output_file, index_label='model')