In [1]:
import os
os.environ['PYTHONHASHSEED'] = '1234'
import random
random.seed(43)
import numpy as np
np.random.seed(43)
import pandas as pd
import optuna   
import tensorflow as tf
tf.random.set_seed(43)
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.activations import linear, relu, sigmoid
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report,  balanced_accuracy_score
from sklearn.metrics import f1_score
import gc
from tensorflow.keras import backend as K
from optuna.visualization import plot_optimization_history
import json

  from .autonotebook import tqdm as notebook_tqdm
2025-08-20 21:53:05.652769: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-20 21:53:06.034055: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-08-20 21:53:06.034370: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-08-20 21:53:06.107110: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-20 21:53:06.261435: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-20 21:53:06.267319:

In [None]:
SEED = 43  # single seed reused for random, NumPy, TensorFlow and the Optuna sampler
# Metric read from the "True" row of the trend-change report and maximised by Optuna.
# Can be changed to "precision" or "recall" instead of "f1-score".
SCORE = "f1-score"

Trend Changes Score

In [3]:
def trend_changes_score(y_test: np.ndarray, y_pred: np.ndarray) -> str:
    """
    Build a classification report on trend *changes* instead of raw labels.

    Position ``i`` is flagged as a trend change when the label at ``i`` differs
    from the label at ``i + 1``. The flags derived from the true labels are
    compared against the flags derived from the predicted labels. The last
    position has no successor, so it is excluded from the report.

    Args:
        y_test (np.ndarray): True labels, in sequence order.
        y_pred (np.ndarray): Predicted labels, aligned with ``y_test``.

    Returns:
        str: Text report produced by ``classification_report`` (4 digits) with
            one row for "False" (no change) and one for "True" (change).
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    # shift(-1) aligns every label with its successor; the last row gets NaN
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    # [:-1] drops the last row, whose NaN successor would always count as a change
    return classification_report(
        y_df["is_changed_trend_test"][:-1],
        y_df["is_changed_trend_predict"][:-1],
        digits=4,
        # Same setting as trend_changes_true: undefined metrics are reported as 0
        # instead of emitting a warning.
        zero_division=0,
    )

def trend_changes_true(y_test: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Return one metric of the "True" (trend changed) class as a single number.

    Trend-change flags are derived exactly as in the text report: position
    ``i`` is a change when the label at ``i`` differs from the label at
    ``i + 1``, and the last position is excluded because it has no successor.
    The metric returned is selected by the module-level ``SCORE`` constant.

    Args:
        y_test (np.ndarray): True labels, in sequence order.
        y_pred (np.ndarray): Predicted labels, aligned with ``y_test``.

    Returns:
        float: ``SCORE`` of the "True" class, or 0.0 when neither the true nor
            the predicted labels contain any trend change.
    """
    y_df = pd.DataFrame([y_test, y_pred]).T
    y_df.columns = ["y_test", "y_pred"]
    # shift(-1) aligns every label with its successor; the last row gets NaN
    y_df["y_test_shifted"] = y_df["y_test"].shift(-1)
    y_df["is_changed_trend_test"] = y_df["y_test"] != y_df["y_test_shifted"]
    y_df["y_predict_shifted"] = y_df["y_pred"].shift(-1)
    y_df["is_changed_trend_predict"] = y_df["y_pred"] != y_df["y_predict_shifted"]
    report = classification_report(
        y_df["is_changed_trend_test"][:-1],
        y_df["is_changed_trend_predict"][:-1],
        output_dict=True,
        zero_division=0
    )
    # The report only has a "True" row when that class appears in the true or
    # predicted flags; without it there is nothing to score, so return 0.0
    # instead of raising KeyError.
    true_row = report.get("True")
    if true_row is None:
        return 0.0
    return true_row[SCORE]

In [4]:
# Read the three CSV splits, parsing the 'date' column as datetimes
training_set, validation_set, test_set = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in (
        "../../../data/training_set.csv",
        "../../../data/validation_set.csv",
        "../../../data/test_set.csv",
    )
)

# Features: every column except the label and the date
X_train = training_set.drop(columns=['target_trend','date']).values
X_val = validation_set.drop(columns=['target_trend','date']).values
X_test = test_set.drop(columns=['target_trend','date']).values

# IMPORTANT: the labels are shifted by +1 because a label of -1 does not work
# with the loss function used for training.
y_train = training_set['target_trend'].values + 1
y_validation = validation_set['target_trend'].values + 1
y_test = test_set['target_trend'].values + 1

OPTIMIZACIÓN CON OPTUNA

In [None]:
def objective(trial):
    """
    Optuna objective: train one network and score it on the validation split.

    Samples a learning rate, four hidden-layer widths, a dropout rate, an
    activation, an optimizer and a batch size from ``trial``. It then trains a
    4-hidden-layer classifier with early stopping on ``val_loss`` and scores
    the validation predictions with ``trend_changes_true``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: Value of ``trend_changes_true`` on the validation split; the
            study is expected to maximise it.
    """
    # Re-seed every RNG so each trial starts from the same random state.
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only affects child processes - confirm.
    os.environ['PYTHONHASHSEED'] = str(SEED)
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    # Hyperparameters to optimise
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    n_l1 = trial.suggest_int("n_l1", 32, 256, step=32)
    n_l2 = trial.suggest_int("n_l2", 32, 256, step=32)
    n_l3 = trial.suggest_int("n_l3", 16, 128, step=16)
    n_l4 = trial.suggest_int("n_l4", 8, 64, step=8)
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 0.5)
    activation = trial.suggest_categorical("activation", ["relu", "tanh", "selu"])
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "rmsprop", "sgd"])
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256])

    try:
        # Optimizer
        optimizer = {
            "adam": tf.keras.optimizers.Adam,
            "rmsprop": tf.keras.optimizers.RMSprop,
            "sgd": tf.keras.optimizers.SGD
        }[optimizer_name](learning_rate=learning_rate)

        # Model: the last layer outputs raw logits for the 3 classes
        model = Sequential([
            tf.keras.Input(shape=(X_train.shape[1],)),
            Dense(n_l1, activation=activation),
            Dropout(dropout_rate),
            Dense(n_l2, activation=activation),
            Dropout(dropout_rate),
            Dense(n_l3, activation=activation),
            Dropout(dropout_rate),
            Dense(n_l4, activation=activation),
            Dropout(dropout_rate),
            Dense(3, activation='linear')  # 3 classes
        ])

        model.compile(
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            optimizer=optimizer,
            metrics=["accuracy"]
        )

        early_stop = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=10,
            restore_best_weights=True
        )

        # Training
        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_validation),
            epochs=60,
            batch_size=batch_size,
            callbacks=[early_stop],
            verbose=0
        )

        # Predict silently (like fit) so the trials do not flood the cell
        # output with one progress bar each.
        y_pred_logits = model.predict(X_val, verbose=0)
        y_pred = np.argmax(y_pred_logits, axis=1)

        # Custom metric: this is the value Optuna maximises
        return trend_changes_true(y_validation, y_pred)
    finally:
        # Always release the Keras session state, even when a trial fails, so
        # that memory does not accumulate across trials.
        K.clear_session()
        gc.collect()

In [None]:
# TPE sampler seeded with SEED; the study maximises the value returned by objective
tpe_sampler = optuna.samplers.TPESampler(seed=SEED)
study = optuna.create_study(sampler=tpe_sampler, direction="maximize")
study.optimize(objective, n_trials=500)

In [None]:
# Visualize the optimization history of the study
plot_optimization_history(study)

In [None]:
# Keep the winning configuration in a variable, then report it with its score
best_params = study.best_params
print("Mejores hiperparámetros encontrados:")
print(best_params)
print(f"Mejor score de {SCORE}: {study.best_value:.4f}")

GUARDAR EN JSON

In [None]:
# Persist the best hyperparameters together with their score.
# The file holds a list of {"params", "value"} records, oldest first.
hyperparams_file = "best_hyperparams.json"
history = []
if os.path.exists(hyperparams_file):
    try:
        with open(hyperparams_file, "r") as f:
            history = json.load(f)
    except (json.JSONDecodeError, ValueError):
        # Unreadable file: start a fresh history instead of failing
        history = []

# A valid JSON document that is not a list (e.g. a dict) cannot be appended to;
# treat it like an unreadable file.
if not isinstance(history, list):
    history = []

new_entry = {
    "params": study.best_params,
    "value": study.best_value
}

# Re-running this cell for the same study must not store the same record twice.
# Only the last record is compared, so the newest result always stays last.
if not history or history[-1] != new_entry:
    history.append(new_entry)

with open(hyperparams_file, "w") as f:
    json.dump(history, f, indent=2)

CARGAR HIPERPARÁMETROS DESDE JSON

In [5]:
# Read back every stored {"params", "value"} record
with open("best_hyperparams.json", "r") as f:
    history = json.load(f)

# Use the last (most recently appended) record by default
latest_entry = history[-1]
best_params, best_value = latest_entry["params"], latest_entry["value"]

# List every stored record, numbered from 1
for position, record in enumerate(history, start=1):
    print(f"Hiperparámetros #{position}: {record['params']}, Valor: {record['value']}")

# To pick a specific record instead (by index):
# best_params = history[indice_que_quieras]["params"]
# best_value = history[indice_que_quieras]["value"]

Hiperparámetros #1: {'learning_rate': 1.00984195139193e-05, 'n_l1': 192, 'n_l2': 128, 'n_l3': 80, 'n_l4': 32, 'dropout_rate': 0.01560157455885968, 'activation': 'selu', 'optimizer': 'rmsprop', 'batch_size': 256}, Valor: 0.5777777777777777
Hiperparámetros #2: {'learning_rate': 1.00984195139193e-05, 'n_l1': 192, 'n_l2': 128, 'n_l3': 80, 'n_l4': 32, 'dropout_rate': 0.01560157455885968, 'activation': 'selu', 'optimizer': 'rmsprop', 'batch_size': 256}, Valor: 0.5777777777777777


In [11]:
# Libraries are (re-)imported and every RNG is re-seeded with SEED right before
# building the model, to make the training run reproducible.
import os
import random
import numpy as np
import tensorflow as tf

os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Final model built from the selected hyperparameters.
# The optimizer is created first, then the layers in network order.
optimizer_classes = {
    "adam": tf.keras.optimizers.Adam,
    "rmsprop": tf.keras.optimizers.RMSprop,
    "sgd": tf.keras.optimizers.SGD
}
optimizer_class = optimizer_classes[best_params["optimizer"]]
final_optimizer = optimizer_class(learning_rate=best_params["learning_rate"])

# Four Dense + Dropout pairs followed by a 3-unit linear (logits) output layer
final_layers = []
for units_key in ("n_l1", "n_l2", "n_l3", "n_l4"):
    final_layers.append(Dense(best_params[units_key], activation=best_params["activation"]))
    final_layers.append(Dropout(best_params["dropout_rate"]))
final_layers.append(Dense(3, activation='linear'))  # 3 classes
final_model = Sequential(final_layers)

final_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=final_optimizer,
    metrics=["accuracy"]
)

# Stop once val_loss has not improved for 10 epochs and keep the best weights
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

history = final_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_validation),
    epochs=60,
    batch_size=best_params["batch_size"],
    callbacks=[early_stop],
    verbose=0
)

# Class predictions on the validation split: argmax over the 3 logits
val_logits = final_model.predict(X_val)
y_pred_val = np.argmax(val_logits, axis=1)
val_trend_report = trend_changes_score(y_validation, y_pred_val)
print("Multiclass Neural Network Trend Changes Score:\n", val_trend_report)

Multiclass Neural Network Trend Changes Score:
               precision    recall  f1-score   support

       False     0.9604    0.8661    0.9108       112
        True     0.4643    0.7647    0.5778        17

    accuracy                         0.8527       129
   macro avg     0.7123    0.8154    0.7443       129
weighted avg     0.8950    0.8527    0.8669       129



In [12]:
# Class predictions on the test split: argmax over the 3 logits
test_logits = final_model.predict(X_test)
y_pred_test = np.argmax(test_logits, axis=1)
test_trend_report = trend_changes_score(y_test, y_pred_test)
print("Multiclass Neural Network Trend Changes Score:\n", test_trend_report)

Multiclass Neural Network Trend Changes Score:
               precision    recall  f1-score   support

       False     0.9263    0.7395    0.8224       119
        True     0.1143    0.3636    0.1739        11

    accuracy                         0.7077       130
   macro avg     0.5203    0.5516    0.4982       130
weighted avg     0.8576    0.7077    0.7676       130



In [13]:
# Class predictions on the training split: argmax over the 3 logits
train_logits = final_model.predict(X_train)
y_pred_train = np.argmax(train_logits, axis=1)
train_trend_report = trend_changes_score(y_train, y_pred_train)
print("Multiclass Neural Network Trend Changes Score:\n", train_trend_report)

Multiclass Neural Network Trend Changes Score:
               precision    recall  f1-score   support

       False     0.8816    0.6861    0.7717       532
        True     0.1257    0.3288    0.1818        73

    accuracy                         0.6430       605
   macro avg     0.5036    0.5074    0.4767       605
weighted avg     0.7904    0.6430    0.7005       605



In [7]:
# Full per-class report (precision, recall, F1) plus balanced accuracy,
# computed on the raw 3-class validation predictions.
report = classification_report(y_validation, y_pred_val, digits=4)
balanced_acc = balanced_accuracy_score(y_validation, y_pred_val)
print("Multiclass Neural Network Report:\n", report)
print("Balanced accuracy:", balanced_acc)

Multiclass Neural Network Report:
               precision    recall  f1-score   support

           0     0.7000    0.5833    0.6364        24
           1     0.5294    0.5000    0.5143        18
           2     0.9032    0.9545    0.9282        88

    accuracy                         0.8231       130
   macro avg     0.7109    0.6793    0.6929       130
weighted avg     0.8139    0.8231    0.8170       130

Balanced accuracy: 0.6792929292929294


Compilación y entrenamiento de la red SIN OPTUNA

In [None]:
# Hand-set hyperparameters for the network trained without Optuna
learning_rate = 0.001
n_l1, n_l2, n_l3, n_l4 = 100, 60, 30, 10

# Model definition: four ReLU hidden layers (L1-L4) and a 3-unit linear output (L5)
hidden_units = {"L1": n_l1, "L2": n_l2, "L3": n_l3, "L4": n_l4}
model = Sequential(
    [tf.keras.Input(shape=(X_train.shape[1],))]
    + [
        Dense(units, activation='relu', name=layer_name)
        for layer_name, units in hidden_units.items()
    ]
    + [Dense(3, activation='linear', name='L5')],
    name="multiclass",
)

In [None]:
# Print the Keras summary of the model defined without Optuna
model.summary()

In [None]:
# Compile the model; the loss is configured with from_logits=True
baseline_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
baseline_optimizer = tf.keras.optimizers.Adam(learning_rate)
model.compile(
    loss=baseline_loss,
    optimizer=baseline_optimizer,
    metrics=['accuracy']
)

# Stop training when val_loss has not improved for 10 consecutive epochs,
# restoring the weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Currently disabled: a ModelCheckpoint(filepath='best_model.keras',
# monitor='val_loss', save_best_only=True) callback could also be passed to fit.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_validation),
    epochs=60,
    callbacks=[early_stop],
)


In [None]:
# Loss and accuracy (the metric set at compile time) on the validation split
loss, accuracy = model.evaluate(X_val, y_validation)

Gráfica de Loss y Accuracy en train_set & validation_set

In [None]:
# One figure per metric: training curve against validation curve, per epoch
metric_curves = [
    ("Loss", 'loss', 'Train Loss', 'val_loss', 'Val Loss'),
    ("Accuracy", 'accuracy', 'Train Acc', 'val_accuracy', 'Val Acc'),
]
for title, train_key, train_label, val_key, val_label in metric_curves:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel("Value")
    plt.show()


F1 SCORE

In [None]:
# Class predictions on the validation split: argmax over the 3 logits
val_logits = model.predict(X_val)
y_pred_val = np.argmax(val_logits, axis=1)

# Full per-class report (precision, recall, F1) plus balanced accuracy
report = classification_report(y_validation, y_pred_val, digits=4)
balanced_acc = balanced_accuracy_score(y_validation, y_pred_val)
print("Multiclass Neural Network Report:\n", report)
print("Balanced accuracy:", balanced_acc)


In [None]:
# Trend-change report for the validation predictions
val_trend_report = trend_changes_score(y_validation, y_pred_val)
print("Multiclass Neural Network trend_changes_score:\n", val_trend_report)