In [66]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.metrics import classification_report, balanced_accuracy_score

In [67]:
def trend_changes_score(y_test: np.array, y_pred: np.array) -> str:
    """
    Report how well the predicted labels reproduce the changes in the true labels.

    Each label sequence is turned into a boolean "change" indicator: position
    ``i`` is True when the label at ``i`` differs from the label at ``i + 1``.
    A sequence of ``n`` labels therefore yields ``n - 1`` indicators. The two
    indicator sequences are then compared with
    ``sklearn.metrics.classification_report``.

    Args:
        y_test (np.array): True labels, one-dimensional, in sequence order.
        y_pred (np.array): Predicted labels, same length and order as ``y_test``.

    Returns:
        str: The text report produced by ``classification_report`` with
        ``digits=4`` (per-class precision/recall/f1 for the ``False`` and
        ``True`` change classes). Note that this is a formatted report, not a
        single numeric score.

    Raises:
        ValueError: If the inputs are not one-dimensional, differ in length,
            or contain fewer than two samples (no consecutive pair to compare).
    """
    # Work on plain arrays so that slicing is positional even if a pandas
    # Series with a custom index is passed in.
    true_labels = np.asarray(y_test)
    pred_labels = np.asarray(y_pred)

    if true_labels.ndim != 1 or pred_labels.ndim != 1:
        raise ValueError("y_test and y_pred must be one-dimensional.")
    # Fail loudly on misaligned inputs instead of silently padding the shorter
    # one, which would produce a meaningless report.
    if true_labels.shape != pred_labels.shape:
        raise ValueError(
            f"y_test and y_pred must have the same length, "
            f"got {true_labels.shape[0]} and {pred_labels.shape[0]}."
        )
    if true_labels.size < 2:
        raise ValueError("At least two samples are required to detect a trend change.")

    # Element i compares label i with label i + 1; the last label has no
    # successor, so it contributes no indicator.
    true_changes = true_labels[:-1] != true_labels[1:]
    pred_changes = pred_labels[:-1] != pred_labels[1:]

    return classification_report(true_changes, pred_changes, digits=4)

In [68]:
# Load the training split; the 'date' column is parsed as datetimes on read.
data = pd.read_csv(
    "../../../data/training_set.csv",
    parse_dates=['date'],
)

In [69]:
# Classification target: the 'target_trend' column as an array of values.
y = data['target_trend'].values
# Feature matrix: every column except the date and the target itself.
X = data.drop(['date', 'target_trend'], axis=1).values

In [70]:
# Train a GradientBoostingClassifier (trees are fitted sequentially).
gb_model = GradientBoostingClassifier(
    random_state=42,       # seed for the randomness
    subsample=0.9,         # <1.0 introduces stochasticity (stochastic GB)
    max_depth=5,           # depth of each tree
    learning_rate=0.05,    # shrinkage; lower it if n_estimators is raised
    n_estimators=200,      # number of stages (weak trees); alternative noted: 500
).fit(X, y)

# Validation: load the validation split and separate the target from the features.
validation_set = pd.read_csv(
    "../../../data/validation_set.csv",
    parse_dates=['date'],
)
y_validation = validation_set['target_trend'].values
x_validation = validation_set.drop(['date', 'target_trend'], axis=1).values

# Predict on the validation features and print the per-class metrics.
y_pred = gb_model.predict(x_validation)
report = classification_report(y_validation, y_pred, digits=4)
print("GradientBoostingClassifier report:\n", report)
print("Balanced accuracy:", balanced_accuracy_score(y_validation, y_pred))

GradientBoostingClassifier report:
               precision    recall  f1-score   support

          -1     0.2706    0.9583    0.4220        24
           0     0.4167    0.2778    0.3333        18
           1     0.9394    0.3523    0.5124        88

    accuracy                         0.4538       130
   macro avg     0.5422    0.5295    0.4226       130
weighted avg     0.7435    0.4538    0.4709       130

Balanced accuracy: 0.5294612794612795


In [71]:
# Trend-change report for the GradientBoostingClassifier predictions.
gb_trend_report = trend_changes_score(y_test=y_validation, y_pred=y_pred)
print("\nGradientBoostingClassifier trend_changes_score:\n", gb_trend_report)


GradientBoostingClassifier trend_changes_score:
               precision    recall  f1-score   support

       False     0.8583    0.9196    0.8879       112
        True     0.0000    0.0000    0.0000        17

    accuracy                         0.7984       129
   macro avg     0.4292    0.4598    0.4440       129
weighted avg     0.7452    0.7984    0.7709       129



In [79]:
# (Optional) Faster, more scalable variant: HistGradientBoostingClassifier.
hgb_model = HistGradientBoostingClassifier(
    random_state=42,
    max_iter=400,         # counterpart of n_estimators
    max_depth=6,          # somewhat larger depths tend to work well here
    learning_rate=0.05,
).fit(X, y)

# Evaluate on the same validation arrays used for the previous model.
y_pred_hgb = hgb_model.predict(x_validation)
hgb_report = classification_report(y_validation, y_pred_hgb, digits=4)
print("\nHistGradientBoostingClassifier report:\n", hgb_report)
print("Balanced accuracy:", balanced_accuracy_score(y_validation, y_pred_hgb))


HistGradientBoostingClassifier report:
               precision    recall  f1-score   support

          -1     0.8261    0.7917    0.8085        24
           0     0.3333    0.5556    0.4167        18
           1     0.9481    0.8295    0.8848        88

    accuracy                         0.7846       130
   macro avg     0.7025    0.7256    0.7033       130
weighted avg     0.8404    0.7846    0.8059       130

Balanced accuracy: 0.7255892255892257


In [80]:
# Trend-change report for the HistGradientBoostingClassifier predictions.
hgb_trend_report = trend_changes_score(y_test=y_validation, y_pred=y_pred_hgb)
print("\nHistGradientBoostingClassifier trend_changes_score:\n", hgb_trend_report)


HistGradientBoostingClassifier trend_changes_score:
               precision    recall  f1-score   support

       False     0.8878    0.7768    0.8286       112
        True     0.1935    0.3529    0.2500        17

    accuracy                         0.7209       129
   macro avg     0.5407    0.5649    0.5393       129
weighted avg     0.7963    0.7209    0.7523       129

