In [17]:
from collections import Counter

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

In [18]:
def trend_changes_score(y_test: np.ndarray, y_pred: np.ndarray) -> str:
    """
    Report how well the predictions reproduce the trend changes of the true labels.

    A "trend change" at position ``i`` means that the label at ``i`` differs
    from the label at ``i + 1``. The change flags are computed separately for
    ``y_test`` and ``y_pred`` and then scored against each other as a binary
    classification problem (changed / not changed).

    Args:
        y_test (np.ndarray): True labels, in sequence order.
        y_pred (np.ndarray): Predicted labels, aligned element-wise with ``y_test``.

    Returns:
        str: The output of ``classification_report`` (4 digits) with the true
        change flags as targets and the predicted change flags as predictions.
        It covers ``len(y_test) - 1`` transitions.

    Raises:
        ValueError: If ``y_test`` and ``y_pred`` do not have the same shape.
    """
    y_test = np.asarray(y_test)
    y_pred = np.asarray(y_pred)
    # Inputs of different lengths cannot be aligned transition by transition;
    # fail loudly rather than score padded values as if they were changes.
    if y_test.shape != y_pred.shape:
        raise ValueError(
            f"y_test and y_pred must have the same shape, got {y_test.shape} and {y_pred.shape}"
        )

    # Compare every element with its successor: one flag per consecutive pair.
    is_changed_trend_test = y_test[:-1] != y_test[1:]
    is_changed_trend_predict = y_pred[:-1] != y_pred[1:]
    return classification_report(is_changed_trend_test, is_changed_trend_predict, digits=4)

In [19]:
# Load the training and validation sets, parsing the `date` column as datetimes.
train_df, val_df = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in (
        "../../../data/training_set.csv",
        "../../../data/validation_set.csv",
    )
)

In [20]:
# Training feature matrix: every column except the date and the target, as a NumPy array.
# `.to_numpy()` is the pandas-recommended replacement for `.values`.
X_train = train_df.drop(columns=['date','target_trend']).to_numpy()

In [21]:
# Training target labels as a NumPy array (`.to_numpy()` is preferred over `.values`).
y_train = train_df['target_trend'].to_numpy()

In [22]:
# Validation feature matrix: every column except the date and the target, as a NumPy array.
# `.to_numpy()` is the pandas-recommended replacement for `.values`.
X_val   = val_df.drop(columns=['date','target_trend']).to_numpy()

In [23]:
# Validation target labels as a NumPy array (`.to_numpy()` is preferred over `.values`).
y_val   = val_df['target_trend'].to_numpy()

In [24]:
# Map the trend labels (-1, 0, 1) onto the class ids (0, 1, 2) used for LightGBM.
cls_map = {-1:0, 0:1, 1:2}
inv_map = {v:k for k,v in cls_map.items()}
# Index the dict directly so that an unexpected label raises KeyError right here;
# `cls_map.get` would hand back None for it instead of failing at the lookup.
y_train_m = np.array([cls_map[label] for label in y_train])
y_val_m   = np.array([cls_map[label] for label in y_val])

# Class weights: inverse of each class's frequency in the training labels,
# scaled so that perfectly balanced classes would all receive weight 1.
# (`Counter` comes from the notebook's import cell.)
cnt = Counter(y_train_m)
total = len(y_train_m)
class_weight = {c: total/(len(cnt)*n) for c,n in cnt.items()}

# Per-sample weights; both splits reuse the weights derived from the training labels.
lgb_train = lgb.Dataset(X_train, label=y_train_m, weight=[class_weight[c] for c in y_train_m])
lgb_val   = lgb.Dataset(X_val,   label=y_val_m,   weight=[class_weight[c] for c in y_val_m], reference=lgb_train)

# LightGBM training parameters for the 3-class trend problem.
params = {
    # Task definition: three mapped classes (0, 1, 2).
    "objective": "multiclass",
    "num_class": 3,
    # Boosting / tree-shape settings.
    "learning_rate": 0.05,
    "num_leaves": 31,
    "max_depth": -1,
    # Column and row subsampling.
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 1,
    "min_data_in_leaf": 20,
    # Metrics evaluated on every validation set during training.
    "metric": ["multi_logloss", "multi_error"],
    "verbosity": -1,
    "seed": 42,
}

# Train with up to 2000 boosting rounds, evaluating on both the training and
# validation datasets and stopping early after 100 rounds without improvement.
# NOTE(review): `params` lists two metrics and `first_metric_only` is left at
# its default, so presumably either metric stalling on "val" can trigger the
# stop. The run saved with this notebook stopped at iteration 7 although val
# multi_logloss was lower at iteration 100 — confirm this is intended.
model = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=2000,
    valid_sets=[lgb_train, lgb_val],
    valid_names=["train", "val"],
    callbacks=[
        lgb.early_stopping(stopping_rounds=100, verbose=True),
        lgb.log_evaluation(period=100),
    ],
)

# Validation predictions, using the best iteration found during training.
y_val_prob = model.predict(X_val, num_iteration=model.best_iteration)
# Pick the most probable class id per row, then map ids (0, 1, 2) back to
# the original labels (-1, 0, 1).
y_val_pred_m = y_val_prob.argmax(axis=1)
y_val_pred = np.vectorize(inv_map.get)(y_val_pred_m)

val_report = classification_report(y_val, y_val_pred, digits=4)
print("Reporte multiclase:", val_report, sep="\n")

Training until validation scores don't improve for 100 rounds
[100]	train's multi_logloss: 0.0481828	train's multi_error: 0	val's multi_logloss: 0.436619	val's multi_error: 0.1565
Early stopping, best iteration is:
[7]	train's multi_logloss: 0.727406	train's multi_error: 0.118679	val's multi_logloss: 0.771903	val's multi_error: 0.103731
Reporte multiclase:
              precision    recall  f1-score   support

          -1     0.8519    0.9583    0.9020        24
           0     0.5625    0.5000    0.5294        18
           1     0.9425    0.9318    0.9371        88

    accuracy                         0.8769       130
   macro avg     0.7856    0.7967    0.7895       130
weighted avg     0.8732    0.8769    0.8742       130



In [25]:
# Score how well the predicted label sequence reproduces the true trend changes.
trend_report = trend_changes_score(y_test=y_val, y_pred=y_val_pred)
print(trend_report)

              precision    recall  f1-score   support

       False     0.8684    0.8839    0.8761       112
        True     0.1333    0.1176    0.1250        17

    accuracy                         0.7829       129
   macro avg     0.5009    0.5008    0.5006       129
weighted avg     0.7715    0.7829    0.7771       129

