In [122]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, balanced_accuracy_score

In [123]:
def trend_changes_score(y_test: np.ndarray, y_pred: np.ndarray) -> str:
    """
    Build a classification report on trend changes between consecutive labels.

    For every position except the last one, a "trend change" flag is True
    when the label at that position differs from the label that follows it.
    The flags derived from the true labels are then scored against the flags
    derived from the predicted labels.

    Args:
        y_test (np.ndarray): True labels, as an ordered 1-D sequence.
        y_pred (np.ndarray): Predicted labels, same shape as ``y_test``.

    Returns:
        str: The text report produced by ``classification_report`` (with
            ``digits=4``) for the True/False trend-change flags.

    Raises:
        ValueError: If ``y_test`` and ``y_pred`` do not have the same shape.
    """
    true_labels = np.asarray(y_test)
    predicted_labels = np.asarray(y_pred)
    if true_labels.shape != predicted_labels.shape:
        raise ValueError(
            "y_test and y_pred must have the same shape, got "
            f"{true_labels.shape} and {predicted_labels.shape}"
        )

    # Comparing each element with its successor yields n - 1 flags; the last
    # label has no successor, so it contributes no flag of its own.
    true_changes = true_labels[:-1] != true_labels[1:]
    predicted_changes = predicted_labels[:-1] != predicted_labels[1:]
    return classification_report(true_changes, predicted_changes, digits=4)

In [124]:
# Load the training set; the 'date' column is parsed as datetimes on read.
data = pd.read_csv(
    "../../../data/training_set.csv",
    parse_dates=['date'],
)

In [125]:
# Split the training frame into the feature matrix and the target vector.
training_features = data.drop(columns=['date', 'target_trend'])
feature_columns = training_features.columns
X = training_features.values
y = data['target_trend'].values

model = RandomForestClassifier(
    n_estimators=200, # number of trees
    max_depth=5, # maximum depth of each tree
    random_state=42 # seed for the randomness
)
model.fit(X, y)

validation_set = pd.read_csv("../../../data/validation_set.csv")
# Select the validation features by the training column names (and order):
# the model is fit on a bare ndarray, so features are matched by position
# only, and a different column order in the CSV would otherwise be silently
# fed into the wrong model inputs.
x_validation = validation_set[feature_columns].values
y_validation = validation_set['target_trend'].values

y_pred = model.predict(x_validation)

report = classification_report(y_validation, y_pred, digits=4)
print(f"RandomForest report:\n {report}")
print("Balanced accuracy:", balanced_accuracy_score(y_validation, y_pred))

RandomForest report:
               precision    recall  f1-score   support

          -1     0.7857    0.9167    0.8462        24
           0     0.2041    0.5556    0.2985        18
           1     0.9434    0.5682    0.7092        88

    accuracy                         0.6308       130
   macro avg     0.6444    0.6801    0.6180       130
weighted avg     0.8119    0.6308    0.6776       130

Balanced accuracy: 0.6801346801346803


In [126]:
# Score how well the predictions reproduce changes between consecutive labels
# on the validation set.
print(
    "\nRandomForest trend_changes_score:\n",
    trend_changes_score(y_test=y_validation, y_pred=y_pred),
)


RandomForest trend_changes_score:
               precision    recall  f1-score   support

       False     0.8909    0.8750    0.8829       112
        True     0.2632    0.2941    0.2778        17

    accuracy                         0.7984       129
   macro avg     0.5770    0.5846    0.5803       129
weighted avg     0.8082    0.7984    0.8031       129

