In [12]:
# Import packages
import numpy as np
import os
from tensorflow.keras.models import load_model # type: ignore
import tensorflow as tf
import pandas as pd
import json

# Local imports
from src.lstm_2.model_generation.helper_methods import generate_data
from src.data_processing.lstm_data_preprocessing import FeaturesConfig


In [13]:
# Custom loss function
def weighted_mse_large_moves(y_true, y_pred):
    diff = y_true - y_pred
    weight = tf.math.square(y_true)
    return tf.reduce_mean(weight * tf.square(diff))

# Empty dictionary to hold all models
models = {}

# Create placeholder variable names (or models if already trained)
batch_sizes = [16, 32, 64, 128, 256]

for feature_set in range(1, 4):  # 1 to 3
    for batch_size in batch_sizes:
        model_folder = f"../model_generation/trained_models/lstm_{feature_set}_{batch_size}"
        model_name = f"model_{feature_set}_{batch_size}"
        with open(os.path.join(model_folder, "config.json"), 'r') as f:
            configs = json.load(f)

        features_config = configs["features_config"]
        time_bucket_folder = configs["time_bucket_folder"]
        test_size = configs["test_size"]
        model = load_model(os.path.join(model_folder, "model.keras"),
            custom_objects={'weighted_mse_large_moves': weighted_mse_large_moves})
        models[model_name] = {"model": model, "feature_config": features_config, "time_bucket_folder": time_bucket_folder, "test_size": test_size}
        break

In [17]:
# Get unique feature config train set test size combinations

unique_configs = set()

for model_info in models.values():
    feature_config = frozenset(model_info["feature_config"].items())  # dict → frozenset of (key, value)
    config = (
        feature_config,
        model_info["time_bucket_folder"],
        model_info["test_size"]
    )
    unique_configs.add(config)

# 2. Now unique_configs contains all unique setups
#    You can generate/load your train/test datasets once per config

datasets = {}

for config in unique_configs:
    feature_config_frozen, time_bucket_folder, test_size = config

    # Convert frozenset back to dict
    features_config = FeaturesConfig(**model_info["feature_config"])

    # Generate/load your train/test data
    X_train, X_test, y_train, y_test, X_scaler, y_scaler = generate_data(
        features_config=features_config,
        time_bucket_folder=time_bucket_folder,
        test_size=test_size
    )

    datasets[config] = {
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "X_scaler": X_scaler,
        "y_scaler": y_scaler
    }


MemoryError: Unable to allocate 765. MiB for an array with shape (25063500, 4) and data type float64

In [15]:
# Get results for each model

model_results = {}

for model_name, model_info in models.items():
    config_key = (
        frozenset(model_info["feature_config"].items()),
        model_info["time_bucket_folder"],
        model_info["test_size"]
    )

    data = datasets[config_key]

    X_test = data["X_test"]
    y_test = data["y_test"]

    y_pred = model.predict(X_test)

    # Inverse transform to get real values
    y_pred_actual = data["y_scaler"].inverse_transform(y_pred)
    y_test_actual = data["y_scaler"].inverse_transform(y_test)

    model_results[model_name] = {"pred": y_pred_actual, "real": y_test_actual}


[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 146ms/step


KeyError: 'y_scaler'

In [None]:
import numpy as np

def large_move_metrics(y_true, y_pred, threshold=0.02):
    """
    Computes metrics focused only on large price moves.
    
    Parameters:
    - y_true: np.ndarray, true target values
    - y_pred: np.ndarray, predicted target values
    - threshold: float, defines a 'large move' (e.g., 0.02 = 2%)
    
    Returns:
    - metrics: dict with direction accuracy, MSE, MAE, count of large moves
    """
    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()

    # Select only large moves based on true values
    large_moves_idx = np.where(np.abs(y_true) >= threshold)[0]

    if len(large_moves_idx) == 0:
        print("Warning: No large moves found above threshold.")
        return None

    y_true_large = y_true[large_moves_idx]
    y_pred_large = y_pred[large_moves_idx]

    # Directional accuracy (sign matching)
    true_sign = np.sign(y_true_large)
    pred_sign = np.sign(y_pred_large)
    directional_accuracy = np.mean(true_sign == pred_sign)

    # MSE and MAE only for large moves
    mse = np.mean(np.square(y_true_large - y_pred_large))
    mae = np.mean(np.abs(y_true_large - y_pred_large))

    metrics = {
        'directional_accuracy': directional_accuracy,
        'mse': mse,
        'mae': mae,
        'large_move_count': len(large_moves_idx)
    }

    return metrics


In [None]:
import matplotlib.pyplot as plt

# Let's assume you have already defined the `large_move_metrics` function
# and the model_results dictionary where each entry is the predictions and real values for each model

# Store the metrics for each model
model_comparison = []

for model_name, results in model_results.items():
    y_pred = results["pred"]
    y_real = results["real"]
    
    # Compute the large move metrics (directional accuracy, etc.)
    metrics = large_move_metrics(y_real, y_pred, threshold=0.02)
    
    # Save the metrics for this model
    model_comparison.append({
        "model_name": model_name,
        "directional_accuracy": metrics["directional_accuracy"],
        "large_move_mse": metrics["large_move_mse"]
    })

# Convert the results into a DataFrame for easy plotting
import pandas as pd
df_comparison = pd.DataFrame(model_comparison)

# Plotting the comparison
fig, ax = plt.subplots(figsize=(10, 6))

# Bar plot for directional accuracy
ax.bar(df_comparison["model_name"], df_comparison["directional_accuracy"], width=0.4, label="Directional Accuracy", align="center")

# Bar plot for large move MSE
ax.bar(df_comparison["model_name"], df_comparison["large_move_mse"], width=0.4, label="Large Move MSE", align="edge")

# Add labels and title
ax.set_xlabel("Model")
ax.set_ylabel("Metric Value")
ax.set_title("Comparison of Models using Large Move Metrics")
ax.legend()

# Rotate x-axis labels to make them readable
plt.xticks(rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()
