# Model Experimenting
This notebook experiments with how well different ML models perform on historical data for different stocks.

## Importing

In [1]:
from typing import Union
import numpy as np
import sys

from pathlib import Path
sys.path.append(str(Path("..").resolve()))

from live_trader.ml_model import ML_Pipeline, brier, basic_lstm, attention_bilstm
from live_trader.ml_model.layers import *

  if not hasattr(np, "object"):


In [2]:
# Tensorflow
import tensorflow as tf

from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dense, Dropout, Bidirectional,
    Attention, LayerNormalization, Add, GlobalAveragePooling1D, 
    Conv1D, MultiHeadAttention, Reshape, Lambda, GRU
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from keras.saving import register_keras_serializable

## Testing our existing models

### Basic LSTM

In [3]:
side, _ = await basic_lstm("GOOG")
print(f"GOOG: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
GOOG: SideSignal.BUY


In [4]:
side, _ = await basic_lstm("AAPL")
print(f"AAPL: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step
AAPL: SideSignal.HOLD


In [5]:
side, _ = await basic_lstm("MCFT")
print(f"MCFT: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step
MCFT: SideSignal.HOLD


### Attention BiLSTM

In [6]:
side, _ = await attention_bilstm("GOOG")
print(f"GOOG: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 534ms/step
GOOG: SideSignal.BUY


In [7]:
side, _ = await attention_bilstm("AAPL")
print(f"AAPL: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482ms/step
AAPL: SideSignal.BUY


In [8]:
side, _ = await attention_bilstm("MCFT")
print(f"MCFT: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 520ms/step
MCFT: SideSignal.HOLD


Neither `attention_bilstm` nor `basic_lstm` is a good model. Therefore, we will try out other models as well.

## Modelling

### Temporal Convolutional Network (TCN-lite)

In [9]:
def build_tcn_lite(X_train_seq: Union[np.ndarray, list]) -> Model:
    """
    Builds a lightweight Temporal Convolutional Network (TCN-style)
    for noisy financial time series classification.

    Two causal Conv1D blocks (32 then 16 filters, kernel size 3) with
    layer normalisation are followed by global average pooling over time
    and a small dense head ending in a single sigmoid unit.

    Intended to be robust to non-stationarity and overfitting.

    Args:
        X_train_seq (array-like):
            Training sequences of shape (n_samples, time_steps, n_features).
            Only the shape is read (to size the input layer); both arrays
            and nested lists are accepted.

    Returns:
        Compiled Keras Model (Adam, binary cross-entropy, AUC + brier metrics)

    Raises:
        ValueError: If X_train_seq is not 3-dimensional.
    """
    # Plain lists have no ``.shape``; fall back to np.shape so the ``list``
    # input advertised by the signature actually works.
    seq_shape = (
        X_train_seq.shape if hasattr(X_train_seq, "shape") else np.shape(X_train_seq)
    )
    if len(seq_shape) != 3:
        raise ValueError(
            "X_train_seq must have shape (n_samples, time_steps, n_features); "
            f"got shape {tuple(seq_shape)}"
        )
    n_features = seq_shape[2]

    # Time dimension is left as None so the model accepts any window length.
    inputs = Input(shape=(None, n_features))

    # First causal convolution block.
    x = Conv1D(
        filters=32,
        kernel_size=3,
        padding="causal",
        activation="relu"
    )(inputs)
    x = LayerNormalization()(x)
    x = Dropout(0.3)(x)

    # Second, narrower causal convolution block.
    x = Conv1D(
        filters=16,
        kernel_size=3,
        padding="causal",
        activation="relu"
    )(x)
    x = LayerNormalization()(x)

    # Collapse the time axis before the classification head.
    x = GlobalAveragePooling1D()(x)

    x = Dense(16, activation="relu")(x)
    x = Dropout(0.3)(x)

    outputs = Dense(1, activation="sigmoid")(x)

    model = Model(inputs, outputs, name="tcn_lite")

    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[
            AUC(name="auc"),
            brier
        ]
    )

    return model

### PatchTST

In [10]:
def build_patchtst_lite(X_train_seq: Union[np.ndarray, list]) -> Model:
    """
    Builds a lightweight PatchTST-style Transformer model for
    noisy financial time series classification.

    The input is passed through the project's ``Patchify`` layer
    (patch_len=16), linearly projected to ``d_model`` and fed through a
    single Transformer encoder block (multi-head self-attention plus a
    feed-forward sub-layer, each with a residual connection and layer
    normalisation). The result is average-pooled and classified by a small
    dense head ending in a single sigmoid unit.

    Intended to be robust to non-stationarity and overfitting.

    Args:
        X_train_seq (array-like):
            Training sequences of shape (n_samples, time_steps, n_features).
            Only the shape is read (to size the input layer); both arrays
            and nested lists are accepted.

    Returns:
        Compiled Keras Model (Adam, binary cross-entropy, AUC + brier metrics)

    Raises:
        ValueError: If X_train_seq is not 3-dimensional.
    """

    patch_len: int = 16
    d_model: int = 64
    num_heads: int = 4
    ff_dim: int = 128
    dropout: float = 0.3

    # Plain lists have no ``.shape``; fall back to np.shape so the ``list``
    # input advertised by the signature actually works.
    seq_shape = (
        X_train_seq.shape if hasattr(X_train_seq, "shape") else np.shape(X_train_seq)
    )
    if len(seq_shape) != 3:
        raise ValueError(
            "X_train_seq must have shape (n_samples, time_steps, n_features); "
            f"got shape {tuple(seq_shape)}"
        )
    n_features = seq_shape[2]

    inputs = Input(shape=(None, n_features))

    # Patch embedding
    # NOTE(review): Patchify comes from live_trader.ml_model.layers; how it
    # handles a window length that is not a multiple of patch_len is not
    # visible here -- confirm.
    x = Patchify(patch_len=patch_len, name="patchify")(inputs)

    x = Dense(d_model, activation="linear")(x)
    x = LayerNormalization()(x)

    # Transformer Encoder Block
    # Self-attention: query and value are the same patch embeddings.
    attn_out = MultiHeadAttention(
        num_heads=num_heads,
        key_dim=d_model // num_heads,
        dropout=dropout
    )(x, x)

    # Residual connection + norm around attention.
    x = LayerNormalization()(x + attn_out)

    ff_out = Dense(ff_dim, activation="relu")(x)
    ff_out = Dropout(dropout)(ff_out)
    ff_out = Dense(d_model)(ff_out)

    # Residual connection + norm around the feed-forward sub-layer.
    x = LayerNormalization()(x + ff_out)

    # Pooling & Head
    x = GlobalAveragePooling1D()(x)

    x = Dense(32, activation="relu")(x)
    x = Dropout(dropout)(x)

    outputs = Dense(1, activation="sigmoid")(x)

    model = Model(inputs, outputs, name="patchtst_lite")

    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[
            AUC(name="auc"),
            brier
        ]
    )

    return model

### GNN (Graph-NN)

In [11]:
def build_gnn_lite(X_train_seq: Union[np.ndarray, list]) -> Model:
    """
    Builds a lightweight Graph Neural Network (GNN-style) model
    for noisy financial time series classification.

    Nodes represent features (indicators): the input is averaged over
    time, each feature becomes a node with a single scalar attribute, and
    the nodes are passed through ``gnn_layers`` stacked
    ``GraphMessagePassing`` layers from the project's layers module before
    being average-pooled and classified.

    Intended to be robust to:
    - Non-stationarity
    - Variable-length sequences
    - Small batch sizes

    Args:
        X_train_seq (array-like):
            Training sequences of shape (n_samples, time_steps, n_features).
            Only the shape is read (to size the input layer); both arrays
            and nested lists are accepted.

    Returns:
        Compiled Keras Model (Adam, binary cross-entropy, AUC + brier metrics)

    Raises:
        ValueError: If X_train_seq is not 3-dimensional.
    """

    hidden_dim: int = 32
    gnn_layers: int = 2
    dropout: float = 0.3

    # Plain lists have no ``.shape``; fall back to np.shape so the ``list``
    # input advertised by the signature actually works.
    seq_shape = (
        X_train_seq.shape if hasattr(X_train_seq, "shape") else np.shape(X_train_seq)
    )
    if len(seq_shape) != 3:
        raise ValueError(
            "X_train_seq must have shape (n_samples, time_steps, n_features); "
            f"got shape {tuple(seq_shape)}"
        )
    n_features = seq_shape[2]

    inputs = Input(shape=(None, n_features))

    # Temporal aggregation
    # (B, T, F) → (B, F)
    x = GlobalAveragePooling1D(name="temporal_pool")(inputs)

    # Treat features as nodes
    # (B, F) → (B, F, 1)
    x = ExpandDims(axis=-1, name="expand_dims")(x)

    # GNN layers
    # NOTE(review): GraphMessagePassing is a custom layer; loading a saved
    # model requires it to be registered with Keras and importable at load
    # time -- confirm in live_trader.ml_model.layers.
    for i in range(gnn_layers):
        x = GraphMessagePassing(
            hidden_dim=hidden_dim,
            dropout=dropout,
            name=f"gnn_layer_{i}"
        )(x)

    # Graph pooling (average over the node axis)
    x = GlobalAveragePooling1D(name="graph_pool")(x)

    # Head
    x = Dense(32, activation="relu")(x)
    x = Dropout(dropout)(x)

    outputs = Dense(1, activation="sigmoid")(x)

    model = Model(inputs, outputs, name="gnn_lite")

    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[AUC(name="auc"), brier]
    )

    return model

### Neural Anomaly Detection

In [12]:
def build_autoencoder_classifier_lite(X_train_seq: Union[np.ndarray, list]) -> Model:
    """
    Builds an Autoencoder + Classifier model for
    neural anomaly detection in time series.

    Instantiates the project's ``AutoencoderClassifierLite`` model with
    fixed hyper-parameters and compiles it.

    NOTE: for the returned model to be serializable,
    ``AutoencoderClassifierLite`` must implement ``get_config()``;
    otherwise Keras raises ``NotImplementedError`` when the model is
    serialized.

    Args:
        X_train_seq (array-like):
            Training sequences of shape (n_samples, time_steps, n_features).
            Only the shape is read (to obtain ``n_features``); both arrays
            and nested lists are accepted.

    Returns:
        Compiled Keras Model (Adam, binary cross-entropy, AUC + brier metrics)

    Raises:
        ValueError: If X_train_seq is not 3-dimensional.
    """

    latent_dim: int = 16
    hidden_dim: int = 64
    dropout: float = 0.3
    # Passed through to the model; how it is applied is defined in
    # AutoencoderClassifierLite, not here.
    recon_weight: float = 0.3

    # Plain lists have no ``.shape``; fall back to np.shape so the ``list``
    # input advertised by the signature actually works.
    seq_shape = (
        X_train_seq.shape if hasattr(X_train_seq, "shape") else np.shape(X_train_seq)
    )
    if len(seq_shape) != 3:
        raise ValueError(
            "X_train_seq must have shape (n_samples, time_steps, n_features); "
            f"got shape {tuple(seq_shape)}"
        )
    n_features = seq_shape[2]

    model = AutoencoderClassifierLite(
        n_features=n_features,
        latent_dim=latent_dim,
        hidden_dim=hidden_dim,
        dropout=dropout,
        recon_weight=recon_weight,
        name="autoencoder_classifier_lite"
    )

    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[AUC(name="auc"), brier]
    )

    return model

### CNN-GRU

In [13]:
def build_cnn_gru_lite(X_train_seq: Union[np.ndarray, list]) -> Model:
    """
    Builds a lightweight CNN-GRU model for
    noisy financial time series classification.

    Combines shallow temporal convolutions for
    local pattern extraction with a compact GRU
    layer for sequence modeling.

    Intended to be robust to non-stationarity
    and overfitting.

    Args:
        X_train_seq (array-like):
            Training sequences of shape
            (n_samples, time_steps, n_features).
            Only the shape is read (to size the input layer);
            both arrays and nested lists are accepted.

    Returns:
        Compiled Keras Model (Adam, binary cross-entropy, AUC + brier metrics)

    Raises:
        ValueError: If X_train_seq is not 3-dimensional.
    """
    # Plain lists have no ``.shape``; fall back to np.shape so the ``list``
    # input advertised by the signature actually works.
    seq_shape = (
        X_train_seq.shape if hasattr(X_train_seq, "shape") else np.shape(X_train_seq)
    )
    if len(seq_shape) != 3:
        raise ValueError(
            "X_train_seq must have shape (n_samples, time_steps, n_features); "
            f"got shape {tuple(seq_shape)}"
        )
    n_features = seq_shape[2]

    inputs = Input(shape=(None, n_features))

    # ---- CNN block ----
    # "same" padding keeps the sequence length unchanged for the GRU.
    x = Conv1D(filters=32, kernel_size=3, padding="same", activation="relu")(inputs)
    x = LayerNormalization()(x)
    x = Dropout(0.3)(x)

    x = Conv1D(
        filters=16,
        kernel_size=3,
        padding="same",
        activation="relu"
    )(x)
    x = LayerNormalization()(x)

    # ---- GRU block ----
    # Default return_sequences=False: only the final state feeds the head.
    x = GRU(
        units=32,
        dropout=0.3
    )(x)

    # ---- Head ----
    x = Dense(16, activation="relu")(x)
    x = Dropout(0.3)(x)

    outputs = Dense(1, activation="sigmoid")(x)

    model = Model(inputs, outputs, name="cnn_gru_lite")

    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[
            AUC(name="auc"),
            brier
        ]
    )

    return model


## Training / Testing Models

### TCN-lite

In [14]:
symbol = "GOOG"
side, _ = await ML_Pipeline(build_tcn_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
GOOG: SideSignal.BUY


In [15]:
symbol = "AAPL"
side, _ = await ML_Pipeline(build_tcn_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step
AAPL: SideSignal.HOLD


In [16]:
symbol = "MCFT"
side, _ = await ML_Pipeline(build_tcn_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
MCFT: SideSignal.HOLD


### PatchTST-lite

In [17]:
symbol = "GOOG"
side, _ = await ML_Pipeline(build_patchtst_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367ms/step
GOOG: SideSignal.BUY


In [18]:
symbol = "AAPL"
side, _ = await ML_Pipeline(build_patchtst_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366ms/step
AAPL: SideSignal.BUY


In [19]:
symbol = "MCFT"
side, _ = await ML_Pipeline(build_patchtst_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338ms/step
MCFT: SideSignal.BUY


### GNN-lite

In [20]:
symbol = "GOOG"
side, _ = await ML_Pipeline(build_gnn_lite, symbol, {})
print(f"{symbol}: {side}")

TypeError: <class 'keras.src.models.functional.Functional'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras.src.models.functional', 'class_name': 'Functional', 'config': {}, 'registered_name': 'Functional', 'build_config': {'input_shape': None}, 'compile_config': {'optimizer': {'module': 'keras.optimizers', 'class_name': 'Adam', 'config': {'name': 'adam', 'learning_rate': 0.0010000000474974513, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}, 'registered_name': None}, 'loss': 'binary_crossentropy', 'loss_weights': None, 'metrics': [{'module': 'keras.metrics', 'class_name': 'AUC', 'config': {'name': 'auc', 'dtype': 'float32', 'num_thresholds': 200, 'curve': 'ROC', 'summation_method': 'interpolation', 'multi_label': False, 'num_labels': None, 'label_weights': None, 'from_logits': False}, 'registered_name': None}, {'module': 'builtins', 'class_name': 'function', 'config': 'Custom>brier', 'registered_name': 'function'}], 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': False}}.

Exception encountered: Could not locate class 'GraphMessagePassing'. Make sure custom classes and functions are decorated with `@keras.saving.register_keras_serializable()`. If they are already decorated, make sure they are all imported so that the decorator is run before trying to load them. Full object config: {'module': None, 'class_name': 'GraphMessagePassing', 'config': {'name': 'gnn_layer_0', 'hidden_dim': 32, 'dropout': 0.3, 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None, 'shared_object_id': 128808718389504}}, 'registered_name': 'GraphMessagePassing', 'build_config': {'input_shape': [None, 14, 1]}, 'name': 'gnn_layer_0', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 14, 1], 'dtype': 'float32', 'keras_history': ['expand_dims', 0, 0]}}], 'kwargs': {}}]}

In [None]:
symbol = "AAPL"
side, _ = await ML_Pipeline(build_gnn_lite, symbol, {})
print(f"{symbol}: {side}")

Epoch 1/20
4/4 - 6s - 1s/step - auc: 0.5256 - brier: 0.2648 - loss: 0.7273 - val_auc: 0.4444 - val_brier: 0.2583 - val_loss: 0.7267
Epoch 2/20
4/4 - 0s - 53ms/step - auc: 0.4711 - brier: 0.2684 - loss: 0.7538 - val_auc: 0.4132 - val_brier: 0.2526 - val_loss: 0.7206
Epoch 3/20
4/4 - 0s - 50ms/step - auc: 0.4677 - brier: 0.2599 - loss: 0.7260 - val_auc: 0.2917 - val_brier: 0.2520 - val_loss: 0.7205
Epoch 4/20
4/4 - 0s - 45ms/step - auc: 0.4737 - brier: 0.2595 - loss: 0.7252 - val_auc: 0.3403 - val_brier: 0.2535 - val_loss: 0.7206
Epoch 5/20
4/4 - 0s - 45ms/step - auc: 0.5730 - brier: 0.2599 - loss: 0.6831 - val_auc: 0.3403 - val_brier: 0.2552 - val_loss: 0.7256
Epoch 6/20
4/4 - 0s - 52ms/step - auc: 0.5350 - brier: 0.2532 - loss: 0.6882 - val_auc: 0.3646 - val_brier: 0.2526 - val_loss: 0.7117
Epoch 7/20
4/4 - 0s - 49ms/step - auc: 0.5063 - brier: 0.2535 - loss: 0.6962 - val_auc: 0.3889 - val_brier: 0.2516 - val_loss: 0.7058
Epoch 8/20
4/4 - 0s - 53ms/step - auc: 0.5308 - brier: 0.2554 - 

In [None]:
symbol = "MCFT"
side, _ = await ML_Pipeline(build_gnn_lite, symbol, {})
print(f"{symbol}: {side}")

Epoch 1/20
4/4 - 6s - 2s/step - auc: 0.5072 - brier: 0.2533 - loss: 0.7066 - val_auc: 0.6084 - val_brier: 0.2724 - val_loss: 0.7316
Epoch 2/20
4/4 - 0s - 51ms/step - auc: 0.5033 - brier: 0.2529 - loss: 0.7080 - val_auc: 0.3811 - val_brier: 0.2654 - val_loss: 0.7424
Epoch 3/20
4/4 - 0s - 52ms/step - auc: 0.5463 - brier: 0.2509 - loss: 0.6926 - val_auc: 0.3217 - val_brier: 0.2660 - val_loss: 0.7562
Epoch 4/20
4/4 - 0s - 47ms/step - auc: 0.5172 - brier: 0.2505 - loss: 0.6957 - val_auc: 0.3811 - val_brier: 0.2660 - val_loss: 0.7456
Epoch 5/20
4/4 - 0s - 54ms/step - auc: 0.4982 - brier: 0.2527 - loss: 0.7053 - val_auc: 0.3671 - val_brier: 0.2634 - val_loss: 0.7416
Epoch 6/20
4/4 - 0s - 47ms/step - auc: 0.5092 - brier: 0.2515 - loss: 0.6990 - val_auc: 0.3636 - val_brier: 0.2603 - val_loss: 0.7369
Epoch 7/20
4/4 - 0s - 45ms/step - auc: 0.5485 - brier: 0.2493 - loss: 0.6896 - val_auc: 0.3322 - val_brier: 0.2610 - val_loss: 0.7384
Epoch 8/20
4/4 - 0s - 48ms/step - auc: 0.5846 - brier: 0.2527 - 

### NAD-lite

In [None]:
symbol = "GOOG"
side, _ = await ML_Pipeline(build_autoencoder_classifier_lite, symbol, {})
print(f"{symbol}: {side}")

Epoch 1/20
4/4 - 6s - 2s/step - auc: 0.4857 - brier: 0.3144 - loss: 1.1973 - val_auc: 0.3951 - val_brier: 0.2627 - val_loss: 0.8122
Epoch 2/20
4/4 - 0s - 48ms/step - auc: 0.5802 - brier: 0.2913 - loss: 0.9853 - val_auc: 0.3811 - val_brier: 0.2577 - val_loss: 0.7822
Epoch 3/20
4/4 - 0s - 45ms/step - auc: 0.6068 - brier: 0.2762 - loss: 0.8772 - val_auc: 0.3287 - val_brier: 0.2539 - val_loss: 0.7592
Epoch 4/20
4/4 - 0s - 44ms/step - auc: 0.5606 - brier: 0.2701 - loss: 0.8792 - val_auc: 0.2692 - val_brier: 0.2518 - val_loss: 0.7437
Epoch 5/20
4/4 - 0s - 44ms/step - auc: 0.5859 - brier: 0.2743 - loss: 0.8535 - val_auc: 0.2587 - val_brier: 0.2514 - val_loss: 0.7369
Epoch 6/20
4/4 - 0s - 45ms/step - auc: 0.5572 - brier: 0.2785 - loss: 0.8891 - val_auc: 0.3007 - val_brier: 0.2520 - val_loss: 0.7318
Epoch 7/20
4/4 - 0s - 51ms/step - auc: 0.5331 - brier: 0.2726 - loss: 0.8784 - val_auc: 0.2762 - val_brier: 0.2534 - val_loss: 0.7296
Epoch 8/20
4/4 - 0s - 48ms/step - auc: 0.5980 - brier: 0.2676 - 

NotImplementedError: 
Object AutoencoderClassifierLite was created by passing
non-serializable argument values in `__init__()`,
and therefore the object must override `get_config()` in
order to be serializable. Please implement `get_config()`.

Example:


class CustomLayer(keras.layers.Layer):
    def __init__(self, arg1, arg2, **kwargs):
        super().__init__(**kwargs)
        self.arg1 = arg1
        self.arg2 = arg2

    def get_config(self):
        config = super().get_config()
        config.update({
            "arg1": self.arg1,
            "arg2": self.arg2,
        })
        return config


In [None]:
symbol = "AAPL"
side, _ = await ML_Pipeline(build_autoencoder_classifier_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
AAPL: SideSignal.BUY


In [None]:
symbol = "MCFT"
side, _ = await ML_Pipeline(build_autoencoder_classifier_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
MCFT: SideSignal.BUY


### CNN-GRU lite

In [None]:
symbol = "GOOG"
side, _ = await ML_Pipeline(build_cnn_gru_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
GOOG: SideSignal.HOLD


In [None]:
symbol = "AAPL"
side, _ = await ML_Pipeline(build_cnn_gru_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
AAPL: SideSignal.BUY


In [None]:
symbol = "MCFT"
side, _ = await ML_Pipeline(build_cnn_gru_lite, symbol, {})
print(f"{symbol}: {side}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
MCFT: SideSignal.BUY
