In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential, clone_model


import joblib
import json
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from tensorflow.keras.models import Sequential, clone_model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

from sklearn.base import BaseEstimator, RegressorMixin
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import model_from_json
import json

# Updated KerasRegressorWrapper
from tensorflow.keras.models import save_model, load_model
import tempfile
import os

# Set random seed for reproducibility
np.random.seed(42)

TICKERS = ["AAPL", "JPM", "AMZN", "PFE", "XOM"]
N_FOLDS = 5

# Set correct technical, fundamental, sentiment columns here as per your data
TECHNICAL_FEATURES = ['RSI_14', 'SMA_20', 'Volume']
FUNDAMENTAL_FEATURES = ['debt_to_equity']
SENTIMENT_FEATURES = ['sentiment_3day_ma']

TARGET_COL = 'target_5day_return'
LOOKBACK = 60

# Import your model functions (adjust import as needed)
# from your_module import train_technical_model, train_fundamental_model, train_sentiment_model


In [2]:
def build_technical_model(input_shape):
    """Model builder function that returns a fresh compiled model"""
    model = Sequential([
        Input(shape=input_shape),
        LSTM(64,
             activation='tanh',
             recurrent_activation='sigmoid',
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             kernel_constraint=MaxNorm(3),
             recurrent_constraint=MaxNorm(3)),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model


def train_technical_model(X_train, y_train, X_test, y_test):
    """Train LSTM model with proper initialization and gradient control"""
    # Normalize targets
    y_mean, y_std = y_train.mean(), y_train.std()
    y_train_norm = (y_train - y_mean) / y_std
    y_test_norm = (y_test - y_mean) / y_std
    
    # Build and compile model
    model = build_technical_model((X_train.shape[1], X_train.shape[2]))
    model.compile(
        optimizer=Adam(learning_rate=0.001, clipvalue=1.0),
        loss='mse',
        metrics=['mae']
    )
    
    # Train with callbacks
    history = model.fit(
        X_train, y_train_norm,
        validation_data=(X_test, y_test_norm),
        epochs=50,
        batch_size=32,
        callbacks=[
            EarlyStopping(patience=10, restore_best_weights=True),
            ReduceLROnPlateau(factor=0.5, patience=5)
        ],
        verbose=1
    )
    
    # Store normalization parameters
    model.y_mean = y_mean
    model.y_std = y_std
    
    return model


def train_fundamental_model(X_train, y_train):
    """XGBoost model for fundamentals"""
    model = XGBRegressor(
        n_estimators=100,
        max_depth=3,
        learning_rate=0.1,
        objective='reg:squarederror'
    )
    model.fit(X_train, y_train)
    return model

In [3]:
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def direction_accuracy(y_true, y_pred):
    return np.mean(np.sign(y_true) == np.sign(y_pred))

def sharpe_ratio(returns, risk_free_rate=0):
    excess = returns - risk_free_rate
    if np.std(excess) == 0:
        return 0.0
    return np.mean(excess) / (np.std(excess) + 1e-9) * np.sqrt(252)

# Rolling window creator for LSTM
def create_rolling_sequences(df, features, target_col, lookback=60):
    n_samples = len(df) - lookback + 1
    X = np.zeros((n_samples, lookback, len(features)))
    y = np.zeros(n_samples)
    for i in range(n_samples):
        X[i] = df[features].iloc[i:i+lookback].values
        y[i] = df[target_col].iloc[i+lookback-1]
    return X, y

# Diebold-Mariano test
def diebold_mariano_test(e1, e2, h=1):
    # e1/e2: arrays of forecast errors (e.g., y_true - y_pred)
    # h: forecast horizon, 1 for one-step ahead
    from statsmodels.stats.diagnostic import acorr_ljungbox
    import scipy.stats as stats
    d = np.abs(e1) - np.abs(e2)
    mean_d = np.mean(d)
    n = len(d)
    var_d = np.var(d, ddof=1)
    # Approximate DM statistic
    dm_stat = mean_d / (np.sqrt(var_d / n + 1e-9))
    p_value = 2 * (1 - stats.norm.cdf(np.abs(dm_stat)))
    return dm_stat, p_value


In [4]:
results = []

for ticker in TICKERS:
    print(f"========== {ticker} ==========")
    df = pd.read_csv(f"../data/processed/integrated/{ticker}_integrated.csv")
    fold_size = len(df) // N_FOLDS

    # Store fold-level results for Diebold-Mariano
    all_preds = { 'Technical': [], 'Fundamental': [], 'Sentiment': [], 'TFMS': [], 'y_true': [] }

    for fold in range(N_FOLDS):
        split = fold_size * (fold + 1)
        if split + LOOKBACK - 1 > len(df):
            break
        train_df = df.iloc[:split + LOOKBACK - 1].copy()
        test_df = df.iloc[split:split + fold_size + LOOKBACK - 1].copy()

        # --- 1. Technical only model (LSTM)
        # Prepare rolling window
        X_train_tech, y_train = create_rolling_sequences(train_df, TECHNICAL_FEATURES, TARGET_COL, LOOKBACK)
        X_test_tech, y_test = create_rolling_sequences(test_df, TECHNICAL_FEATURES, TARGET_COL, LOOKBACK)
        # Scale
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        X_train_tech_flat = X_train_tech.reshape(-1, len(TECHNICAL_FEATURES))
        X_train_tech_scaled_flat = scaler.fit_transform(X_train_tech_flat)
        X_train_tech_scaled = X_train_tech_scaled_flat.reshape(X_train_tech.shape)
        X_test_tech_flat = X_test_tech.reshape(-1, len(TECHNICAL_FEATURES))
        X_test_tech_scaled_flat = scaler.transform(X_test_tech_flat)
        X_test_tech_scaled = X_test_tech_scaled_flat.reshape(X_test_tech.shape)

        # Train LSTM
        lstm_model = train_technical_model(X_train_tech_scaled, y_train, X_test_tech_scaled, y_test)
        tech_pred = lstm_model.predict(X_test_tech_scaled).flatten()

        # --- 2. Fundamental only model (XGBoost, or your own)
        X_train_fund = train_df[FUNDAMENTAL_FEATURES].iloc[LOOKBACK-1:LOOKBACK-1+len(y_train)].to_numpy()
        X_test_fund = test_df[FUNDAMENTAL_FEATURES].iloc[LOOKBACK-1:LOOKBACK-1+len(y_test)].to_numpy()
        xgb_model = train_fundamental_model(X_train_fund, y_train)
        fund_pred = xgb_model.predict(X_test_fund)

        # --- 3. Sentiment only model (LinearRegressor as default)
        from sklearn.linear_model import LinearRegression
        X_train_sent = train_df[SENTIMENT_FEATURES].iloc[LOOKBACK-1:LOOKBACK-1+len(y_train)].to_numpy()
        X_test_sent = test_df[SENTIMENT_FEATURES].iloc[LOOKBACK-1:LOOKBACK-1+len(y_test)].to_numpy()
        sent_model = LinearRegression()
        sent_model.fit(X_train_sent, y_train)
        sent_pred = sent_model.predict(X_test_sent)

        # --- 4. Full TFMS Ensemble (Stack predictions)
        tech_train_pred = lstm_model.predict(X_train_tech_scaled).flatten()
        fund_train_pred = xgb_model.predict(X_train_fund)
        sent_train_pred = sent_model.predict(X_train_sent)
        stack_X_train = np.column_stack([tech_train_pred, fund_train_pred, sent_train_pred])
        meta_model = RandomForestRegressor(n_estimators=50, random_state=42)
        meta_model.fit(stack_X_train, y_train)

        stack_X_test = np.column_stack([tech_pred, fund_pred, sent_pred])
        tfms_pred = meta_model.predict(stack_X_test)

        # --- Metrics for each
        for model_name, pred in zip(['Technical', 'Fundamental', 'Sentiment', 'TFMS'],
                                    [tech_pred, fund_pred, sent_pred, tfms_pred]):
            acc = direction_accuracy(y_test, pred)
            err = rmse(y_test, pred)
            # Sharpe: use true Close price for equity curves
            close_aligned = test_df['Close'].iloc[LOOKBACK-1:LOOKBACK-1+len(y_test)].to_numpy()
            returns = np.log(close_aligned[1:] / close_aligned[:-1])
            side = np.where(pred > 0, 1, -1)
            strat_returns = returns * side[:-1]
            sharpe = sharpe_ratio(strat_returns)
            results.append({
                "Ticker": ticker, "Fold": fold+1, "Model": model_name, 
                "RMSE": err, "Accuracy": acc, "Sharpe": sharpe
            })
            all_preds[model_name].append(pred)
        all_preds['y_true'].append(y_test)


Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 178ms/step - loss: 0.7866 - mae: 0.6850 - val_loss: 1.2122 - val_mae: 0.8527 - learning_rate: 0.0010
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.7228 - mae: 0.6434 - val_loss: 1.0788 - val_mae: 0.7792 - learning_rate: 0.0010
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.6737 - mae: 0.6010 - val_loss: 1.0396 - val_mae: 0.7610 - learning_rate: 0.0010
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - loss: 0.5688 - mae: 0.5759 - val_loss: 1.0435 - val_mae: 0.7683 - learning_rate: 0.0010
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - loss: 0.5847 - mae: 0.5836 - val_loss: 1.0504 - val_mae: 0.7733 - learning_rate: 0.0010
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 109ms/step - loss: 0.5818 - mae: 0.5867 - val_loss: 1.0488 -

In [5]:
# Compare per-fold prediction error streams, TFMS vs each ablation variant
dm_results = []

for ticker in TICKERS:
    subset = [r for r in results if r['Ticker'] == ticker]
    # You should also save per-fold y_true and y_pred above if you want per-fold DM test
    # We'll use the last set from ablation loop's all_preds
    y_true_full = np.concatenate(all_preds['y_true'])
    preds_dict = {name: np.concatenate(all_preds[name]) for name in ['Technical', 'Fundamental', 'Sentiment', 'TFMS']}
    for compare_model in ['Technical', 'Fundamental', 'Sentiment']:
        e1 = y_true_full - preds_dict['TFMS']
        e2 = y_true_full - preds_dict[compare_model]
        dm_stat, pval = diebold_mariano_test(e1, e2)
        dm_results.append({
            "Ticker": ticker,
            "Model_1": "TFMS",
            "Model_2": compare_model,
            "DM_statistic": dm_stat,
            "p_value": pval
        })
        
# Format DM test summary
dm_df = pd.DataFrame(dm_results)
print("Diebold-Mariano Test Results (TFMS vs variant):")
display(dm_df)


Diebold-Mariano Test Results (TFMS vs variant):


Unnamed: 0,Ticker,Model_1,Model_2,DM_statistic,p_value
0,AAPL,TFMS,Technical,-30.880219,0.0
1,AAPL,TFMS,Fundamental,-1.238002,0.215715
2,AAPL,TFMS,Sentiment,-0.90346,0.366282
3,JPM,TFMS,Technical,-30.880219,0.0
4,JPM,TFMS,Fundamental,-1.238002,0.215715
5,JPM,TFMS,Sentiment,-0.90346,0.366282
6,AMZN,TFMS,Technical,-30.880219,0.0
7,AMZN,TFMS,Fundamental,-1.238002,0.215715
8,AMZN,TFMS,Sentiment,-0.90346,0.366282
9,PFE,TFMS,Technical,-30.880219,0.0


In [6]:
results_df = pd.DataFrame(results)
results_df.to_csv('../data/results/metrics/ablation_metrics.csv', index=False)
dm_df.to_csv('../data/results/metrics/ablation_dm_test.csv', index=False)
results_df.head()


Unnamed: 0,Ticker,Fold,Model,RMSE,Accuracy,Sharpe
0,AAPL,1,Technical,0.819558,0.792952,3.708372
1,AAPL,1,Fundamental,0.045951,0.453744,-0.583469
2,AAPL,1,Sentiment,0.046369,0.453744,-0.583469
3,AAPL,1,TFMS,0.039548,0.735683,1.22418
4,AAPL,2,Technical,0.514156,0.797357,4.716066
