In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import warnings

from config import STOCKS_TO_TEST, TEST_YEARS, SEQUENCE_LENGTH, PERIOD

# Import our custom preprocessor from the parent directory
import sys
sys.path.append('../')
from training import preprocessor

# Suppress verbose logging
warnings.filterwarnings("ignore")
tf.get_logger().setLevel('ERROR')

In [4]:
results = []

print("--- Starting LSTM Backtesting on Portfolio ---")

for ticker in STOCKS_TO_TEST:
    try:
        print(f"\nProcessing {ticker}...")
        # --- Fetch and Preprocess Data ---
        raw_data = preprocessor.fetch_data(ticker, period=PERIOD)
        processed_data = preprocessor.add_technical_indicators(raw_data.copy())

        # --- Train-Test Split ---
        split_date = processed_data.index.max() - pd.DateOffset(years=TEST_YEARS)
        train_df = processed_data[processed_data.index <= split_date]
        test_df = processed_data[processed_data.index > split_date]

        # --- Scale Data and Create Sequences ---
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_train_data = scaler.fit_transform(train_df)
        scaled_test_data = scaler.transform(test_df)

        X_train, y_train = [], []
        for i in range(SEQUENCE_LENGTH, len(scaled_train_data)):
            X_train.append(scaled_train_data[i-SEQUENCE_LENGTH:i])
            y_train.append(scaled_train_data[i, train_df.columns.get_loc('Close')])
        X_train, y_train = np.array(X_train), np.array(y_train)
        
        X_test, y_test = [], []
        for i in range(SEQUENCE_LENGTH, len(scaled_test_data)):
            X_test.append(scaled_test_data[i-SEQUENCE_LENGTH:i])
            y_test.append(scaled_test_data[i, test_df.columns.get_loc('Close')])
        X_test, y_test = np.array(X_test), np.array(y_test)

        # --- Build and Train Model ---
        model = Sequential([
            Input(shape=(X_train.shape[1], X_train.shape[2])),
            LSTM(units=50, return_sequences=True),
            Dropout(0.3),
            LSTM(units=50, return_sequences=False),
            Dropout(0.3),
            Dense(units=25),
            Dense(units=1)
        ])
        model.compile(optimizer='adam', loss='mean_squared_error')
        model.fit(X_train, y_train, epochs=25, batch_size=32, verbose=0)

        # --- Predict and Evaluate ---
        predicted_scaled = model.predict(X_test, verbose=0)
        
        dummy = np.zeros((len(predicted_scaled), train_df.shape[1]))
        dummy[:, train_df.columns.get_loc('Close')] = predicted_scaled.flatten()
        predicted_prices = scaler.inverse_transform(dummy)[:, train_df.columns.get_loc('Close')]

        dummy_y = np.zeros((len(y_test), train_df.shape[1]))
        dummy_y[:, train_df.columns.get_loc('Close')] = y_test.flatten()
        actual_prices = scaler.inverse_transform(dummy_y)[:, train_df.columns.get_loc('Close')]
        
        mae = mean_absolute_error(actual_prices, predicted_prices)
        rmse = np.sqrt(mean_squared_error(actual_prices, predicted_prices))
        
        results.append({"Ticker": ticker, "MAE": mae, "RMSE": rmse})
        print(f"Processed {ticker} - MAE: ${mae:.2f}")

    except Exception as e:
        print(f"Could not process {ticker}. Error: {e}")

print("\n--- LSTM Backtesting Complete ---")

--- Starting LSTM Backtesting on Portfolio ---

Processing AAPL...
Fetching 5y of historical data for AAPL...
Adding technical indicators...
Processed AAPL - MAE: $6.39

Processing MSFT...
Fetching 5y of historical data for MSFT...
Adding technical indicators...
Processed MSFT - MAE: $10.02

Processing JPM...
Fetching 5y of historical data for JPM...
Adding technical indicators...
Processed JPM - MAE: $26.65

Processing JNJ...
Fetching 5y of historical data for JNJ...
Adding technical indicators...
Processed JNJ - MAE: $2.10

Processing F...
Fetching 5y of historical data for F...
Adding technical indicators...
Processed F - MAE: $0.28

Processing NVDA...
Fetching 5y of historical data for NVDA...
Adding technical indicators...
Processed NVDA - MAE: $15.25

--- LSTM Backtesting Complete ---


In [5]:
# --- Display Final Results ---
results_df = pd.DataFrame(results)
print("\n--- LSTM Model Performance Summary ---")
print(results_df.to_string())

# Print the average performance
print("\n--- Average Performance ---")
print(results_df.mean(numeric_only=True))


--- LSTM Model Performance Summary ---
  Ticker        MAE       RMSE
0   AAPL   6.393290   8.420361
1   MSFT  10.017092  12.494013
2    JPM  26.652985  30.026345
3    JNJ   2.101225   2.777292
4      F   0.279212   0.359157
5   NVDA  15.252849  19.357449

--- Average Performance ---
MAE     10.116109
RMSE    12.239103
dtype: float64
