# Model Comparison With Finance Data
This notebook demonstrates how to run the prediction models on financial data and compare their mean absolute error (MAE).

In [6]:
import sys
import os

# Add the project root (one level up from 'notebooks') to the Python path so
# the `src` package can be imported. The root is derived from the kernel's
# current working directory, so this assumes the notebook is run from the
# 'notebooks' folder.
# The membership check keeps the cell idempotent: re-running it does not
# append duplicate entries to sys.path.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [12]:
# Third-party imports
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Imports from our src/ modules
from src.data.load_fin_data import load_finance_data, load_fred_data
from src.data.preprocess_fin_data import add_time_features, calculate_technical_indicators, flatten_cols
from src.data.scaler_utils import get_scaler
from src.data.split_sequences import create_sequences, prepare_train_val_test
from src.evaluation.evaluate import evaluate_model
from src.models.base_ARIMA import build_arima_model
from src.models.DL_LSTM import build_lstm_model
# from src.models.DL_TCN import build_tcn_model
# from src.models.DL_Transformer import build_transformer_model


In [13]:
# Setup
# The FRED API key is read from the environment instead of being hardcoded,
# so the credential never lands in version control or shared notebook copies.
# NOTE(review): the key that was previously committed in this cell should be
# treated as leaked and rotated.
FRED_API_KEY = os.environ.get('FRED_API_KEY')
if not FRED_API_KEY:
    raise RuntimeError(
        "FRED_API_KEY is not set. Export it before starting Jupyter, e.g. "
        "`export FRED_API_KEY=<your key>`."
    )

# Tickers passed to load_finance_data.
stocks = ['TSLA']
# FRED series id -> name passed to load_fred_data (the name is also the key
# the series is stored under in all_data).
pce_series = {'PCEPI': 'PCE_Price_Index', 'PCEPILFE': 'Core_PCE_Price_Index'}
# Shared scaler: fitted on the Close column during preprocessing and later
# handed to evaluate_model.
scaler = get_scaler()


In [None]:
# Load and preprocess data
all_data = {}

# Market data: one enriched frame per ticker, stored under the ticker symbol.
for ticker in stocks:
    prices = load_finance_data(ticker)
    prices = add_time_features(prices, prefix=f'{ticker}_')
    prices = calculate_technical_indicators(prices, ticker)
    # The shared scaler is (re)fitted on this ticker's Close column; the same
    # scaler object is later passed to evaluate_model.
    # NOTE(review): with more than one ticker only the last fit survives, and
    # the fit happens before any train/test split - confirm both are intended.
    scaled_close = scaler.fit_transform(prices[['Close']])
    prices[f'{ticker}_Close_Scaled'] = scaled_close
    all_data[ticker] = prices

# Macro data: one frame per FRED series, stored under its readable name.
for series_id, name in pce_series.items():
    all_data[name] = load_fred_data(series_id, name, FRED_API_KEY)


In [None]:
# Combine datasets
# Outer-join every frame in all_data on its index, then keep only the rows
# that are complete across all columns.
combined_data = None
for df in all_data.values():
    flat = flatten_cols(df)
    if combined_data is None:
        combined_data = flat
    else:
        combined_data = pd.merge(combined_data, flat, left_index=True, right_index=True, how='outer')

if combined_data is None:
    raise ValueError("all_data is empty: there is nothing to combine")

# Rebuild the index as a datetime index named 'Date'. Non-inplace calls are
# used so the frames stored in all_data are never mutated and re-running the
# cell always starts from the same inputs.
combined_data = combined_data.reset_index()
combined_data = combined_data.rename(columns={combined_data.columns[0]: 'Date'})
combined_data['Date'] = pd.to_datetime(combined_data['Date'])

# dropna after an outer join keeps only dates on which every column has a value.
# NOTE(review): if the FRED frames are sampled less often than the price
# frames, this discards most price rows - confirm that is intended (a forward
# fill before dropna may be what is wanted).
combined_data = combined_data.set_index('Date').dropna()

if combined_data.empty:
    raise ValueError(
        "combined_data has no complete rows after the outer join and dropna; "
        "check that the input frames share index dates"
    )


In [None]:
# Train-test split
SEQ_LENGTH = 30
X_train, X_val, X_test, y_train, y_val, y_test = prepare_train_val_test(combined_data.reset_index(), SEQ_LENGTH)


In [None]:
# Train and evaluate models
models = {
    "LSTM": build_lstm_model(SEQ_LENGTH),
    "TCN": build_tcn_model(SEQ_LENGTH),
    "Transformer": build_transformer_model(SEQ_LENGTH)
}

results = {}
mae_scores = {}

for name, model in models.items():
    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_val, y_val), verbose=0)
    mae, y_test_inv, y_pred_inv = evaluate_model(model, X_test, y_test, scaler)
    results[name] = (y_test_inv, y_pred_inv)
    mae_scores[name] = mae
    print(f"{name} MAE: {mae:.4f}")


In [None]:
# Linear regression as baseline
from src.data.split_sequences import create_sequences
close_col = [col for col in combined_data.columns if col.lower().endswith('close_scaled')][0]
close_prices = combined_data[close_col].values
X_lr_all, y_lr_all = create_sequences(close_prices, SEQ_LENGTH)
X_lr = X_lr_all.reshape((X_lr_all.shape[0], X_lr_all.shape[1]))
X_train_lr, X_test_lr, y_train_lr, y_test_lr = train_test_split(X_lr, y_lr_all, test_size=0.2, shuffle=False)
lr_model = LinearRegression().fit(X_train_lr, y_train_lr)
mae, y_test_inv, y_pred_inv = evaluate_model(lr_model, X_test_lr, y_test_lr, scaler, is_linear=True)
results["LinearRegression"] = (y_test_inv, y_pred_inv)
mae_scores["LinearRegression"] = mae
print(f"LinearRegression MAE: {mae:.4f}")


In [None]:
# Plot results
# Bar chart with one bar per entry in mae_scores (model name -> MAE).
model_names = list(mae_scores.keys())
mae_values = list(mae_scores.values())

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(model_names, mae_values)
ax.set_title("Model Comparison - MAE (Unscaled Prices)")
ax.set_ylabel("Mean Absolute Error (USD)")
ax.set_xlabel("Model")
# Tilt the model names so longer labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()
