In [1]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

# Resolve project locations relative to the directory the notebook runs from.
NOTEBOOK_DIR = Path.cwd()
REPO_ROOT = NOTEBOOK_DIR.parent.parent
BACKEND_DIR = REPO_ROOT / "backend"

# Prepend in this order so the notebook directory ends up first on sys.path
# (for `_pool_common`), followed by the backend package root (for `analytics`).
for _extra_path in (BACKEND_DIR, NOTEBOOK_DIR):
    sys.path.insert(0, str(_extra_path))

from analytics.forecasting.base import SimpleForecaster
from _pool_common import (
    load_pool_data,
    backtest_one_step,
    compute_metrics,
    metrics_to_parquet,
    TEST_SIZE,
    MIN_TRAIN_BASELINE,
    ARTIFACTS_DIR,
)
# Value passed as `span=` to SimpleForecaster when the baseline model is built.
# NOTE(review): presumably a smoothing span measured in bars of the price
# series — confirm against SimpleForecaster's definition.
SPAN = 20

In [5]:
# Load the pooled dataset: every ticker stacked into a single DataFrame.
stacked = load_pool_data()

# Row count per symbol, as a quick check that each ticker was loaded.
rows_per_symbol = stacked.groupby("symbol").size()
print(rows_per_symbol)

# Preview the first rows (rich display as the cell's last expression).
stacked.head(10)

symbol
AAPL       262
BTC-USD    262
ETH-USD    262
MSFT       262
NVDA       262
QQQ        262
SPY        262
dtype: int64


Price,timestamp,symbol,close,close,close,close,close,close,close
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,BTC-USD,ETH-USD,NVDA,AAPL,MSFT,SPY,QQQ
0,2021-02-22,AAPL,,,,121.260002,,,
1,2021-03-01,AAPL,,,,121.419998,,,
2,2021-03-08,AAPL,,,,121.029999,,,
3,2021-03-15,AAPL,,,,119.989998,,,
4,2021-03-22,AAPL,,,,121.209999,,,
5,2021-03-29,AAPL,,,,123.0,,,
6,2021-04-05,AAPL,,,,133.0,,,
7,2021-04-12,AAPL,,,,134.160004,,,
8,2021-04-19,AAPL,,,,134.320007,,,
9,2021-04-26,AAPL,,,,131.460007,,,


In [7]:
# Same backtest for every symbol: walk-forward one-step, TEST_SIZE steps
model_name = "baseline"
# Fewest clean close prices a symbol needs before it is backtested.
min_history = TEST_SIZE + MIN_TRAIN_BASELINE


def _make_baseline():
    """Return a fresh SimpleForecaster configured for this experiment."""
    return SimpleForecaster(span=SPAN, confidence_level=0.95)


def _close_prices(grp, sym):
    """Return one symbol's close prices as a float Series indexed by timestamp.

    `grp["close"]` is a DataFrame rather than a Series when the pooled frame
    has one "close" column per ticker (the preview of `stacked` shows exactly
    that layout). In that case the column named after `sym` is used; when
    there is a single column, or no column named `sym`, the first column is
    used. NaN rows are dropped and the result is sorted by timestamp.
    """
    close = grp.set_index("timestamp")["close"]
    if isinstance(close, pd.DataFrame):
        if close.shape[1] > 1 and sym in close.columns:
            close = close[sym]
        else:
            # Single column, or no column for this ticker: fall back to the
            # first column. If that column belongs to another ticker it is
            # likely all-NaN for this group, and the symbol is then reported
            # as skipped below instead of vanishing silently.
            close = close.iloc[:, 0]
    return close.astype(float).dropna().sort_index()


all_preds = []
skipped = {}  # symbol -> reason it produced no predictions
for sym, grp in stacked.groupby("symbol"):
    prices = _close_prices(grp, sym)
    if len(prices) < min_history:
        skipped[sym] = f"{len(prices)} usable closes < {min_history} required"
        continue
    pred = backtest_one_step(
        prices, TEST_SIZE,
        model_factory=_make_baseline,
        min_train=MIN_TRAIN_BASELINE,
    )
    if pred.empty:
        skipped[sym] = "backtest returned no predictions"
        continue
    # assign() returns a new frame, so the helper's return value is not mutated.
    pred = pred.assign(symbol=sym)
    all_preds.append(pred)

# Report dropped symbols so gaps in the metrics table can be explained.
for skipped_sym, reason in skipped.items():
    print(f"Skipped {skipped_sym}: {reason}")

pred_baseline = pd.concat(all_preds, ignore_index=True) if all_preds else pd.DataFrame(columns=["timestamp", "y_true", "y_pred", "symbol"])
print(pred_baseline.groupby("symbol").size() if not pred_baseline.empty else "No predictions (all symbols skipped or backtest returned empty).")
pred_baseline.head()

symbol
AAPL       30
BTC-USD    30
ETH-USD    30
MSFT       30
NVDA       30
QQQ        30
SPY        30
dtype: int64


Unnamed: 0,timestamp,y_true,y_pred,symbol
0,2025-08-04,229.350006,209.3799,AAPL
1,2025-08-11,231.589996,211.2818,AAPL
2,2025-08-18,227.759995,213.2159,AAPL
3,2025-08-25,232.139999,214.6011,AAPL
4,2025-09-01,239.690002,216.2715,AAPL


In [8]:
# Metrics per symbol and overall (MAE, RMSE, MAPE_%)
# NOTE(review): the "overall" row is compute_metrics over the pooled
# predictions of all symbols, so its MAE/RMSE mix very different price scales
# (recorded run: overall MAE ~1744 vs. 13-29 for the equities). MAPE_% is the
# scale-free column to compare across symbols.
metrics_path = ARTIFACTS_DIR / "metrics_baseline_pool.parquet"

# One row per symbol, in order of first appearance in pred_baseline.
metrics_rows = [
    {
        "model": model_name,
        "symbol": ticker,
        **compute_metrics(pred_baseline.loc[pred_baseline["symbol"] == ticker]),
    }
    for ticker in pred_baseline["symbol"].unique()
]

# Final row: metrics over every prediction, regardless of symbol.
m_overall = compute_metrics(pred_baseline)
metrics_rows.append({"model": model_name, "symbol": "overall", **m_overall})

metrics_df = pd.DataFrame(metrics_rows)
print(metrics_df.to_string())
metrics_to_parquet(metrics_rows, metrics_path)
print("Saved:", metrics_path)

      model   symbol           MAE          RMSE     MAPE_%
0  baseline     AAPL     19.609135     21.964702   7.573419
1  baseline  BTC-USD  11346.617432  13453.780824  13.126679
2  baseline  ETH-USD    741.156682    857.323500  23.236346
3  baseline     MSFT     29.190272     35.509242   6.354190
4  baseline     NVDA     13.574884     16.556554   7.369074
5  baseline      QQQ     28.793306     32.396684   4.781026
6  baseline      SPY     27.940290     29.766749   4.176669
7  baseline  overall   1743.840286   5095.420565   9.516772
Saved: C:\capstone_project_unfc\model\experiments-pool\artifacts\metrics_baseline_pool.parquet
