# Backtesting StockLLM Alpha

This notebook demonstrates building a FinSeer+FAISS index, generating signals using the StockLLM alpha, and backtesting with the existing engines.

Steps:
- Create a small synthetic (randomly generated) OHLCV dataset for a few symbols
- Build the candidate index (5-day windows) with FinSeer embeddings
- Run the StockLLM alpha to produce signals
- Backtest using `BacktestingEngine`


In [None]:
# If needed, install dependencies
# %pip install -r requirements.txt

import numpy as np
import pandas as pd

from src.schemas import TimeFrame
from src.retrieval.finseer_client import FinSeerEmbedder, FinSeerConfig
from src.retrieval.faiss_index import FaissCandidateIndex, default_indicator_builder
from src.llm.stockllm_client import StockLLMGenerator, StockLLMConfig
from src.alphas.stockllm_alpha import stockllm_alpha
from src.engines.backtesting_engine import BacktestingEngine

# 1) Build small OHLCV dataset
# Synthetic data: each symbol's close is a linear drift from 100 plus Gaussian
# noise. The RNG is seeded so that the generated prices -- and therefore the
# signals and backtest metrics printed below -- are identical on every run.
SEED = 42
N_DAYS = 60

symbols = ["AAPL", "MSFT", "GOOG"]
# Per-bar drift added to the base price for each symbol.
drifts = {"AAPL": 0.2, "MSFT": -0.1, "GOOG": 0.05}
start = pd.Timestamp("2024-01-02")
# All symbols share the same business-day calendar, so build it once.
dates = pd.date_range(start, periods=N_DAYS, freq="B")
steps = np.arange(N_DAYS)
rng = np.random.default_rng(SEED)

# Rows are laid out symbol-major (all AAPL bars, then MSFT, then GOOG), which
# matches the ordering produced by MultiIndex.from_product below.
base = np.concatenate([
    100 + steps * drifts[sym] + rng.normal(scale=0.5, size=N_DAYS)
    for sym in symbols
])
# The volume pattern depends only on the bar number, so it repeats per symbol.
volume = np.tile(1_000_000 + 5000 * np.sin(steps / 5), len(symbols))

df = pd.DataFrame(
    {
        "open": base - 0.2,
        "high": base + 0.5,
        "low": base - 0.5,
        "close": base,
        "adjusted_close": base,
        "volume": volume,
    },
    index=pd.MultiIndex.from_product([symbols, dates], names=["symbol", "timestamp"]),
)

# 2) Build candidate index
# Split the MultiIndex frame into one timestamp-indexed frame per symbol and
# hand the mapping to the index builder (5-bar lookback, daily timeframe).
embedder = FinSeerEmbedder(FinSeerConfig())
index = FaissCandidateIndex(embedder)
per_symbol_frames = {ticker: df.loc[ticker] for ticker in symbols}
added = index.build_from_symbol_dfs(
    per_symbol_frames,
    lookback=5,
    indicator_builder=default_indicator_builder,
    timeframe=TimeFrame.day,
)
# The builder's return value is reported as the number of indexed candidates.
print(f"Indexed {added} candidates")

# 3) Initialize StockLLM generator
# Generation settings are kept in a named config: temperature 0.0 and at most
# 64 new tokens per call.
llm_config = StockLLMConfig(temperature=0.0, max_new_tokens=64)
stockllm = StockLLMGenerator(llm_config)

# 4) Generate signals using the stockLLM alpha
# lookback=5 is the same window length that was used when building the index.
alpha_kwargs = {
    "historical_data": df,
    "index": index,
    "generator": stockllm,
    "lookback": 5,
    "top_k": 5,
    "timeframe": TimeFrame.day,
    "confidence_threshold": 0.0,
}
signals = stockllm_alpha(**alpha_kwargs)

# Show the first few rows that have every preview column populated.
preview_columns = ["movement", "prob_rise", "prob_fall", "prob_freeze", "confidence", "signal"]
preview = signals[preview_columns].dropna().head()
print(preview)

# 5) Backtest the signals
# Run the engine on the same price frame the signals were generated from,
# then print a small summary of the result attributes.
bt = BacktestingEngine(initial_capital=1_000_000.0)
results = bt.run_backtest(
    df,
    signals,
    transaction_cost=0.001,
    show_progress=False,
)
summary_fields = ("total_pnl", "sharpe_ratio", "avg_turnover")
summary = {field: getattr(results, field) for field in summary_fields}
print(summary)
