## 9 Links to Other Indices

In this step, I benchmark my **CSIAI** composite index against three established, publicly traded ETFs that capture very similar investment themes:

- **SPY** (SPDR S&P 500 ETF): the broad U.S. market proxy.  
- **QUAL** (iShares MSCI USA Quality Factor ETF): focuses on “quality” metrics (profitability, low leverage).  
- **MTUM** (iShares Edge MSCI USA Momentum Factor ETF): represents momentum exposure similar to the Growth potential of CSIAI.

I download each ETF’s adjusted close price from **2023-01-01** (the same start date as my stock data) through **2025-05-06**. I then compute daily returns and cumulative performance, and plot the series together.

In [None]:
import pandas as pd, yfinance as yf, numpy as np
from pathlib import Path

# ---------------------------------------------------------------------------
# Build the CSIAI top-decile portfolio and save its daily return series.
#
# Inputs : ci_scores.parquet (column "CI_pca_lin", indexed by ticker) and one
#          <ticker>.parquet price file per ticker with an "Adj Close" column.
# Outputs: artifacts/benchmarks/csiai_portfolio_returns.csv, plus the
#          in-memory series `port_ret` (daily) and `port_cum` (growth of 1).
# ---------------------------------------------------------------------------
ROOT     = Path("..")
ARTIFACT = ROOT / "artifacts"
PROC_DIR = ROOT / "data" / "processed"
SCORES   = PROC_DIR / "ci_scores.parquet"
PRICES   = PROC_DIR / "raw" / "prices"
BENCH_DIR = ARTIFACT / "benchmarks"
BENCH_DIR.mkdir(parents=True, exist_ok=True)

scores = pd.read_parquet(SCORES)["CI_pca_lin"]

# The score index is used as the list of tickers to load prices for.
tickers = scores.index.tolist()

# One adjusted-close column per ticker; drop dates where every ticker is NaN.
px = pd.concat(
    {t: pd.read_parquet(PRICES / f"{t}.parquet")["Adj Close"]
     for t in tickers},
    axis=1
).dropna(how="all")

px = px.loc["2023-01-01":]

# Month-start labels are used as rebalancing dates. These are calendar month
# starts and need not be trading days present in `px`.
rebal_dates = px.resample("MS").first().index

# `scores` is a single cross-section, so the top decile is identical at every
# rebalance date: compute it once instead of once per date. Keep at least one
# name so the equal weight 1/len(top_decile) never divides by zero.
n_top = max(1, int(0.1 * len(scores)))
top_decile = scores.nlargest(n_top).index
target_weights = pd.Series(1 / len(top_decile), index=top_decile)

weights = {d: target_weights for d in rebal_dates}

# Forward-fill prices explicitly (what the legacy pct_change default did
# implicitly) so the result does not depend on the pandas version.
rets = px.ffill().pct_change(fill_method=None).fillna(0)

# DataFrame(dict-of-Series) puts the dict keys (dates) on the COLUMNS, so it
# must be transposed to dates x tickers before aligning with `rets`.
# Forward-filling happens during the reindex so that each trading day picks up
# the most recent rebalance date even when that date is not a trading day.
w_df = (
    pd.DataFrame(weights).T
    .reindex(rets.index, method="ffill")
    .reindex(columns=rets.columns)
    .fillna(0)
)

# NOTE: with constant equal weights this is the simple average of the
# top-decile daily returns (i.e. effectively rebalanced to equal weight daily).
port_ret = (w_df * rets).sum(axis=1)
port_ret.to_csv(BENCH_DIR / "csiai_portfolio_returns.csv")

port_cum = (1 + port_ret).cumprod().rename("CSIAI_Port")

print("CSIAI portfolio series saved.")


In [None]:
import matplotlib.pyplot as plt

# Compound the daily benchmark returns into growth-of-1 curves and save them.
# `returns` and `OUT_DIR` are expected to come from an earlier cell.
cum_returns = (returns + 1).cumprod()
cum_returns.to_csv(OUT_DIR / "benchmark_cum_returns.csv")

# Destination folder for the figure.
FIG_DIR = Path("..") / "artifacts" / "figures" / "benchmarks"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# One line per column of the cumulative-return table.
fig, ax = plt.subplots(figsize=(10, 6))
for ticker, curve in cum_returns.items():
    ax.plot(cum_returns.index, curve, label=ticker)

ax.set_title("Cumulative Returns: SPY vs QUAL vs MTUM\n(2023-01-01 to today)")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative Return")
ax.legend()
fig.tight_layout()

# Write the PNG before showing, then display inline.
fig.savefig(FIG_DIR / "benchmark_cum_returns.png", dpi=300)
plt.show()