In [1]:
# Parameters
# NOTE(review): this cell has the shape of the cell papermill injects at run
# time (bare "# Parameters" header, plain assignments) -- confirm before
# hand-editing, since an injected cell is regenerated on every run.
TICKERS = [
    "SPY",
    "QQQ",
    "GLD",
]
START, END = "1999-01-01", "2024-12-31"  # date range as ISO strings
CSV = "data/price_data.csv"              # price file to read
OUT_DIR = "mc"                           # output directory


In [2]:
# Default parameters for a standalone run.
# NOTE(review): this cell executes AFTER the "# Parameters" cell at the top of
# the notebook. If that earlier cell is the one papermill injects, the
# assignments here re-assign (and so win over) the injected values. Tagging
# this cell `parameters` should make papermill place its injected cell after
# it instead -- confirm the cell tags in the .ipynb.
TICKERS = [
    "SPY",  # benchmark
    "QQQ",  # tech proxy
    "GLD",  # gold
]
START, END = "1999-01-01", "2024-12-31"
CSV = "data/price_data.csv"  # set "" to force Yahoo Finance
OUT_DIR = "mc"               # where params.json and hist_returns.csv will be written

In [3]:
from pathlib import Path
import json, sys
import numpy as np
import pandas as pd

# Make sure the output folder exists before hist_returns.csv and params.json
# are written into it; exist_ok=True keeps re-runs from failing.
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

def load_prices_from_csv(csv_path: Path) -> pd.DataFrame:
    """Read a price CSV and return it indexed by date, sorted ascending.

    Two layouts are accepted, told apart by the presence of a ``Ticker``
    column:

    * long  -- columns ``Date``, ``Ticker``, ``Adj Close``; pivoted so that
      each ticker becomes a column of ``Adj Close`` values.
    * wide  -- a ``Date`` column plus any other columns, which are kept as-is.

    Parameters
    ----------
    csv_path : Path
        Location of the CSV file. It must contain a ``Date`` column.

    Returns
    -------
    pd.DataFrame
        Frame indexed by ``Date`` in ascending order.
    """
    raw = pd.read_csv(csv_path, parse_dates=["Date"])

    # Wide layout: the file already has the target shape.
    if "Ticker" not in raw.columns:
        return raw.set_index("Date").sort_index()

    # Long layout: spread tickers across columns.
    by_ticker = raw.pivot(index="Date", columns="Ticker", values="Adj Close")
    return by_ticker.sort_index()

def load_prices_from_yf(tickers, start, end):
    """Download prices with yfinance and return complete rows sorted by date.

    Parameters
    ----------
    tickers
        Symbols forwarded to ``yf.download``.
    start, end
        Date bounds forwarded to ``yf.download``.

    Returns
    -------
    The ``"Close"`` slice of the download when its columns are a
    ``pd.MultiIndex``, otherwise the download unchanged; in both cases rows
    containing any NaN are dropped and the index is sorted.
    """
    # Imported lazily so the notebook still runs from a CSV without yfinance.
    import yfinance as yf

    downloaded = yf.download(
        tickers,
        start=start,
        end=end,
        auto_adjust=True,
        progress=False,
    )
    is_multi_level = isinstance(downloaded.columns, pd.MultiIndex)
    closes = downloaded["Close"] if is_multi_level else downloaded
    return closes.dropna().sort_index()

# --- load prices ---
# Use the local CSV when one is configured and present; otherwise download.
# NOTE(review): START/END are only forwarded to the Yahoo download; the CSV
# path uses whatever date range the file contains -- confirm that is intended.
if CSV and Path(CSV).exists():
    prices = load_prices_from_csv(Path(CSV))
    # keep only requested tickers present in CSV
    tickers = [t for t in TICKERS if t in prices.columns]
    missing = [t for t in TICKERS if t not in prices.columns]
    if missing:
        print(f"[fit_params] WARNING: Missing in CSV: {missing}; using {tickers} only.", file=sys.stderr)
    prices = prices[tickers].dropna(how="any")
else:
    tickers = TICKERS[:]  # use requested list
    prices = load_prices_from_yf(tickers, START, END)
    # Force the columns into `tickers` order. The covariance matrix below is
    # exported as a bare nested list, so its rows/columns are only meaningful
    # if they line up with params["tickers"]; the downloaded frame's column
    # order is whatever the loader returned, not necessarily the requested
    # order. This also raises KeyError if a requested ticker is absent rather
    # than writing a params file whose pieces disagree.
    prices = prices[tickers]

assert len(tickers) >= 3 and {"SPY","QQQ","GLD"}.issubset(set(tickers)), \
    "SPY/QQQ/GLD must be included."

# --- daily log returns in percentage points (×100) ---
# The first row is NaN after diff() and is removed by dropna().
lrets_pct = np.log(prices).diff().dropna() * 100.0
lrets_pct.to_csv(Path(OUT_DIR) / "hist_returns.csv", index=True)

mu = lrets_pct.mean().to_dict()               # daily mean (%), keyed by ticker
cov = lrets_pct.cov().values.tolist()         # daily covariance (%^2), order = tickers
start_prices = prices.iloc[0].to_dict()       # first available row, keyed by ticker

params = {
    "tickers": tickers,
    # Actual date span of the price data that was used.
    "start": str(prices.index.min().date()),
    "end":   str(prices.index.max().date()),
    "mu_daily_pct": mu,
    "cov_daily_pct2": cov,
    "start_prices": start_prices,
    "horizon_days_default": 252*5              # = 1260; presumably 5 years of 252 trading days
}

with open(Path(OUT_DIR) / "params.json", "w") as f:
    json.dump(params, f, indent=2)

print(f"[fit_params] Wrote {Path(OUT_DIR)/'params.json'} and {Path(OUT_DIR)/'hist_returns.csv'}")


[fit_params] Wrote mc/params.json and mc/hist_returns.csv
