In [6]:
# --- Cell 1: configuration + imports ---

import os, time, math, datetime as dt
import pandas as pd
import requests
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment before
# looking up the API key.
load_dotenv()

API_KEY = os.environ.get("POLYGON_API_KEY")
if not API_KEY:
    # Fail fast: HEADERS below is built from this key.
    raise ValueError("❌ POLYGON_API_KEY not found in .env")

# Root URL that the fetch helpers prepend to every endpoint path.
BASE = "https://api.massive.com"
# Timezone used when converting bar timestamps to calendar dates.
NY_TZ = "America/New_York"
# Bearer-token auth header sent with each request.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

# Output CSVs are written under ./data, so make sure it exists.
os.makedirs("data", exist_ok=True)

In [7]:
# --- Cell 2: Black-Scholes helpers ---

def norm_cdf(x):
    """Standard normal cumulative distribution function, computed via math.erf."""
    scaled = x / math.sqrt(2.0)
    return 0.5 * (1.0 + math.erf(scaled))

def norm_pdf(x):
    """Standard normal probability density at x."""
    inv_root_two_pi = 1.0 / math.sqrt(2*math.pi)
    return inv_root_two_pi * math.exp(-0.5 * x * x)

def bs_call_price(S, K, T, r, sigma):
    """
    Black-Scholes price of a call (the formula has no dividend term).

    S: spot, K: strike, T: time to expiry in years, r: rate, sigma: volatility.
    - T <= 0     -> payoff at expiry, max(S - K, 0).
    - sigma <= 0 -> max(S - K*exp(-r*T), 0).
    Otherwise S and K must be positive because log(S/K) is evaluated.
    """
    if T <= 0:
        return max(S - K, 0.0)
    if sigma <= 0:
        return max(S - K * math.exp(-r * T), 0.0)

    vol_root_t = sigma * math.sqrt(T)
    numerator = math.log(S / K) + (r + 0.5 * sigma * sigma) * T
    d1 = numerator / vol_root_t
    d2 = d1 - vol_root_t
    return S * norm_cdf(d1) - K * math.exp(-r * T) * norm_cdf(d2)

def bs_put_price(S, K, T, r, sigma):
    """
    Black-Scholes price of a put (the formula has no dividend term).

    S: spot, K: strike, T: time to expiry in years, r: rate, sigma: volatility.
    - T <= 0     -> payoff at expiry, max(K - S, 0).
    - sigma <= 0 -> max(K*exp(-r*T) - S, 0).
    Otherwise S and K must be positive because log(S/K) is evaluated.
    """
    if T <= 0:
        return max(K - S, 0.0)
    if sigma <= 0:
        return max(K * math.exp(-r * T) - S, 0.0)

    vol_root_t = sigma * math.sqrt(T)
    numerator = math.log(S / K) + (r + 0.5 * sigma * sigma) * T
    d1 = numerator / vol_root_t
    d2 = d1 - vol_root_t
    return K * math.exp(-r * T) * norm_cdf(-d2) - S * norm_cdf(-d1)

def bs_vega(S, K, T, r, sigma):
    """
    Black-Scholes vega: S * pdf(d1) * sqrt(T), per unit (1.00) of volatility.

    Returns None when any of T, sigma, S or K is <= 0.
    """
    # Same checks, in the same order, as a chained `<= 0 or ...` test.
    if any(value <= 0 for value in (T, sigma, S, K)):
        return None
    root_t = math.sqrt(T)
    d1 = (math.log(S / K) + (r + 0.5 * sigma * sigma) * T) / (sigma * root_t)
    return S * norm_pdf(d1) * root_t

def implied_vol_call(S, K, T, r, price, max_iter=60, tol=1e-6):
    """
    Implied volatility of a call by bisection on bs_call_price.

    Parameters
    ----------
    S, K, T, r : spot, strike, time to expiry (years), rate.
    price      : observed option price (may be None).
    max_iter   : maximum number of bisection steps.
    tol        : stop once |model price - price| < tol.

    Returns
    -------
    float volatility in [1e-4, 3.0], or None when no volatility can be
    recovered:
      * price is None, or any input is NaN / infinite;
      * S, K or T is not positive;
      * price is within 1e-4 of the discounted intrinsic value;
      * price >= S (the formula cannot reach spot for any finite volatility).

    Prices that would need a volatility above 3.0 are not rejected: the
    search converges to the upper bracket and returns ~3.0.
    """
    if price is None:
        return None
    # NaN/inf compare False against everything, so without this check they
    # slip past every guard and bisection drifts to the 3.0 bound.
    if not all(math.isfinite(v) for v in (S, K, T, r, price)):
        return None
    if S <= 0 or K <= 0 or T <= 0:
        return None

    intrinsic = max(S - K * math.exp(-r*T), 0.0)
    if price <= intrinsic + 1e-4:
        return None
    if price >= S:
        # No-arbitrage upper bound for a call: nothing to solve for.
        return None

    low, high = 1e-4, 3.0
    for _ in range(max_iter):
        mid = 0.5*(low+high)
        diff = bs_call_price(S, K, T, r, mid) - price
        if abs(diff) < tol:
            return mid
        # Call price increases with volatility: overshoot -> lower the ceiling.
        if diff > 0:
            high = mid
        else:
            low = mid
    return 0.5*(low+high)

def implied_vol_put(S, K, T, r, price, max_iter=60, tol=1e-6):
    """
    Implied volatility of a put by bisection on bs_put_price.

    Parameters
    ----------
    S, K, T, r : spot, strike, time to expiry (years), rate.
    price      : observed option price (may be None).
    max_iter   : maximum number of bisection steps.
    tol        : stop once |model price - price| < tol.

    Returns
    -------
    float volatility in [1e-4, 3.0], or None when no volatility can be
    recovered:
      * price is None, or any input is NaN / infinite;
      * S, K or T is not positive;
      * price is within 1e-4 of the discounted intrinsic value;
      * price >= K*exp(-r*T) (the formula cannot reach the discounted strike
        for any finite volatility).

    Prices that would need a volatility above 3.0 are not rejected: the
    search converges to the upper bracket and returns ~3.0.
    """
    if price is None:
        return None
    # NaN/inf compare False against everything, so without this check they
    # slip past every guard and bisection drifts to the 3.0 bound.
    if not all(math.isfinite(v) for v in (S, K, T, r, price)):
        return None
    if S <= 0 or K <= 0 or T <= 0:
        return None

    discounted_strike = K * math.exp(-r*T)
    intrinsic = max(discounted_strike - S, 0.0)
    if price <= intrinsic + 1e-4:
        return None
    if price >= discounted_strike:
        # No-arbitrage upper bound for a put: nothing to solve for.
        return None

    low, high = 1e-4, 3.0
    for _ in range(max_iter):
        mid = 0.5*(low+high)
        diff = bs_put_price(S, K, T, r, mid) - price
        if abs(diff) < tol:
            return mid
        # Put price increases with volatility: overshoot -> lower the ceiling.
        if diff > 0:
            high = mid
        else:
            low = mid
    return 0.5*(low+high)

In [8]:
# --- Cell 3: API helpers ---

def fetch_underlying_daily(sym, start_date, end_date):
    """
    Download 1-day bars for `sym` between start_date and end_date.

    Returns a DataFrame with columns date_ny (bar timestamp converted to a
    New York calendar date), S_open and S_close, sorted by date_ny. An empty
    frame with those columns is returned when the response has no results.

    Raises RuntimeError on any non-200 HTTP status.
    """
    endpoint = f"{BASE}/v2/aggs/ticker/{sym}/range/1/day/{start_date}/{end_date}"
    query = {"adjusted": "true", "sort": "asc", "limit": 50000}
    resp = requests.get(endpoint, params=query, headers=HEADERS, timeout=30)
    if resp.status_code != 200:
        raise RuntimeError(f"{sym} daily {start_date}→{end_date}: {resp.text[:200]}")

    bars = resp.json().get("results", [])
    if not bars:
        return pd.DataFrame(columns=["date_ny","S_open","S_close"])

    raw = pd.DataFrame(bars)
    # "t" is epoch milliseconds; label each bar with its New York date.
    ny_stamp = pd.to_datetime(raw["t"], unit="ms", utc=True).dt.tz_convert(NY_TZ)
    daily = pd.DataFrame({
        "date_ny": ny_stamp.dt.date,
        "S_open": raw["o"],
        "S_close": raw["c"],
    })
    return daily.sort_values("date_ny")

def list_contracts_asof(sym, asof_date):
    """
    Get all option contracts for this underlying as of a certain date.

    Every page of the listing is fetched: while the response carries a
    `next_url`, that URL is requested and its results appended. Reading only
    the first page (limit 1000) would silently truncate large option chains.
    Note this issues one HTTP request per page.

    Returns a DataFrame built from the concatenated `results` entries (empty
    when there are none). Raises RuntimeError on any non-200 HTTP status.
    """
    url = f"{BASE}/v3/reference/options/contracts"
    params = {
        "underlying_ticker": sym,
        "include_expired": "true",
        "as_of": pd.Timestamp(asof_date).strftime("%Y-%m-%d"),
        "limit": 1000,
        "order": "asc",
    }

    contracts = []
    visited = set()
    while url and url not in visited:
        visited.add(url)  # guard against a pagination link that loops back
        r = requests.get(url, params=params, headers=HEADERS, timeout=30)
        if r.status_code != 200:
            raise RuntimeError(f"{sym} contracts {asof_date}: {r.text[:200]}")
        payload = r.json()
        contracts.extend(payload.get("results") or [])
        url = payload.get("next_url")
        # NOTE(review): next_url is expected to embed the cursor and the
        # original filters, so the query params are not re-sent — confirm
        # against the API's pagination documentation.
        params = None
    return pd.DataFrame(contracts)

def pick_nearest_expiry_atm(df_contracts, spot, asof_date,
                            min_days_ahead=3, max_days_ahead=30):
    """
    Choose the nearest expiry inside the DTE window, then the at-the-money
    call and put for that expiry.

    Only contracts whose days-to-expiry (relative to asof_date) lie in
    [min_days_ahead, max_days_ahead] (defaults 3 and 30) are considered. For
    the earliest such expiry, the call and the put whose strike is closest to
    `spot` are selected independently.

    Returns (expiry, call_info, put_info): expiry is a datetime.date and each
    *_info is the chosen contract row as a dict (including the derived
    days_to_exp and dist fields). Returns (None, None, None) when the input
    is None/empty, nothing falls inside the window, or the chosen expiry
    lacks either a call or a put.
    """
    nothing = (None, None, None)
    if df_contracts is None or df_contracts.empty:
        return nothing

    wanted = ["ticker","contract_type","strike_price","expiration_date"]
    chain = df_contracts[wanted].dropna()
    if chain.empty:
        return nothing

    chain["strike_price"] = chain["strike_price"].astype(float)
    chain["expiration_date"] = pd.to_datetime(chain["expiration_date"]).dt.date

    ref_day = pd.Timestamp(asof_date).date()
    chain["days_to_exp"] = (chain["expiration_date"] - ref_day).apply(lambda delta: delta.days)

    # Inclusive on both ends, i.e. >= min_days_ahead and <= max_days_ahead.
    in_window = chain["days_to_exp"].between(min_days_ahead, max_days_ahead)
    candidates = chain[in_window].copy()
    if candidates.empty:
        return nothing

    soonest = candidates.sort_values(["days_to_exp","expiration_date"]).iloc[0]
    expiry = soonest["expiration_date"]
    same_expiry = candidates[candidates["expiration_date"] == expiry]

    sides = {
        kind: same_expiry[same_expiry["contract_type"] == kind].copy()
        for kind in ("call", "put")
    }
    # Both legs are required; bail out before ranking if either is missing.
    if any(side.empty for side in sides.values()):
        return nothing

    picks = []
    for kind in ("call", "put"):
        side = sides[kind]
        side["dist"] = (side["strike_price"] - spot).abs()
        picks.append(side.sort_values("dist").iloc[0].to_dict())

    return expiry, picks[0], picks[1]

def fetch_option_bar(opt_ticker, day):
    """
    Volume and close for one option contract on one trading day.

    Tries the daily aggregate first; if no daily bar exists for `day`, falls
    back to minute bars and aggregates them (summed volume, last close).

    The requests span `day` through `day + 1`, so every returned bar is
    filtered to those whose timestamp falls on `day` in New York time.
    Without that filter the next day's daily bar could be returned, and the
    minute fallback would sum two days of volume and take the next day's
    close (look-ahead).

    Returns {"volume": ..., "close": ...}; {"volume": 0, "close": None} when
    no bar for `day` is available, including when a request fails or returns
    a body that is not JSON (best effort, never raises on HTTP status).
    """
    day_ts = pd.Timestamp(day)
    target = day_ts.date()
    start_ymd = day_ts.strftime("%Y-%m-%d")
    end_ymd   = (day_ts + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

    def bars_on_target(resp):
        # Keep only bars stamped on the requested New York date. Uses the same
        # ms -> UTC -> New York conversion as fetch_underlying_daily.
        if resp.status_code != 200:
            return []
        try:
            rows = resp.json().get("results") or []
        except ValueError:
            return []
        kept = [
            row for row in rows
            if "t" in row
            and pd.Timestamp(row["t"], unit="ms", tz="UTC").tz_convert(NY_TZ).date() == target
        ]
        kept.sort(key=lambda row: row["t"])
        return kept

    # 1) daily
    url_day = f"{BASE}/v2/aggs/ticker/{opt_ticker}/range/1/day/{start_ymd}/{end_ymd}"
    r = requests.get(url_day, headers=HEADERS, timeout=20)
    daily = bars_on_target(r)
    if daily:
        x = daily[0]
        return {"volume": x.get("v", 0), "close": x.get("c", None)}

    # 2) fallback: minute
    url_min = f"{BASE}/v2/aggs/ticker/{opt_ticker}/range/1/minute/{start_ymd}/{end_ymd}"
    params = {"adjusted":"true","sort":"asc","limit":50000}
    r = requests.get(url_min, params=params, headers=HEADERS, timeout=30)
    minutes = bars_on_target(r)
    if not minutes:
        return {"volume": 0, "close": None}
    vol = sum(row.get("v", 0) for row in minutes)
    last_close = minutes[-1].get("c", None)
    return {"volume": vol, "close": last_close}

In [9]:
# --- Cell 4: main daily builder ---

def _leg_iv_and_vega(iv_solver, S, K, T, r, price, fallback_iv, min_price=0.05):
    """
    Implied vol and vega for one option leg.

    Returns (iv, vega) where vega is bs_vega(...) / 100. Both are None when
    the price is missing or not above min_price. When the solver returns None
    the fallback_iv is used instead (pass fallback_iv=None to leave it None).
    """
    if price is None or not price > min_price:
        return None, None
    iv = iv_solver(S, K, T, r, price)
    if iv is None:
        iv = fallback_iv
    if iv is None:
        return None, None
    raw_vega = bs_vega(S, K, T, r, iv)
    return iv, (raw_vega / 100 if raw_vega is not None else None)


def build_daily_options(sym, start_date, end_date, *, min_dte=1, max_dte=45,
                        risk_free_rate=0.0, fallback_iv=0.20):
    """
    Build one row per trading day with the near-dated ATM call/put for `sym`.

    For each daily bar of the underlying:
      1. list the option contracts as of that day;
      2. pick the nearest expiry with min_dte..max_dte days to expiry and the
         call/put strikes closest to the underlying close;
      3. fetch each option's volume/close for the day;
      4. compute implied vol and vega (divided by 100) for each leg, plus
         their averages over the legs that have a value.

    Days where the contract listing or the option-bar fetch raises, or where
    no suitable ATM pair exists, are logged and skipped.

    Parameters
    ----------
    sym, start_date, end_date : underlying ticker and date window.
    min_dte, max_dte : inclusive days-to-expiry window for the expiry pick.
    risk_free_rate   : rate passed to the pricing helpers.
    fallback_iv      : value recorded when the IV solver returns None
                       (None keeps the IV and vega empty instead).

    Returns
    -------
    DataFrame sorted by date_ny; it has the full set of output columns even
    when every day was skipped (zero rows).

    Raises
    ------
    RuntimeError if no underlying bars are returned for the window.
    """
    out_cols = [
        "date_ny", "S_open", "S_close",
        "call_ticker", "put_ticker", "strike_call", "strike_put",
        "expiry", "days_to_exp",
        "call_vol", "put_vol", "total_vol",
        "call_price", "put_price",
        "call_iv", "put_iv", "avg_iv",
        "call_vega", "put_vega", "avg_vega",
    ]

    print(f"\n[{sym}] DAILY build {start_date} → {end_date}")
    px = fetch_underlying_daily(sym, start_date, end_date)
    if px.empty:
        raise RuntimeError(f"No underlying daily for {sym}")

    rows = []

    for _, row in px.iterrows():
        day     = row["date_ny"]
        S_open  = float(row["S_open"])
        S_close = float(row["S_close"])

        # 1) contracts for this exact day
        try:
            dfc = list_contracts_asof(sym, day)
        except Exception as e:
            print(f"[{sym}] contracts fail {day}: {e}")
            continue

        expiry, call_info, put_info = pick_nearest_expiry_atm(
            dfc, S_close, day, min_days_ahead=min_dte, max_days_ahead=max_dte
        )
        if expiry is None:
            print(f"[{sym}] no suitable ATM on {day}")
            continue

        # 2) option bars — a failed fetch skips the day rather than aborting
        #    the whole run, matching the contract-listing handling above.
        try:
            call_bar = fetch_option_bar(call_info["ticker"], day)
            put_bar  = fetch_option_bar(put_info["ticker"],  day)
        except Exception as e:
            print(f"[{sym}] option bars fail {day}: {e}")
            continue

        call_vol   = call_bar["volume"]
        call_price = call_bar["close"]
        put_vol    = put_bar["volume"]
        put_price  = put_bar["close"]
        total_vol  = (call_vol or 0) + (put_vol or 0)

        # 3) time to expiry (floored at one day so T stays positive)
        asof = pd.Timestamp(day).date()
        dte  = max((expiry - asof).days, 1)
        T    = dte / 365.0
        r    = risk_free_rate

        # 4) IV + vega
        strike_call = float(call_info["strike_price"])
        strike_put  = float(put_info["strike_price"])
        call_iv, call_vega = _leg_iv_and_vega(
            implied_vol_call, S_close, strike_call, T, r, call_price, fallback_iv
        )
        put_iv, put_vega = _leg_iv_and_vega(
            implied_vol_put, S_close, strike_put, T, r, put_price, fallback_iv
        )

        ivs = [x for x in [call_iv, put_iv] if x is not None]
        avg_iv = sum(ivs)/len(ivs) if ivs else None

        vegas = [x for x in [call_vega, put_vega] if x is not None]
        avg_vega = sum(vegas)/len(vegas) if vegas else None

        rows.append({
            "date_ny": day,
            "S_open": S_open,
            "S_close": S_close,
            "call_ticker": call_info["ticker"],
            "put_ticker":  put_info["ticker"],
            "strike_call": strike_call,
            "strike_put":  strike_put,
            "expiry": pd.Timestamp(expiry),
            "days_to_exp": dte,
            "call_vol": call_vol,
            "put_vol": put_vol,
            "total_vol": total_vol,
            "call_price": call_price,
            "put_price": put_price,
            "call_iv": call_iv,
            "put_iv": put_iv,
            "avg_iv": avg_iv,
            "call_vega": call_vega,
            "put_vega": put_vega,
            "avg_vega": avg_vega,
        })

        print(f"[{sym}] {day} | Cvol={call_vol} Pvol={put_vol} | Cpx={call_price} Ppx={put_price}")

        # tiny pause
        time.sleep(0.02)

    # Passing the column list keeps the schema (and makes sort_values safe)
    # when no day produced a row.
    df_out = pd.DataFrame(rows, columns=out_cols).sort_values("date_ny")
    return df_out

In [10]:
# --- Cell 5: run for 1 ticker (10 min every 6 months) ---

# Date windows, one (start_date, end_date) pair per run.
ranges = [
    ("2020-05-31","2020-11-27"),
    ("2020-11-28","2021-05-25"),
    ("2021-05-26","2021-11-21"),
    ("2021-11-22","2022-05-20"),
    ("2022-05-21","2022-11-15"),
    ("2022-11-16","2023-05-14"),
    ("2023-05-15","2023-11-10"),
    ("2023-11-11","2024-05-08"),
    ("2024-05-09","2024-11-04"),
    ("2024-11-05","2025-05-03"),
    ("2025-05-04","2025-10-30"),
    ("2025-10-31","2026-04-29")
]
# Tickers this notebook is run for, one at a time.
tickers = [
    "SPY",      # S&P 500 ETF
    "NVDA",     # Nvidia
    "AAPL",     # Apple
    "MSFT",     # Microsoft
    "GOOG",     # Alphabet
    "AMZN",     # Amazon
    "AVGO",     # Broadcom
    "META",     # Meta
    "TSLA",     # Tesla
    "JPM",      # JPMorgan
    "WMT",      # Walmart
    "LLY",      # Eli Lilly
    "V",        # Visa
]

# Pick the ticker and the window for this run. Selecting the window by index
# keeps start/end paired instead of hand-copying two date strings.
sym = "AVGO"
start_date, end_date = ranges[0]

df_daily = build_daily_options(sym, start_date, end_date)
out_path = f"data/{sym}_options_daily_{start_date}_{end_date}.csv"
df_daily.to_csv(out_path, index=False)

print(f"[{sym}] ✅ saved to {out_path} | rows={len(df_daily)}")
df_daily.tail()


[AVGO] DAILY build 2020-05-31 → 2020-11-27


KeyboardInterrupt: 