In [3]:
from google.colab import files
uploaded = files.upload()

Saving yahoo_cookie.txt to yahoo_cookie.txt


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
import requests
import os
import numpy as np, pandas as pd
from datetime import datetime, timezone
from math import sqrt, exp, log
from scipy.stats import norm
from scipy.optimize import brentq
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

In [5]:
class YahooClient:
    def __init__(self, load_browser_cookies=True, cookie_str: str | None = None):
        self.s = requests.Session()
        self.s.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                          "(KHTML, like Gecko) Chrome/127.0 Safari/537.36",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
        })
        self.crumb = None

        # Allow passing a manual cookie string if browser extraction fails
        if not cookie_str:
            cookie_str = os.environ.get("YAHOO_COOKIE") or None
        if not cookie_str:
            cookie_path = "/content/yahoo_cookie.txt"
            if os.path.isfile(cookie_path):
              with open(cookie_path, "r", encoding="utf-8") as f:
                cookie_str = f.read().strip()
            else:
              print("No cookie found...")
        if cookie_str:
            for kv in cookie_str.split(";"):
                if "=" in kv:
                    k, v = kv.split("=", 1)
                    self.s.cookies.set(k.strip(), v.strip(), domain=".yahoo.com")

        if load_browser_cookies and not cookie_str:
            try:
                import browser_cookie3
            except Exception:
                browser_cookie3 = None
            if browser_cookie3:
                for loader in ("chrome", "edge", "firefox"):
                    try:
                        cj = getattr(browser_cookie3, loader)(domain_name=".yahoo.com")
                        self.s.cookies.update(cj)
                        break
                    except Exception:
                        continue

    def warmup(self, ticker="JPM"):
        pages = [
            f"https://finance.yahoo.com/quote/{ticker}/options?p={ticker}",
            f"https://finance.yahoo.com/quote/{ticker}?p={ticker}",
            "https://finance.yahoo.com/",
        ]
        for u in pages:
            try:
                r = self.s.get(u, timeout=12, allow_redirects=True)
                if r.status_code == 200:
                    break
            except Exception:
                pass

        for cu in (
            "https://query1.finance.yahoo.com/v1/test/getcrumb",
            "https://query2.finance.yahoo.com/v1/test/getcrumb",
        ):
            try:
                c = self.s.get(cu, timeout=12)
                if c.ok and c.text.strip():
                    self.crumb = c.text.strip()
                    break
            except Exception:
                continue

    def _get_json(self, base_url, params=None, referer=None, retries=2):
        for _ in range(retries + 1):
            p = dict(params or {})
            if self.crumb:
                p["crumb"] = self.crumb
            headers = {"Accept": "application/json, text/javascript, */*; q=0.01"}
            if referer:
                headers["Referer"] = referer

            r = self.s.get(base_url, params=p, headers=headers, timeout=12, allow_redirects=True)

            if r.status_code in (401, 403, 404, 999):
                self.warmup()
                continue

            r.raise_for_status()
            return r.json()
        raise RuntimeError(f"Yahoo request failed after retries: {base_url}")

    def get_expirations(self, ticker: str) -> list[str]:
        t = normalize_ticker(ticker)
        self.warmup(t)
        ref = f"https://finance.yahoo.com/quote/{t}/options?p={t}"
        for host in ("https://query2.finance.yahoo.com", "https://query1.finance.yahoo.com"):
            try:
                j = self._get_json(f"{host}/v7/finance/options/{t}", referer=ref)
                res = j.get("optionChain", {}).get("result", [])
                if res:
                    exp_unix = res[0].get("expirationDates", [])
                    return [pd.to_datetime(ts, unit="s").strftime("%Y-%m-%d") for ts in exp_unix]
            except Exception:
                continue
        return []

    def get_chain(self, ticker: str, expiry: str):
        t = normalize_ticker(ticker)
        ref = f"https://finance.yahoo.com/quote/{t}/options?p={t}"
        exps = self.get_expirations(t)
        if not exps:
            raise ValueError("No expirations returned by Yahoo (blocked or symbol not optionable).")
        exp_map = {pd.to_datetime(e).strftime("%Y-%m-%d"): int(pd.Timestamp(e).timestamp()) for e in exps}
        if expiry not in exp_map:
            raise ValueError(f"Requested expiry {expiry} not in Yahoo list.")
        epoch = exp_map[expiry]

        j = None
        for host in ("https://query2.finance.yahoo.com", "https://query1.finance.yahoo.com"):
            try:
                j = self._get_json(f"{host}/v7/finance/options/{t}", params={"date": epoch}, referer=ref)
                break
            except Exception:
                continue
        if j is None:
            raise RuntimeError("Failed to fetch chain from Yahoo.")

        result = j.get("optionChain", {}).get("result", [])
        if not result:
            raise ValueError("Empty optionChain result.")
        opts = result[0].get("options", [])
        if not opts:
            return pd.DataFrame()

        o = opts[0]
        calls = pd.DataFrame(o.get("calls", []))
        puts  = pd.DataFrame(o.get("puts", []))
        if calls.empty and puts.empty:
            return pd.DataFrame()
        calls["type"] = "call"
        puts["type"]  = "put"
        df = pd.concat([calls, puts], ignore_index=True)
        if "lastTradeDate" in df.columns:
            df["lastTradeDate"] = pd.to_datetime(df["lastTradeDate"], unit="s", errors="coerce")

        # Convenience columns
        for col in ["bid", "ask", "lastPrice", "strike", "impliedVolatility"]:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce")
        if {"bid","ask"}.issubset(df.columns):
            df["mid"] = (df["bid"] + df["ask"]) / 2
            df["spread"] = df["ask"] - df["bid"]
        if "impliedVolatility" in df.columns:
            df["IV_%"] = df["impliedVolatility"] * 100

        # Order columns nicely
        order = [
            "type","contractSymbol","strike","lastPrice","bid","ask","mid","spread",
            "change","percentChange","volume","openInterest","IV_%","impliedVolatility",
            "inTheMoney","lastTradeDate","contractSize","currency"
        ]
        cols = [c for c in order if c in df.columns] + [c for c in df.columns if c not in order]
        return df[cols] if not df.empty else df


def normalize_ticker(t: str) -> str:
    t = (t or "").strip().upper()
    if "." in t and not t.endswith(".HK"):
        t = t.replace(".", "-")
    return t


def fetch_all_option_chains(ticker: str) -> pd.DataFrame:
    yc = YahooClient()
    expirations = yc.get_expirations(ticker)
    if not expirations:
        return pd.DataFrame()
    frames: list[pd.DataFrame] = []
    for exp in expirations:
        try:
            df = yc.get_chain(ticker, exp)
            if not df.empty:
                df = df.copy()
                df["expiry"] = exp
                frames.append(df)
        except Exception:
            continue
    if not frames:
        return pd.DataFrame()
    combined = pd.concat(frames, ignore_index=True)
    # Columns already normalized in get_chain; ensure order with expiry first
    order = [
        "expiry","type","contractSymbol","strike","lastPrice","bid","ask","mid","spread",
        "change","percentChange","volume","openInterest","IV_%","impliedVolatility",
        "inTheMoney","lastTradeDate","contractSize","currency"
    ]
    cols = [c for c in order if c in combined.columns] + [c for c in combined.columns if c not in order]
    return combined[cols]


In [6]:

# === Basic usage ===

# Choose your symbol
TICKER = "AAPL"

yc = YahooClient()

# 1) List expirations
exps = yc.get_expirations(TICKER)
print("First few expirations:", exps[:5])

# 2) Pick one expiry (use first if unsure)
if exps:
    EXPIRY = exps[0]
    chain = yc.get_chain(TICKER, EXPIRY)
    display(chain.head(10))
else:
    print("No expirations returned. Yahoo may be blocking requests or symbol is not optionable.")


First few expirations: ['2025-11-14', '2025-11-21', '2025-11-28', '2025-12-05', '2025-12-12']


Unnamed: 0,type,contractSymbol,strike,lastPrice,bid,ask,mid,spread,change,percentChange,volume,openInterest,IV_%,impliedVolatility,inTheMoney,lastTradeDate,contractSize,currency,expiration
0,call,AAPL251114C00110000,110.0,161.1,157.15,160.0,158.575,2.85,0.0,0.0,4.0,1,273.82844,2.738284,True,2025-11-05 19:11:51,REGULAR,USD,1763078400
1,call,AAPL251114C00120000,120.0,147.94,147.05,149.95,148.5,2.9,-3.459992,-2.285331,1.0,4,218.750453,2.187505,True,2025-11-07 19:14:29,REGULAR,USD,1763078400
2,call,AAPL251114C00130000,130.0,141.13,136.6,140.1,138.35,3.5,0.0,0.0,6.0,1,341.357568,3.413576,True,2025-11-05 19:11:51,REGULAR,USD,1763078400
3,call,AAPL251114C00140000,140.0,130.5,127.05,130.1,128.575,3.05,0.0,0.0,1.0,1,203.90674,2.039067,True,2025-10-31 14:08:20,REGULAR,USD,1763078400
4,call,AAPL251114C00145000,145.0,125.5,121.85,124.8,123.325,2.95,0.0,0.0,2.0,3,284.277633,2.842776,True,2025-10-31 14:08:20,REGULAR,USD,1763078400
5,call,AAPL251114C00150000,150.0,119.41,116.95,119.95,118.45,3.0,0.0,0.0,,1,276.513981,2.76514,True,2025-11-05 16:07:08,REGULAR,USD,1763078400
6,call,AAPL251114C00155000,155.0,116.77,111.85,115.0,113.425,3.15,0.0,0.0,1.0,1,264.941744,2.649417,True,2025-10-31 19:01:36,REGULAR,USD,1763078400
7,call,AAPL251114C00160000,160.0,115.65,106.85,110.1,108.475,3.25,0.0,0.0,1.0,1,125.000375,1.250004,True,2025-10-31 13:31:51,REGULAR,USD,1763078400
8,call,AAPL251114C00165000,165.0,105.25,101.85,104.95,103.4,3.1,0.0,0.0,4.0,4,237.451578,2.374516,True,2025-10-31 13:40:46,REGULAR,USD,1763078400
9,call,AAPL251114C00170000,170.0,98.91,96.85,99.85,98.35,3.0,0.0,0.0,6.0,6,221.826617,2.218266,True,2025-10-28 15:23:23,REGULAR,USD,1763078400


In [7]:

# === Fetch all expirations for a ticker and combine into one DataFrame ===

ALL = fetch_all_option_chains(TICKER)
print(f"Combined rows: {len(ALL)}")
display(ALL.head(1000))


Combined rows: 2262


Unnamed: 0,expiry,type,contractSymbol,strike,lastPrice,bid,ask,mid,spread,change,percentChange,volume,openInterest,IV_%,impliedVolatility,inTheMoney,lastTradeDate,contractSize,currency,expiration
0,2025-11-14,call,AAPL251114C00110000,110.0,161.10,157.15,160.00,158.575,2.85,0.000000,0.000000,4.0,1.0,273.828440,2.738284,True,2025-11-05 19:11:51,REGULAR,USD,1763078400
1,2025-11-14,call,AAPL251114C00120000,120.0,147.94,147.05,149.95,148.500,2.90,-3.459992,-2.285331,1.0,4.0,218.750453,2.187505,True,2025-11-07 19:14:29,REGULAR,USD,1763078400
2,2025-11-14,call,AAPL251114C00130000,130.0,141.13,136.60,140.10,138.350,3.50,0.000000,0.000000,6.0,1.0,341.357568,3.413576,True,2025-11-05 19:11:51,REGULAR,USD,1763078400
3,2025-11-14,call,AAPL251114C00140000,140.0,130.50,127.05,130.10,128.575,3.05,0.000000,0.000000,1.0,1.0,203.906740,2.039067,True,2025-10-31 14:08:20,REGULAR,USD,1763078400
4,2025-11-14,call,AAPL251114C00145000,145.0,125.50,121.85,124.80,123.325,2.95,0.000000,0.000000,2.0,3.0,284.277633,2.842776,True,2025-10-31 14:08:20,REGULAR,USD,1763078400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2026-03-20,call,AAPL260320C00240000,240.0,37.75,37.90,38.20,38.050,0.30,-2.029999,-5.103064,64.0,12608.0,34.451949,0.344519,True,2025-11-07 20:43:39,REGULAR,USD,1773964800
996,2026-03-20,call,AAPL260320C00250000,250.0,30.50,30.50,30.80,30.650,0.30,-1.450001,-4.538343,2523.0,12246.0,32.596499,0.325965,True,2025-11-07 20:37:11,REGULAR,USD,1773964800
997,2026-03-20,call,AAPL260320C00260000,260.0,23.84,23.85,24.10,23.975,0.25,-1.209999,-4.830336,384.0,9898.0,30.914997,0.309150,True,2025-11-07 20:46:06,REGULAR,USD,1773964800
998,2026-03-20,call,AAPL260320C00270000,270.0,18.17,18.10,18.25,18.175,0.15,-0.830000,-4.368421,655.0,11528.0,29.456272,0.294563,False,2025-11-07 20:59:21,REGULAR,USD,1773964800


In [None]:
# Save the combined DataFrame to a CSV file in Google Drive
try:
    ALL.to_csv('/content/drive/MyDrive/Colab Notebooks/VolSurf_ML/combined_options_data.csv', index=False)
    print("Successfully saved combined_options_data.csv to Google Drive.")
except Exception as e:
    print(f"Error saving file to Google Drive: {e}")
    print("Make sure you have mounted your Google Drive correctly.")

Successfully saved combined_options_data.csv to Google Drive.


In [10]:
try:
    ALL.to_parquet('/content/drive/MyDrive/Colab Notebooks/VolSurf_ML/combined_options_data.parquet', index=False)
    print("Successfully saved combined_options_data.parquet to Google Drive.")
except Exception as e:
    print(f"Error saving file to Google Drive: {e}")
    print("Make sure you have mounted your Google Drive correctly.")

Successfully saved combined_options_data.parquet to Google Drive.


In [9]:
# Download historical data for the last year
ticker_data = yf.download(TICKER, period="1y")

# Calculate daily returns
ticker_data['Daily_Return'] = ticker_data[('Close', TICKER)].pct_change()

# Calculate realized volatility (annualized)
# Assuming 252 trading days in a year
realized_volatility = ticker_data['Daily_Return'].std() * np.sqrt(252)

print(f"Realized Volatility for {TICKER} over the last year: {realized_volatility:.4f}")

#ticker_data.to_csv('/content/drive/MyDrive/Colab Notebooks/VolSurf_ML/ticker_data.csv', index=False)

NameError: name 'yf' is not defined

In [11]:
hist_price = yf.Ticker(TICKER).history(period="1y")
print(f'Hist price for {TICKER} over 1 year {hist_price}')


NameError: name 'yf' is not defined