### Idiosyncratic Volatility Calculation

In [41]:
import pandas as pd
import polars as pl
import polars_ols as pls
import yfinance as yf

In [42]:
tickers = ["AAPL", "GOOGL", "NVDA", "TSLA"]
start_date = "1976-01-01"
end_date = "2024-12-31"
window = 252

In [43]:
# Download
assets = yf.download(tickers, start_date, end_date)
market = yf.download(["SPY"], start_date, end_date)

[*********************100%***********************]  4 of 4 completed
[*********************100%***********************]  1 of 1 completed


In [44]:
def clean(df: pd.DataFrame) -> pl.DataFrame:
    # Fix multi index
    df = df.stack(future_stack=True).reset_index()

    # Lower case columns
    df = df.rename(columns={col: col.lower() for col in df.columns})

    # Compute Returns
    df["return"] = df.groupby("ticker")["close"].pct_change()

    # Drop NaN
    df = df.dropna()

    # Keep columns
    df = df[["date", "ticker", "return"]]

    return pl.from_pandas(df)

In [45]:
# Clean datasets
assets = clean(assets)
market = clean(market).drop("ticker").rename({"return": "market_return"})

# Merge
df = assets.join(market, on="date", how="left")

  df["return"] = df.groupby("ticker")["close"].pct_change()


In [None]:
# Compute regression terms
df = (
    df
    # Run rolling ols regression
    .with_columns(
        pl.col("return")
        .least_squares.rolling_ols(
            pl.col("market_return"),
            add_intercept=True,
            mode="coefficients",
            window_size=window,
            min_periods=window,
        )
        .over("ticker")
        .alias("results")
    )
    # Get results
    .with_columns(
        pl.col("results").struct.field("const"),
        pl.col("results").struct.field("market_return").alias("coefficient"),
    )
    # Drop results column
    .drop("results")
    # Drop nulls
    .drop_nulls(["const", "coefficient"]).sort(["ticker", "date"])
)

In [None]:
# Calculate residual volatility
df = (
    df
    # Compute residual
    .with_columns(
        (
            pl.col("return") - pl.col("const") - pl.col("coefficient") * pl.col("market_return")
        ).alias("residual")
    )
    # Compute volatility
    .with_columns(
        pl.col("residual")
        .rolling_std(window_size=window)
        .over("ticker")
        .alias("idiosyncrati_volatility")
    )
)

In [48]:
print(df)

shape: (0, 8)
┌──────────────┬────────┬────────┬───────────────┬───────┬─────────────┬──────────┬────────────────┐
│ date         ┆ ticker ┆ return ┆ market_return ┆ const ┆ coefficient ┆ residual ┆ idiosyncrati_v │
│ ---          ┆ ---    ┆ ---    ┆ ---           ┆ ---   ┆ ---         ┆ ---      ┆ olatility      │
│ datetime[ns] ┆ str    ┆ f64    ┆ f64           ┆ f64   ┆ f64         ┆ f64      ┆ ---            │
│              ┆        ┆        ┆               ┆       ┆             ┆          ┆ f64            │
╞══════════════╪════════╪════════╪═══════════════╪═══════╪═════════════╪══════════╪════════════════╡
└──────────────┴────────┴────────┴───────────────┴───────┴─────────────┴──────────┴────────────────┘
