### Idiosyncratic Volatility Calculation

In [2]:
import pandas as pd
import polars as pl
import polars_ols as pls

In [3]:
start_date = "1976-01-01"
end_date = "2024-12-31"
window = 252

In [4]:
df = pd.read_csv("daily_crsp_1976_andOn.csv")

In [5]:
# Asset-level variables
asset_df = df[["permno", "date", "stock_ret"]].assign(permno=lambda x: x["permno"].astype(str))

# Market-level variables
market_df = df[["date", "mkt_ret"]].drop_duplicates().reset_index(drop=True)

In [6]:
asset_df.head()

Unnamed: 0,permno,date,stock_ret
0,10000,1986-01-07,
1,10000,1986-01-08,-0.02439
2,10000,1986-01-09,0.0
3,10000,1986-01-10,0.0
4,10000,1986-01-13,0.05


In [7]:
market_df.head()

Unnamed: 0,date,mkt_ret
0,1986-01-07,0.0138
1,1986-01-08,-0.02075
2,1986-01-09,-0.011315
3,1986-01-10,4.7e-05
4,1986-01-13,0.00268


In [8]:
def clean(df: pd.DataFrame) -> pl.DataFrame:
    # Fix multi index
    df = df.stack(future_stack=True).reset_index()

    # Lower case columns
    df = df.rename(columns={col: col.lower() for col in df.columns})

    # Compute Returns
    df["return"] = df.groupby("ticker")["close"].pct_change()

    # Drop NaN
    df = df.dropna()

    # Keep columns
    df = df[["date", "ticker", "return"]]

    return pl.from_pandas(df)

In [9]:
# Clean datasets
assets = clean(assets)
market = clean(market).drop("ticker").rename({"return": "market_return"})

# Merge
df = assets.join(market, on="date", how="left")

NameError: name 'assets' is not defined

In [None]:
asset_df["permno"] = asset_df["permno"].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  asset_df['permno'] = asset_df['permno'].astype(str)


In [None]:
# Clean datasets
assets = clean(asset_df)
market = clean(market_df).drop("ticker")

# Merge
df = assets.join(market, on="date", how="left")

AttributeError: 'int' object has no attribute 'lower'

In [None]:
df

In [None]:
# Compute regression terms
df = (
    df
    # Run rolling ols regression
    .with_columns(
        pl.col("return")
        .least_squares.rolling_ols(
            pl.col("market_return"),
            add_intercept=True,
            mode="coefficients",
            window_size=window,
            min_periods=window,
        )
        .over("ticker")
        .alias("results")
    )
    # Get results
    .with_columns(
        pl.col("results").struct.field("const"),
        pl.col("results").struct.field("market_return").alias("coefficient"),
    )
    # Drop results column
    .drop("results")
    # Drop nulls
    .drop_nulls(["const", "coefficient"]).sort(["ticker", "date"])
)

In [None]:
# Calculate residual volatility
df = (
    df
    # Compute residual
    .with_columns(
        (
            pl.col("return") - pl.col("const") - pl.col("coefficient") * pl.col("market_return")
        ).alias("residual")
    )
    # Compute volatility
    .with_columns(
        pl.col("residual")
        .rolling_std(window_size=window)
        .over("ticker")
        .alias("idiosyncrati_volatility")
    )
)

In [None]:
print(df)

shape: (22_333, 8)
┌─────────────┬────────┬───────────┬─────────────┬───────────┬────────────┬───────────┬────────────┐
│ date        ┆ ticker ┆ return    ┆ market_retu ┆ const     ┆ coefficien ┆ residual  ┆ idiosyncra │
│ ---         ┆ ---    ┆ ---       ┆ rn          ┆ ---       ┆ t          ┆ ---       ┆ ti_volatil │
│ datetime[ns ┆ str    ┆ f64       ┆ ---         ┆ f64       ┆ ---        ┆ f64       ┆ ity        │
│ ]           ┆        ┆           ┆ f64         ┆           ┆ f64        ┆           ┆ ---        │
│             ┆        ┆           ┆             ┆           ┆            ┆           ┆ f64        │
╞═════════════╪════════╪═══════════╪═════════════╪═══════════╪════════════╪═══════════╪════════════╡
│ 1994-01-27  ┆ AAPL   ┆ 0.018659  ┆ 0.009248    ┆ -0.002165 ┆ 1.265743   ┆ 0.009119  ┆ null       │
│ 00:00:00    ┆        ┆           ┆             ┆           ┆            ┆           ┆            │
│ 1994-01-28  ┆ AAPL   ┆ -0.003666 ┆ 0.002618    ┆ -0.002265 ┆ 1.245571 