In [None]:
# Notebook bootstrap: extend the import search path, then configure the project root.
import sys

# Relative entry, so it is resolved against the kernel's current working directory.
# It is appended before the `config` import below, presumably so that import can be
# found two directories up -- TODO confirm where config.py actually lives.
sys.path.append("../..")
from config import set_project_root

# NOTE(review): the helper is defined in `config`, outside this notebook; presumably it
# anchors project-relative paths three directory levels up -- confirm against config.py.
set_project_root(levels_up=3)

In [None]:
import pandas as pd
import numpy as np
from research.datasets import CRSP
import statsmodels.formula.api as smf

In [None]:
# Load the CRSP monthly data.
# A deep copy is taken so later cells never modify the frame held by the loader.
df = CRSP().df.copy(deep=True)

# Preview the first rows only.
df.head()

In [None]:
# In-sample window (both bounds inclusive)
start = np.datetime64("1929-01-01")
end = np.datetime64("1982-12-31")

# `between` is inclusive on both ends by default, i.e. start <= date <= end.
# The index is renumbered 0..n-1 after the rows outside the window are removed.
df = df.loc[df["date"].between(start, end)].reset_index(drop=True)

df

In [None]:
# Cleaning
# Keep the identifier, date, price and return columns, then derive calendar
# helpers from `date`: a "YYYY-MM" label plus integer month and year.
# `assign` returns a new frame, so the frame from the previous cell is not modified.
df = df.loc[:, ["permno", "ticker", "date", "prc", "ret"]].assign(
    mdt=lambda frame: frame["date"].dt.strftime("%Y-%m"),
    month=lambda frame: frame["date"].dt.month,
    year=lambda frame: frame["date"].dt.year,
)


df.head()

In [None]:
# Target Variable
# Per-stock rolling mean of `ret` over a 59-row window (min_periods=59, so the
# first 58 rows of every permno are NaN).
#
# groupby(...).rolling(...) returns a (permno, original row label) MultiIndex in
# group order. Dropping ONLY the permno level keeps the original row labels, so
# the assignment aligns row-for-row no matter how `df` is sorted. A plain
# reset_index(drop=True) would renumber the values in group order and silently
# misalign them whenever `df` is not already sorted contiguously by permno.
# The window runs over rows in their current order -- assumes rows are in date
# order within each permno; TODO confirm.
df["ret_60_mean"] = (
    df.groupby("permno")["ret"].rolling(59, 59).mean().reset_index(level=0, drop=True)
)

# Shift by -1 within each stock: every row receives the rolling mean computed at
# that stock's NEXT row, i.e. a mean over the stock's rows t-57 .. t+1.
# NOTE(review): the window is 59 rows although the column is named "ret_60_mean",
# and after the shift the mean includes both the current and the next row's return.
# Confirm this is the intended benchmark window.
df["ret_60_mean"] = df.groupby("permno")["ret_60_mean"].shift(-1)

# Dependent variable: return in excess of the stock's own rolling mean.
df["xs_ret"] = df["ret"] - df["ret_60_mean"]


# Explanatory Variables
def create_lags(df, lags, column="ret", group_col="permno"):
    """Add within-group lagged copies of ``column`` to ``df``.

    For every ``lag`` in ``lags`` a new column named ``f"{column}_lag_{lag}"``
    is written. It holds ``column`` shifted by ``lag`` rows inside each
    ``group_col`` group, so values never leak from one group into another.
    The shift is positional: rows are taken in their current order within
    each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column`` and ``group_col``. It is modified in place.
    lags : iterable of int
        Row offsets to shift by; one output column is created per entry.
    column : str, default "ret"
        Name of the column to lag.
    group_col : str, default "permno"
        Name of the column identifying the groups the shift is applied within.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with the lag columns added.
    """
    # The grouping does not depend on the lag, so build it once and reuse it.
    grouped = df.groupby(group_col)[column]
    for lag in lags:
        df[f"{column}_lag_{lag}"] = grouped.shift(lag)
    return df


# Lags used as regressors: each of the previous 12 rows, plus rows 24 and 36 back.
lags = list(range(1, 13)) + [24, 36]

df = create_lags(df, lags)

# Drop only the rows that are missing a variable the regression actually uses
# (the dependent variable or any lagged return). A bare dropna() would also
# discard rows whose only gap is in a column the model never reads, such as
# `ticker` or `prc`, shrinking the sample for no modelling reason.
# NOTE(review): if rows with a missing price/ticker are meant to be excluded,
# filter on those columns explicitly instead.
model_cols = ["xs_ret"] + [f"ret_lag_{lag}" for lag in lags]
df = df.dropna(subset=model_cols)

df

In [None]:
# Monthly cross-sectional regressions: for every month label in `mdt`, regress
# xs_ret on the lagged returns across the rows of that month and keep the
# estimated coefficients and their t-statistics.
formula = "xs_ret ~ " + " + ".join([f"ret_lag_{lag}" for lag in lags])

mdts = df["mdt"].unique()

result_df_list = []

# groupby splits the panel in a single pass instead of re-scanning the whole
# frame with a boolean mask for every month. sort=False yields the months in
# order of first appearance, the same order as `mdts`.
for mdt, slice_df in df.groupby("mdt", sort=False):
    # Fit the OLS regression model for the current month using the formula syntax
    model = smf.ols(formula=formula, data=slice_df)
    result = model.fit()

    # Read the term names from the fitted model rather than assuming their
    # position; the formula interface labels the constant "Intercept", which is
    # stored in lower case here.
    names = ["intercept" if term == "Intercept" else term for term in result.params.index]

    result_df = pd.DataFrame(
        {
            "name": names,
            "mdt": mdt,
            "coef": result.params.values,
            "t_stat": result.tvalues.values,
        }
    )

    result_df_list.append(result_df)

# One row per (month, term): stack the monthly results
combined = pd.concat(result_df_list)

combined

In [None]:
# Time-series averages of the monthly estimates, one column per regressor.
# The "t_stat" row is the plain average of the monthly t-statistics; it is kept
# for continuity but is not itself a test statistic for the average coefficient.
coef_by_name = combined.groupby("name")["coef"]
summary = combined[["name", "coef", "t_stat"]].groupby("name").mean()

# Fama-MacBeth t-statistic: the mean monthly coefficient divided by its
# time-series standard error, std(coef) / sqrt(number of months).
summary["fm_t_stat"] = coef_by_name.mean() / (coef_by_name.std() / np.sqrt(coef_by_name.count()))

summary.T