In [None]:
## import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error
import zipfile

import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS


sns.set_style("darkgrid")
pd.plotting.register_matplotlib_converters()
%matplotlib inline

In [None]:
## data extraction
zip_path = "Data-20240401.zip"

# Unpack every member of the archive into the current working directory so the
# CSV files below can be read by relative path.
# The handle is called `archive`, not `zip`: binding the name `zip` at cell
# level would shadow the builtin zip() for the rest of the kernel session.
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall()

# Raw input tables, loaded with pandas' default parsing options.
# NOTE(review): presumably FF5/HXZ5/PS are factor tables and CRSP holds
# per-security returns — confirm against the data description.
FF5 = pd.read_csv('FF5_2000_2022.csv')
CRSP = pd.read_csv('CRSP_2000_2022.csv')
PS = pd.read_csv('PS_2000_2022.csv')
HXZ5 = pd.read_csv('HXZ5_2000_2022.csv')

In [None]:
# Assemble `df`, the table holding all candidate factors, indexed by YYYYMM:
#   1. join FF5 with HXZ5 on YYYYMM (pandas' default inner join),
#   2. drop the R_MKT, R_F and RF columns,
#   3. left-join PS on YYYYMM, so every row produced by steps 1-2 is kept,
#   4. make YYYYMM the index.
df = pd.merge(
    pd.merge(FF5, HXZ5, on='YYYYMM').drop(columns=['R_MKT', 'R_F', 'RF']),
    PS,
    on='YYYYMM',
    how='left',
).set_index('YYYYMM')

# Columns of `df` used as explanatory variables in the regressions.
exog_vars = [
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA',
    'R_ME', 'R_IA', 'R_ROE', 'R_EG',
    'AggLiq', 'eq8', 'LIQ_V',
]
df.tail()

In [None]:
from sklearn.linear_model import LassoCV

# Data preparation
# `y` is the MthRet column for a single PERMNO, relabelled by YYYYMM; `exog`
# holds the factor columns from `df` for exactly those YYYYMM labels
# (df.loc raises KeyError if a label is missing from `df`).
target_permno = 10324
stock_rows = CRSP.loc[CRSP['PERMNO'] == target_permno]
y = stock_rows['MthRet'].set_axis(stock_rows['YYYYMM'])
exog = df.loc[y.index, exog_vars]

# Rolling window regression
# Each window is `window_size` consecutive rows, in the order they appear in
# CRSP. NOTE(review): this assumes CRSP rows are already in chronological
# order for the PERMNO — confirm.
window_size = 60
n_periods = len(y)
n_windows = n_periods - window_size + 1
if n_windows < 1:
    # Without this guard the loop below silently does nothing and the problem
    # only surfaces at plot time as an unrelated "no numeric data" error.
    raise ValueError(
        f"Need at least {window_size} observations for PERMNO {target_permno} "
        f"to fit one window, got {n_periods}."
    )
coefficients = []

for i in range(n_windows):
    X_train = exog.iloc[i:i+window_size]
    y_train = y.iloc[i:i+window_size]

    # Fit LassoCV model (regularisation strength picked by 5-fold CV)
    model = LassoCV(cv=5)
    model.fit(X_train, y_train)

    # Store coefficients: one array per window, ordered like exog_vars
    coefficients.append(model.coef_)

# Convert coefficients to DataFrame: one row per window, one column per factor
coefficients_df = pd.DataFrame(coefficients, columns=exog_vars)

# Plot coefficients on an explicit Axes rather than the implicit pyplot state
fig, ax = plt.subplots(figsize=(12, 6))
coefficients_df.plot(ax=ax)
ax.set_xlabel('Window Index')
ax.set_ylabel('Coefficient Value')
ax.set_title('Rolling Window Regression - Lasso')
ax.legend(loc='upper right')
plt.show()
