In [None]:
## import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error
import zipfile

In [None]:
## data extraction
# Unpack the course data archive into the working directory and load the four
# CSV files it is expected to contain.
zip_path = "Data-20240401.zip"

# The handle is called `archive` rather than `zip`: a `with ... as zip` target
# stays bound in the notebook's global namespace and would shadow the built-in
# `zip` for every later cell.
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall()

# Monthly stock returns
CRSP = pd.read_csv('CRSP_2000_2022.csv')
# Fama-French 5 factor model
FF5 = pd.read_csv('FF5_2000_2022.csv')
# Pastor Stambaugh Liquidity factor
PS = pd.read_csv('PS_2000_2022.csv')
# Hou, Xue, Zhang q factor
HXZ5 = pd.read_csv('HXZ5_2000_2022.csv')

# Quick look at the first rows of each factor file.
print(HXZ5.head())

print(FF5.head())
# `head` must be called; printing the bare attribute shows the bound-method
# repr instead of the first five rows.
print(PS.head())

In [None]:
# Fama-MacBeth regression
from linearmodels import FamaMacBeth
import statsmodels.api as sm

# Mutating Date column
CRSP['Date'] = pd.to_datetime(CRSP['YYYYMM'], format='%Y%m')

# Performing left joins
ALLDATA = pd.merge(CRSP, FF5, on='YYYYMM', how='left')
ALLDATA = pd.merge(ALLDATA, HXZ5, on='YYYYMM', how='left')

# Performing inner join
ALLDATA = pd.merge(ALLDATA, PS, on='YYYYMM', how='inner')

# Dropping R_MKT column
ALLDATA = ALLDATA.drop('R_MKT', axis=1)

# Prepare the data for the regression.
# The (PERMNO, Date) MultiIndex is built BEFORE y and X are sliced out:
# linearmodels identifies entity and time from a 2-level index, and slices
# taken before the index is set would keep a plain RangeIndex and be rejected.
data = (
    ALLDATA[['PERMNO', 'Date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']]
    .dropna()
    .set_index(['PERMNO', 'Date'])
)

# Set the dependent variable
# NOTE(review): every column used below is joined on 'YYYYMM' only, so it is
# presumably identical for all PERMNOs within a month. If so, each monthly
# cross-section has no variation in the regressors and the Fama-MacBeth
# estimates are not identified. A firm-level (excess) return column from CRSP
# is likely the intended dependent variable - TODO confirm which column.
y = data['Mkt-RF']

# Set the independent variables (RMW and CMA are kept in `data` but are not
# used as regressors in this specification).
X = data[['SMB', 'HML']]
# Preview only the first rows instead of dumping the whole panel.
print(X.head())

# Add a constant column to the independent variables
X = sm.add_constant(X)

# Run the Fama-MacBeth regression
fmb = FamaMacBeth(y, X)

# Print the regression results
print(fmb.fit().summary())