In [213]:
import numpy as np
import pandas as pd
from QuantConnect.Research import QuantBook
import statsmodels.api as sm

In [214]:
ff_data_path = "Additional Research Data/F-F_Research_Data_Factors_daily_CSV/F-F_Research_Data_Factors_daily.CSV"
col_names = ["date", "Mkt", "SMB", "HML", "RF"]
ff_factors = pd.read_csv(ff_data_path, names = col_names, index_col="date", skiprows=5, header=None, skipfooter=1, delimiter=",", engine="python")
ff_factors.index = pd.to_datetime(ff_factors.index.astype(str), format="%Y%m%d").date

In [215]:
ff_factors.loc[:, "Mkt":"HML"] /= 100
ff_factors

Unnamed: 0,Mkt,SMB,HML,RF
1926-07-01,0.0010,-0.0025,-0.0027,0.009
1926-07-02,0.0045,-0.0033,-0.0006,0.009
1926-07-06,0.0017,0.0030,-0.0039,0.009
1926-07-07,0.0009,-0.0058,0.0002,0.009
1926-07-08,0.0021,-0.0038,0.0019,0.009
...,...,...,...,...
2024-12-24,0.0111,-0.0009,-0.0005,0.017
2024-12-26,0.0002,0.0104,-0.0019,0.017
2024-12-27,-0.0117,-0.0066,0.0056,0.017
2024-12-30,-0.0109,0.0012,0.0074,0.017


In [216]:
quant_book = QuantBook()

20250209 22:01:48.614 TRACE:: Config.GetValue(): qb-data-hour - Using default value: 9
20250209 22:01:48.624 TRACE:: Config.Get(): Configuration key not found. Key: lean-manager-type - Using default value: LocalLeanManager
20250209 22:01:48.625 TRACE:: Config.Get(): Configuration key not found. Key: data-permission-manager - Using default value: DataPermissionManager
20250209 22:01:48.627 TRACE:: Config.GetValue(): zip-data-cache-provider - Using default value: 10
20250209 22:01:48.627 TRACE:: Config.Get(): Configuration key not found. Key: fundamental-data-provider - Using default value: CoarseFundamentalDataProvider
20250209 22:01:48.628 TRACE:: Config.GetValue(): algorithm-manager-time-loop-maximum - Using default value: 20
20250209 22:01:48.628 TRACE:: Config.GetValue(): storage-limit - Using default value: 10737418240
20250209 22:01:48.629 TRACE:: Config.GetValue(): storage-permissions - Using default value: 3
20250209 22:01:48.629 TRACE:: LocalObjectStore.Initialize(): Storage Ro

In [217]:
start_date = datetime(2010, 1, 1)
end_date = datetime(2024,1,1)
value_stocks = ["BRK-B", "VZ", "F"]
growth_stocks = ["TSLA", "AMZN", "NVDA"]
small_caps = ["ROKU", "ETSY", "BYND"]
large_caps = ["AAPL", "MSFT", "GOOGL"]

In [218]:
def fetch_stock_data(tickers, category_name):
    data = {}
    for ticker in tickers :
        symbol = quant_book.add_equity(ticker).symbol
        history = quant_book.history(symbol, start_date, end_date, Resolution.Daily)
        data[ticker] = history
        print(f"Download data for {ticker} ({category_name})")
    return data

In [219]:
value_data = fetch_stock_data(value_stocks, "Value Stocks")
growth_data = fetch_stock_data(growth_stocks, "Growth Stocks")
small_cap_data = fetch_stock_data(small_caps, "Small Caps")
large_cap_data = fetch_stock_data(large_caps, "Large Caps")

Download data for BRK-B (Value Stocks)
Download data for VZ (Value Stocks)
Download data for F (Value Stocks)
Download data for TSLA (Growth Stocks)
Download data for AMZN (Growth Stocks)
Download data for NVDA (Growth Stocks)
Download data for ROKU (Small Caps)
Download data for ETSY (Small Caps)
Download data for BYND (Small Caps)
Download data for AAPL (Large Caps)
Download data for MSFT (Large Caps)
Download data for GOOGL (Large Caps)


In [220]:
large_cap_data["AAPL"]["close"].head()

symbol  time               
AAPL    2010-01-04 16:00:00    6.588835
        2010-01-05 16:00:00    6.594988
        2010-01-06 16:00:00    6.490086
        2010-01-07 16:00:00    6.478088
        2010-01-08 16:00:00    6.521156
Name: close, dtype: float64

In [221]:
aapl_data = large_cap_data["AAPL"].droplevel(0)
aapl_data.index = pd.to_datetime(aapl_data.index).date
aapl_data = aapl_data.pct_change().dropna()
aapl_data

Unnamed: 0,close,high,low,open,volume
2010-01-05,0.000934,0.005082,0.004096,0.005807,0.230321
2010-01-06,-0.015906,-0.001670,-0.011723,-0.002002,-0.074314
2010-01-07,-0.001849,-0.015007,-0.008019,-0.012502,-0.137719
2010-01-08,0.006648,0.000000,0.000000,-0.005622,-0.088583
2010-01-11,-0.008822,0.004717,-0.002918,0.010974,0.069735
...,...,...,...,...,...
2021-03-26,0.005141,-0.001480,-0.000672,0.007872,-0.043865
2021-03-29,0.001485,0.009055,0.015136,0.010968,-0.168643
2021-03-30,-0.012274,-0.009708,-0.015408,-0.013643,0.111270
2021-03-31,0.018766,0.017712,0.019266,0.013666,0.303189


In [222]:
def run_regression(start_date, end_date, ff_factors, stock_data):
    stock_data_view = stock_data["close"].loc[start_date:end_date]
    rfr_view = ff_factors.loc[start_date:end_date, "RF"]
    ff_factors_view = ff_factors.loc[start_date:end_date, "Mkt":"HML"]
    rfr_stock_view = stock_data_view - rfr_view
    regression_df = pd.concat([ff_factors_view, rfr_stock_view], axis = 1)
    regression_df.columns = ["Mkt", "SMB", "HML", "Stock"]
    X = regression_df.loc[:,"Mkt":"HML"]
    y = regression_df.loc[:, "Stock"]
    X = sm.add_constant(X)
    model = sm.OLS(y, X).fit()
    return model.summary()



In [223]:
start_date = pd.to_datetime("2010-01-05").date()
end_date = pd.to_datetime("2021-01-01").date()

In [224]:
summary = run_regression(start_date, end_date, ff_factors, aapl_data)
print(summary)

                            OLS Regression Results                            
Dep. Variable:                  Stock   R-squared:                       0.465
Model:                            OLS   Adj. R-squared:                  0.465
Method:                 Least Squares   F-statistic:                     801.8
Date:                Sun, 09 Feb 2025   Prob (F-statistic):               0.00
Time:                        22:01:53   Log-Likelihood:                 8041.3
No. Observations:                2768   AIC:                        -1.607e+04
Df Residuals:                    2764   BIC:                        -1.605e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0015      0.000     -6.030      0.0