In [1]:
%pip install numpy
%pip install pandas
%pip install matplotlib
%pip install yfinance
%pip install statsmodels

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import yfinance as yf

In [3]:
# Per-company table (provides the "Symbol" and "Market cap" columns used later).
df_stats = pd.read_csv("stock_data.csv")

# Price history: the CSV has a two-row column header and the dates in its first
# column. Only the "Close" entries of the second header level are kept, leaving
# one column per first-level label.
df_hist = (
    pd.read_csv("historical_data.csv", header=[0, 1], index_col=0, parse_dates=True)
    .xs("Close", axis=1, level=1)
)

In [4]:
# Preview the first rows of df_stats (loaded from stock_data.csv).
df_stats.head()

In [None]:
# Preview the first rows of df_hist (the "Close" columns of historical_data.csv).
df_hist.head()

In [None]:
"""

Call if you want to redownload data with different tickers or time range

tickers = df_stats["Symbol"].map(lambda x: x.split(" ")[0])
df_hist = yf.download(list(tickers.values), period = "max", interval = "1d", group_by = "tickers")
df_hist.dropna(inplace = True)
df_hist.to_csv("historical_data.csv")

"""

In [7]:
# Risk-free rate proxy: the CBOE 30-year Treasury yield index (^TYX).
# risk_free.csv can be regenerated with the two commented-out lines below; they
# divide the downloaded values by 252 (# of trading days in a year) to turn the
# annual figure into a daily one.
# NOTE(review): the asset returns elsewhere in this notebook are decimal
# fractions (pct_change). If ^TYX is quoted in percent, the values here also
# need dividing by 100 to be comparable — TODO confirm against the CSV.

#daily_risk_free_rate = yf.download("^TYX", period = "max", interval = "1d", group_by = "tickers").iloc[-len(df_hist):] / 252
#daily_risk_free_rate.to_csv("risk_free.csv")

# Same two-row header layout as historical_data.csv: take the "Close" level,
# then the single ^TYX column, giving a date-indexed Series.
daily_risk_free_rate = (
    pd.read_csv("risk_free.csv", header=[0, 1], index_col=0, parse_dates=True)
    .xs("Close", axis=1, level=1)["^TYX"]
)

In [None]:
# List the column labels of df_hist; the `asset` chosen in the next cell must be one of them.
df_hist.columns

In [14]:
# Data engineering for the Fama-French three-factor regression table.
# Reads df_hist (close prices, one column per ticker), df_stats ("Symbol" and
# "Market cap" columns) and daily_risk_free_rate from the earlier cells, and
# builds df_fama_3 with one row per day (the first day has no return and is dropped).

# Pick some energy company, entirely up to you: set `asset` to one of the
# column labels shown by `df_hist.columns` above.
asset = None
if asset not in df_hist.columns:
    raise ValueError(
        f"asset={asset!r} is not a column of df_hist; "
        "set `asset` to one of the tickers listed by df_hist.columns."
    )

# Day-over-day returns for every ticker, computed once and reused below.
# Row 0 is all-NaN (no previous day), so it is skipped.
daily_returns = df_hist.pct_change().iloc[1:]

df_fama_3 = pd.DataFrame(index=daily_returns.index)

# Market excess return: R_m - R_f, where R_m is the average return of our entire
# portfolio (designated as "the market" here) - feel free to pull better data
# (^SNP or ^IXIC) if you'd like.
df_fama_3["Mkt_Returns"] = daily_returns.mean(axis=1) - daily_risk_free_rate

# Asset excess return: the asset's own daily return series minus R_f.
# (No .mean() here: averaging a single ticker's Series would collapse it to one
# scalar and the regression target would no longer vary with the asset.)
df_fama_3["Asset_Returns"] = daily_returns[asset] - daily_risk_free_rate

# SMB (small minus big): top N percent and bottom N percent by market cap.
mkt_cap_threshold = .5

market_cap = df_stats["Market cap"]
# The download snippet in this notebook builds its tickers from the first
# space-separated token of "Symbol", so normalize the same way before using the
# symbols as df_hist column labels.
symbols = df_stats["Symbol"].map(lambda x: x.split(" ")[0])

top_n_mkt_cap = symbols[market_cap > market_cap.quantile(1 - mkt_cap_threshold)]
bot_n_mkt_cap = symbols[market_cap < market_cap.quantile(mkt_cap_threshold)]

df_fama_3["SMB"] = daily_returns[bot_n_mkt_cap].mean(axis=1) - daily_returns[top_n_mkt_cap].mean(axis=1)

#try coding HML, we will go over together (note, book-to-market = 1 / price-to-book)
# TODO: the regression cell below uses an "HML" column, so df_fama_3["HML"] must
# be added here before that cell can run.


In [None]:
# Display the assembled factor table built in the previous cell.
df_fama_3

In [23]:
# Split df_fama_3 by row position (no shuffling): the first `training_pct`
# share of rows is used for fitting, the remaining rows are held out.
training_pct = .8
training_index = int(training_pct * len(df_fama_3))

df_train, df_test = df_fama_3.iloc[:training_index], df_fama_3.iloc[training_index:]

In [None]:
# Fit the three-factor regression on the training rows:
#   Asset_Returns ~ Mkt_Returns + SMB + HML
# Check up front that every column named in the formula exists, so a factor that
# has not been built yet (e.g. HML) gives a clear message instead of a formula
# parsing error.
required_columns = ["Asset_Returns", "Mkt_Returns", "SMB", "HML"]
missing_columns = [col for col in required_columns if col not in df_train.columns]
if missing_columns:
    raise KeyError(
        f"df_train is missing columns needed by the regression: {missing_columns}. "
        "Add them to df_fama_3 and re-run the train/test split."
    )

model = smf.ols("Asset_Returns ~ Mkt_Returns + SMB + HML", df_train)
result = model.fit()

# Coefficient table, R-squared and diagnostics of the fitted model.
print(result.summary())

In [None]:
def plot_actual_vs_predicted(data, fitted, actual_label, predicted_label, title):
    """Plot realised asset excess returns against a fitted model's predictions.

    Parameters
    ----------
    data : DataFrame with an "Asset_Returns" column plus the factor columns
        the fitted model needs for prediction.
    fitted : fitted regression result exposing ``predict(data)``.
    actual_label, predicted_label : legend labels for the two lines.
    title : figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(data["Asset_Returns"], label = actual_label)
    ax.plot(fitted.predict(data), label = predicted_label)
    ax.set(title = title, xlabel = "Date", ylabel = "Daily excess return")
    ax.legend()
    plt.show()


# Held-out rows first, then the rows the model was fitted on.
plot_actual_vs_predicted(df_test, result, "Test", "Predicted", "Test set: actual vs. predicted")
plot_actual_vs_predicted(df_train, result, "Training", "Training_Pred", "Training set: actual vs. predicted")