In [1]:
pip install --upgrade yfinance==0.2.54



In [2]:
pip install arch

Collecting arch
  Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (985 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m985.3/985.3 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7.2.0


In [3]:
pip install scikit-learn



In [4]:
pip install pandas_datareader



In [5]:
import yfinance as yf
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
import numpy as np
import statsmodels.api as sm  # for CAPM and Fama-French regressions
import pandas_datareader
import matplotlib.pyplot as plt
from arch import arch_model
import itertools

In [None]:
# 1. Load Fama–French Factor Data

# skiprows=4 drops the first four lines of the file; header=0 then consumes the
# next line as the header row, and its labels are replaced by `names`.
# NOTE(review): confirm that the consumed line really is the file's own header
# row and not the first month of data.
# Date is read as text so the monthly-row filter below behaves the same whether
# the column holds only YYYYMM values or also contains non-data rows.
ff = pd.read_csv(
    'F-F_Research_Data_Factors.CSV',
    skiprows=4,
    header=0,
    names=['Date', 'Mkt-RF', 'SMB', 'HML', 'RF'],
    dtype={'Date': str},
)

# Keep only monthly observations, i.e. rows whose Date is exactly six digits
# (YYYYMM). The CSV from the Ken French data library commonly ends with an
# annual-factors table (four-digit years) and a copyright line; those rows
# would otherwise make the strict date parsing and float conversion below fail.
ff['Date'] = ff['Date'].str.strip()
is_monthly_row = ff['Date'].str.fullmatch(r'\d{6}', na=False)
ff = ff.loc[is_monthly_row].copy()

# Date is in YYYYMM format (monthly data); each month is stamped with its first day.
ff['Date'] = pd.to_datetime(ff['Date'], format='%Y%m')
ff = ff.set_index('Date')

# Convert factor returns from percentages to decimals
ff = ff.astype(float) / 100

In [None]:
#2. Forecast Factors using optimal GARCH(1,1)
#(Please multiply your ff (Fama-French) factors by 100 to prevent a scaling error in the GARCH predictions)

#2.1 You should do some data cleaning first on the factors ( dropna() )

#2.2 Convert everything to log returns, try this format np.log(1 + "your cleaned factors data")

#2.3 You will need to use the log returns to find the best GARCH parameters

#These are the parameters you will be working with in the GARCH model
#mean_models = ['Constant', 'AR']
#vol_models = ['GARCH', 'EGARCH']
#dists = ['normal', 't']

#(You should create a function that loops through these parameters given your log returns and finds the best combination of mean, vol and dists)
# Check tutorial4 solution for this loop

#2.4 Forecast one period ahead (next month) and rescale the forecasts

#(Note that the forecasted mean is on a 100x scale and the variance on a 100^2 scale), so to convert back to simple returns you should do something like:
# next_day_mean_rescaled = np.exp(next_day_mean / 100) - 1, next_day_variance_rescaled = np.exp(next_day_variance / 100**2) - 1
# next_day_mean and next_day_variance are the predictions from your best fitted GARCH model with the log returns


In [None]:
# 3. Download Stock Data and Run Regressions

tickers = ["AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ", "V", "PG"]

# Monthly adjusted closing prices, one column per ticker.
prices = yf.download(
    tickers,
    start='2009-12-31',
    end='2024-12-31',
    interval='1mo',
    auto_adjust=True,
)['Close']

# Simple month-over-month returns.
# fill_method=None turns off pct_change's implicit forward-fill (deprecated in
# recent pandas): a missing price then yields NaN returns, which dropna()
# removes, instead of a fabricated 0% return for that month.
returns = prices.pct_change(fill_method=None).dropna()

# Align dates of stock returns and factor data
common_dates = returns.index.intersection(ff.index)
if common_dates.empty:
    # Without this check both frames would silently become empty and the
    # regressions further down would fail with a much less helpful message.
    raise ValueError(
        "No overlapping dates between stock returns and Fama-French factors; "
        "check that both indexes use the same month-start timestamps."
    )
returns = returns.loc[common_dates]
# Note: from here on `ff` only covers the dates shared with the stock returns.
ff = ff.loc[common_dates]

In [None]:
# 4. Run OLS regressions to estimate factor loadings and obtain residual variances.

# Now you need to combine the tickers data with the ff (Fama-French) data

# 4.1 To combine the data, create a pd.DataFrame that takes the returns and joins them with the ff factors

#df = pd.DataFrame()
#df.join(ff[['Mkt-RF', 'SMB', 'HML', 'RF']])

# 4.2 Compute excess returns: r - RF

# 4.3 We want to define the variables for the regression. Below is the equation we are working with; our goal is to find
# alpha and beta 1, 2 and 3

# Note: to find the alpha and betas, use sm.OLS(y, X).fit(), which is an ordinary least squares fit, where y and X are matrices
# (you should define them as matrices like: X = df[['A', 'B', 'C']] and y = df['D']).
# sm.OLS does not add an intercept by itself, so use X = sm.add_constant(X) to get the column of ones that estimates alpha.
# X and the coefficients can be multiplied out to obtain the second equation (presented in the lecture)

# 4.4 print out the result of the alpha and betas

\begin{equation}
\begin{aligned}
y &= \begin{pmatrix} r_1 - RF_1 \\ r_2 - RF_2 \\ \vdots \\ r_T - RF_T \end{pmatrix}, \quad
X = \begin{pmatrix} 1 & (MktRF)_1 & (SMB)_1 & (HML)_1 \\ 1 & (MktRF)_2 & (SMB)_2 & (HML)_2 \\ \vdots & \vdots & \vdots & \vdots \\ 1 & (MktRF)_T & (SMB)_T & (HML)_T \end{pmatrix}, \\[1ex]
\min_{\beta}\ & \| y - X\beta \|^2, \quad \hat{\beta} = (X^T X)^{-1} X^T y.
\end{aligned}
\tag{1}
\end{equation}

\begin{equation}
r_t - RF_t = \alpha + \beta_1 (MktRF)_t + \beta_2 (SMB)_t + \beta_3 (HML)_t.
\tag{2}
\end{equation}

In [None]:
# 5. Forecast Next Day Expected Returns (r_{i,t+1}): keep the same alpha and betas and the last Risk-Free value, and use the forecasted ff (Fama-French) factors
# to make a prediction of the next time-step return r at t+1. (Note that the ff factors are forecasted for a month, but use that for the daily forecast as well)
# Convert forecasted factor mean from 100x back to original scale by dividing by 100.

\begin{equation}
r_{t+1} - RF_{t} = \alpha + \beta_1 (MktRF)_{t+1} + \beta_2 (SMB)_{t+1} + \beta_3 (HML)_{t+1}.
\tag{3}
\end{equation}

In [None]:
# 6. Calculate Total Forecast Error Variance for r_{i,t+1}
# Total variance = sum_j (beta_{ij}^2 * (sigma_{j, forecast}^2 / 100^2)) + sigma^2_reg,
# where sigma^2_reg is the mean squared error (residual variance) from the regression
# Convert forecasted variance from the 100^2 scale (returns were multiplied by 100) back to original scale: divide by 100^2