In [23]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy.linalg import toeplitz
from statsmodels.tsa.arima.model import ARIMA
from itertools import product

In [32]:
start_date = '2018-01-01'
end_date = '2024-06-30'

data = yf.download(['AAPL', 'SPY'], start=start_date, end=end_date, progress=False)['Close']

returns = data.pct_change().dropna()
returns.columns = ['AAPL_Return', 'Market_Return']

In [25]:
ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
ff_data = pd.read_csv(ff_url, skiprows=3)

In [26]:
ff_data = ff_data.rename(columns={ff_data.columns[0]: "Date"})
ff_data = ff_data[ff_data['Date'].str.isnumeric()]
ff_data['Date'] = pd.to_datetime(ff_data['Date'], format='%Y%m%d')
ff_data = ff_data.set_index('Date')
ff_data = ff_data.astype(float) / 100
start_date = returns.index.min()
end_date = returns.index.max()

ff_data = ff_data.loc[start_date:end_date] 
returns = returns.merge(ff_data[['Mkt-RF', 'SMB', 'HML', 'RF']], left_index=True, right_index=True)
returns['Excess_AAPL'] = returns['AAPL_Return'] - returns['RF']
returns['Excess_Market'] = returns['Market_Return'] - returns['RF']

In [27]:
X = returns[['Mkt-RF', 'SMB', 'HML']]
X = sm.add_constant(X)
y = returns['Excess_AAPL']

ff_model = sm.OLS(y, X).fit()
print(ff_model.summary())

                            OLS Regression Results                            
Dep. Variable:            Excess_AAPL   R-squared:                       0.660
Model:                            OLS   Adj. R-squared:                  0.660
Method:                 Least Squares   F-statistic:                     1055.
Date:                Fri, 07 Feb 2025   Prob (F-statistic):               0.00
Time:                        17:49:51   Log-Likelihood:                 4979.0
No. Observations:                1632   AIC:                            -9950.
Df Residuals:                    1628   BIC:                            -9928.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      1.652      0.0

In [28]:
# Estimation of Sigma

residuals = pd.Series(ff_model.resid)

def find_best_arima(data, p_range, d_range, q_range):
    best_aic = np.inf
    best_order = None
    best_model = None

    for order in product(p_range, d_range, q_range):
        try:
            model = ARIMA(data, order=order).fit()
            if model.aic < best_aic:
                best_aic = model.aic
                best_order = order
                best_model = model
        except:
            continue 

    return best_model, best_order

p_range = range(0, 4)
d_range = range(0, 2)
q_range = range(0, 4)

warnings.filterwarnings("ignore", message=".*A date index has been provided.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Non-stationary starting autoregressive parameters.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Non-invertible starting MA parameters found.*")
warnings.filterwarnings("ignore", category=UserWarning, message=".*Maximum Likelihood optimization failed to converge.*")

model, best_order = find_best_arima(residuals, p_range, d_range, q_range)

In [29]:
# Extract parameters
sigma2 = model.scale 
ar_params = model.arparams if model.arparams.size > 0 else np.array([])
ma_params = model.maparams if model.maparams.size > 0 else np.array([])

# AR and MA polynomials
ar_poly = np.r_[1, -ar_params] if ar_params.size else np.array([1])
ma_poly = np.r_[1, ma_params] if ma_params.size else np.array([1])

# Manual autocovariance computation
def compute_acovf(ar, ma, sigma2, lags):
    acovf = np.zeros(lags)
    acovf[0] = sigma2 * (1 + np.sum(ma**2)) / (1 - np.sum(ar**2)) if np.sum(ar**2) < 1 else sigma2

    for lag in range(1, lags):
       
        ar_slice = ar[:min(lag, len(ar))]
        acovf_slice = acovf[:lag][::-1]

        
        if len(ar_slice) != len(acovf_slice):
            diff = abs(len(ar_slice) - len(acovf_slice))
            if len(ar_slice) < len(acovf_slice):
                ar_slice = np.pad(ar_slice, (0, diff))
            else:
                acovf_slice = np.pad(acovf_slice, (0, diff))

        acovf[lag] = np.dot(ar_slice, acovf_slice)

    return acovf


acovf = compute_acovf(ar_params, ma_params, sigma2, len(residuals))


cov_matrix = toeplitz(acovf)


In [30]:
#GLS

ff_gls = sm.GLS(y, X, sigma=cov_matrix).fit()
print(ff_gls.summary())

                            GLS Regression Results                            
Dep. Variable:            Excess_AAPL   R-squared:                       0.660
Model:                            GLS   Adj. R-squared:                  0.660
Method:                 Least Squares   F-statistic:                     1055.
Date:                Fri, 07 Feb 2025   Prob (F-statistic):               0.00
Time:                        17:50:05   Log-Likelihood:                 4979.0
No. Observations:                1632   AIC:                            -9950.
Df Residuals:                    1628   BIC:                            -9928.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0005      0.000      1.652      0.0