In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.formula.api as smf

### Data Preparation

In [2]:
# Monthly stock returns (columns DATE, TICKER, RET), with text decoded as UTF-8.
stkdata = pd.read_sas(filepath_or_buffer='stkdata.sas7bdat', encoding='utf-8')

# Print the first five rows as a quick sanity check of the import.
print(stkdata.head(n=5))

        DATE TICKER       RET
0 2011-01-31   AAPL  0.051959
1 2011-02-28   AAPL  0.040935
2 2011-03-31   AAPL -0.013314
3 2011-04-29   AAPL  0.004656
4 2011-05-31   AAPL -0.006569


In [3]:
# Restrict the sample to the three stocks analysed in the regressions below.
keep_rows = stkdata['TICKER'].isin(['AAPL', 'INTC', 'MSFT'])
stkdata = stkdata.loc[keep_rows]

In [4]:
# Market-wide factor data keyed by DATE (merged onto the stock returns later).
mktdata = pd.read_sas(filepath_or_buffer='mktdata.sas7bdat', encoding='utf-8')

In [5]:
# Attach the market-wide factors to every stock-month (inner join on DATE).
# `validate` makes the merge fail loudly if a DATE occurs more than once in
# mktdata; without it such duplicates would silently replicate stock-month
# rows and distort every regression estimated from Regdata.
Regdata = pd.merge(
    stkdata,
    mktdata,
    how='inner',
    on='DATE',
    validate='many_to_one',
)

In [6]:
# Excess stock return: raw return minus RF (presumably the risk-free rate).
Regdata['RETRF'] = Regdata['RET'].sub(Regdata['RF'])

### CAPM estimation

In [7]:
# Estimate the CAPM regression (RETRF on MKTRF) separately for each stock.
for TIC in ('AAPL', 'INTC', 'MSFT'):
    tempdf = Regdata.loc[Regdata['TICKER'] == TIC]
    mdl = smf.ols(formula='RETRF ~ MKTRF', data=tempdf).fit()
    # Ticker label framed by blank lines, followed by the regression table.
    print(f'\n{TIC}\n')
    print(mdl.summary())


AAPL

                            OLS Regression Results                            
Dep. Variable:                  RETRF   R-squared:                       0.306
Model:                            OLS   Adj. R-squared:                  0.300
Method:                 Least Squares   F-statistic:                     52.02
Date:                Mon, 12 Apr 2021   Prob (F-statistic):           5.71e-11
Time:                        02:54:01   Log-Likelihood:                 155.82
No. Observations:                 120   AIC:                            -307.6
Df Residuals:                     118   BIC:                            -302.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0122      0.006      1.930  

### Fama and French 3-Factor Model Estimation and F-Test against CAPM

In [8]:
# The joint restriction is identical for every stock: if the CAPM is
# sufficient, the SMB and HML loadings are both zero.
hypotheses = '(SMB = 0), (HML=0)'

for TIC in ('AAPL', 'INTC', 'MSFT'):
    tempdf = Regdata.loc[Regdata['TICKER'] == TIC]
    mdl = smf.ols(formula='RETRF ~ MKTRF + SMB + HML', data=tempdf).fit()
    # Ticker label framed by blank lines, then the three-factor table.
    print(f'\n{TIC}\n')
    print(mdl.summary(), end='\n\n')
    # F-test of the three-factor model against the nested CAPM.
    f_test = mdl.f_test(hypotheses)
    print(f_test)


AAPL

                            OLS Regression Results                            
Dep. Variable:                  RETRF   R-squared:                       0.388
Model:                            OLS   Adj. R-squared:                  0.372
Method:                 Least Squares   F-statistic:                     24.50
Date:                Mon, 12 Apr 2021   Prob (F-statistic):           2.36e-12
Time:                        02:54:01   Log-Likelihood:                 163.36
No. Observations:                 120   AIC:                            -318.7
Df Residuals:                     116   BIC:                            -307.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0064      0.006      1.037  

### Collinearity (VIF)

In [9]:
# Collinearity check via auxiliary regressions: each factor is regressed on
# the other two, and its variance inflation factor is VIF_j = 1 / (1 - R_j^2).
# The regression summary alone leaves that last step to the reader, so the
# VIF value is printed explicitly after each table.
vif_formulas = (
    'MKTRF ~ SMB + HML',
    'SMB ~ MKTRF + HML',
    'HML ~ MKTRF + SMB',
)

vif_models = []
for vif_number, vif_formula in enumerate(vif_formulas, start=1):
    aux_mdl = smf.ols(vif_formula, data=mktdata).fit()
    vif_models.append(aux_mdl)
    print(f'VIF{vif_number}')
    print()
    print(aux_mdl.summary())
    print()
    # R^2 close to 1 (VIF large) signals that the factor is nearly a linear
    # combination of the other two.
    print(f'VIF{vif_number} = {1.0 / (1.0 - aux_mdl.rsquared):.4f}')
    print()

# Keep the original per-model names available for any later cells.
mdl_VIF1, mdl_VIF2, mdl_VIF3 = vif_models

VIF1

                            OLS Regression Results                            
Dep. Variable:                  MKTRF   R-squared:                       0.176
Model:                            OLS   Adj. R-squared:                  0.162
Method:                 Least Squares   F-statistic:                     12.52
Date:                Mon, 12 Apr 2021   Prob (F-statistic):           1.18e-05
Time:                        02:54:01   Log-Likelihood:                 224.47
No. Observations:                 120   AIC:                            -442.9
Df Residuals:                     117   BIC:                            -434.6
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0126      0.003      3.607   

### Constructing D Variable

In [10]:
# Market-direction dummy: D is set to 1 on rows where MKTRF >= 0 and to 0 on
# rows where MKTRF < 0. Rows matching neither comparison are not assigned.
for dummy_value, row_mask in ((1, Regdata['MKTRF'] >= 0), (0, Regdata['MKTRF'] < 0)):
    Regdata.loc[row_mask, 'D'] = dummy_value

In [11]:
# With three tickers per month, 15 rows cover the first five months; the
# first D=0 only shows up in the fifth month, so preview that many rows.
preview_rows = 15
Regdata.head(preview_rows)

Unnamed: 0,DATE,TICKER,RET,SMB,HML,MKTRF,RF,UMD,RETRF,D
0,2011-01-31,AAPL,0.051959,-0.0252,0.0082,0.0199,0.0001,-0.0029,0.051859,1.0
1,2011-01-31,INTC,0.020447,-0.0252,0.0082,0.0199,0.0001,-0.0029,0.020347,1.0
2,2011-01-31,MSFT,-0.006628,-0.0252,0.0082,0.0199,0.0001,-0.0029,-0.006728,1.0
3,2011-02-28,AAPL,0.040935,0.0153,0.0129,0.0349,0.0001,0.0208,0.040835,1.0
4,2011-02-28,INTC,0.00891,0.0153,0.0129,0.0349,0.0001,0.0208,0.00881,1.0
5,2011-02-28,MSFT,-0.035528,0.0153,0.0129,0.0349,0.0001,0.0208,-0.035628,1.0
6,2011-03-31,AAPL,-0.013314,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.013414,1.0
7,2011-03-31,INTC,-0.060084,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.060184,1.0
8,2011-03-31,MSFT,-0.044771,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.044871,1.0
9,2011-04-29,AAPL,0.004656,-0.0037,-0.0243,0.029,0.0,0.0006,0.004656,1.0


### Chow Test

In [12]:
# Interaction terms for the Chow test: each factor multiplied by the dummy D,
# stored under the factor's name prefixed with 'D' (DMKTRF, DSMB, DHML).
for factor in ('MKTRF', 'SMB', 'HML'):
    Regdata['D' + factor] = Regdata[factor] * Regdata['D']

In [13]:
# Preview the first 15 rows to inspect the newly added interaction columns.
Regdata.head(n=15)

Unnamed: 0,DATE,TICKER,RET,SMB,HML,MKTRF,RF,UMD,RETRF,D,DMKTRF,DSMB,DHML
0,2011-01-31,AAPL,0.051959,-0.0252,0.0082,0.0199,0.0001,-0.0029,0.051859,1.0,0.0199,-0.0252,0.0082
1,2011-01-31,INTC,0.020447,-0.0252,0.0082,0.0199,0.0001,-0.0029,0.020347,1.0,0.0199,-0.0252,0.0082
2,2011-01-31,MSFT,-0.006628,-0.0252,0.0082,0.0199,0.0001,-0.0029,-0.006728,1.0,0.0199,-0.0252,0.0082
3,2011-02-28,AAPL,0.040935,0.0153,0.0129,0.0349,0.0001,0.0208,0.040835,1.0,0.0349,0.0153,0.0129
4,2011-02-28,INTC,0.00891,0.0153,0.0129,0.0349,0.0001,0.0208,0.00881,1.0,0.0349,0.0153,0.0129
5,2011-02-28,MSFT,-0.035528,0.0153,0.0129,0.0349,0.0001,0.0208,-0.035628,1.0,0.0349,0.0153,0.0129
6,2011-03-31,AAPL,-0.013314,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.013414,1.0,0.0046,0.0258,-0.0176
7,2011-03-31,INTC,-0.060084,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.060184,1.0,0.0046,0.0258,-0.0176
8,2011-03-31,MSFT,-0.044771,0.0258,-0.0176,0.0046,0.0001,0.0352,-0.044871,1.0,0.0046,0.0258,-0.0176
9,2011-04-29,AAPL,0.004656,-0.0037,-0.0243,0.029,0.0,0.0006,0.004656,1.0,0.029,-0.0037,-0.0243


In [14]:
# Chow-test restriction, identical for every stock: the intercept shift D and
# all three slope shifts (DMKTRF, DSMB, DHML) are jointly zero.
hypotheses = '(D=0), (DMKTRF=0), (DSMB = 0), (DHML=0)'

for TIC in ('AAPL', 'INTC', 'MSFT'):
    tempdf = Regdata.loc[Regdata['TICKER'] == TIC]
    mdl = smf.ols(
        formula='RETRF ~ D + MKTRF + SMB + HML + DMKTRF + DSMB + DHML',
        data=tempdf,
    ).fit()
    # Ticker label framed by blank lines, then the unrestricted-model table.
    print(f'\n{TIC}\n')
    print(mdl.summary(), end='\n\n')
    # Joint F-test of the dummy and its interactions.
    f_test = mdl.f_test(hypotheses)
    print(f_test)


AAPL

                            OLS Regression Results                            
Dep. Variable:                  RETRF   R-squared:                       0.408
Model:                            OLS   Adj. R-squared:                  0.371
Method:                 Least Squares   F-statistic:                     11.01
Date:                Mon, 12 Apr 2021   Prob (F-statistic):           1.59e-10
Time:                        02:54:01   Log-Likelihood:                 165.33
No. Observations:                 120   AIC:                            -314.7
Df Residuals:                     112   BIC:                            -292.4
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0058      0.016     -0.373  