In [1]:
import numpy as np
import pandas as pd
import quandl
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm

In [2]:
spy_table = quandl.get('LSE/SPY5')
amzn_table = quandl.get('WIKI/AMZN')
ebay_table = quandl.get('WIKI/EBAY')
wal_table = quandl.get('WIKI/WMT')
aapl_table = quandl.get('WIKI/AAPL')

In [3]:
spy = spy_table.loc['2016',['Last Close']]
amzn = amzn_table.loc['2016',['Close']]
ebay = ebay_table.loc['2016',['Close']]
wal = wal_table.loc['2016',['Close']]
aapl = aapl_table.loc['2016',['Close']]
spy_log = np.log(spy['Last Close']).diff().dropna()
amzn_log = np.log(amzn.Close).diff().dropna()
ebay_log = np.log(ebay.Close).diff().dropna()
wal_log = np.log(wal.Close).diff().dropna()
aapl_log = np.log(aapl.Close).diff().dropna()
df = pd.concat([spy_log,amzn_log,ebay_log,wal_log,aapl_log],axis = 1).dropna()
df.columns = ['spy','amzn','ebay','wal','aapl']
df.tail()

Unnamed: 0_level_0,spy,amzn,ebay,wal,aapl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-12-22,-0.004462,-0.005543,0.006112,-0.023434,-0.0066
2016-12-23,0.001372,-0.007531,0.008427,-0.000719,0.001976
2016-12-28,0.000928,0.000946,-0.007635,-0.005611,-0.004273
2016-12-29,-0.005671,-0.009081,-0.001,-0.000722,-0.000257
2016-12-30,0.002086,-0.020172,-0.00972,-0.002023,-0.007826


In [10]:
model = sm.ols(formula = 'amzn ~ spy+ebay+wal+aapl',data = df).fit()
print model.summary()

                            OLS Regression Results                            
Dep. Variable:                   amzn   R-squared:                       0.151
Model:                            OLS   Adj. R-squared:                  0.136
Method:                 Least Squares   F-statistic:                     10.23
Date:                Mon, 31 Jul 2017   Prob (F-statistic):           1.21e-07
Time:                        11:02:51   Log-Likelihood:                 622.99
No. Observations:                 235   AIC:                            -1236.
Df Residuals:                     230   BIC:                            -1219.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -9.619e-06      0.001     -0.009      0.9

In [11]:
simple = sm.ols(formula = 'amzn ~ spy',data = df).fit()
print simple.summary()

                            OLS Regression Results                            
Dep. Variable:                   amzn   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     10.63
Date:                Mon, 31 Jul 2017   Prob (F-statistic):            0.00128
Time:                        11:02:52   Log-Likelihood:                 608.99
No. Observations:                 235   AIC:                            -1214.
Df Residuals:                     233   BIC:                            -1207.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   1.234e-05      0.001      0.010      0.9

In [12]:
fama_table = quandl.get('KFRENCH/FACTORS5_D')

In [13]:
fama = fama_table['2016']
fama = fama.rename(columns = {'Mkt-RF':'MKT'})
fama = fama.apply(lambda x: x/100)
fama_df = pd.concat([fama,amzn_log],axis = 1)

In [14]:
fama_model = sm.ols(formula = 'Close~MKT+SMB+HML+RMW+CMA',data = fama_df).fit()
print fama_model.summary()

                            OLS Regression Results                            
Dep. Variable:                  Close   R-squared:                       0.387
Model:                            OLS   Adj. R-squared:                  0.375
Method:                 Least Squares   F-statistic:                     30.97
Date:                Mon, 31 Jul 2017   Prob (F-statistic):           2.21e-24
Time:                        11:03:01   Log-Likelihood:                 709.59
No. Observations:                 251   AIC:                            -1407.
Df Residuals:                     245   BIC:                            -1386.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0010      0.001      1.028      0.3

In [15]:
result = pd.DataFrame({'simple regression':simple.predict(),'fama_french':fama_model.predict(),'sample':df.amzn},index = df.index)
plt.figure(figsize = (15,7.5))
plt.plot(result['2016-7':'2016-9'].index,result.loc['2016-7':'2016-9','simple regression'])
plt.plot(result['2016-7':'2016-9'].index,result.loc['2016-7':'2016-9','fama_french'])
plt.plot(result['2016-7':'2016-9'].index,result.loc['2016-7':'2016-9','sample'])
plt.legend()
plt.show()

ValueError: Shape of passed values is (3, 251), indices imply (3, 235)

In [None]:
plt.figure()
simple.resid.plot.density()
plt.show()

In [None]:
print 'residual mean: ', np.mean(fama_model.resid)
print 'residual variance: ', np.var(fama_model.resid)

In [None]:
plt.figure(figsize = (20,10))
plt.scatter(df.spy,simple.resid)
plt.axhline(0.05,color = 'r')
plt.axhline(-0.05,color = 'r')
plt.axhline(0,color = 'black')
plt.xlabel('x value')
plt.ylabel('residual')
plt.show()

In [None]:
from statsmodels.stats import diagnostic as dia

In [None]:
het = dia.het_breuschpagan(fama_model.resid,fama_df[['MKT','SMB','HML','RMW','CMA']][1:])
print 'p-value of Heteroskedasticity: ', het[-1]

In [None]:
dia.het_breuschpagan(simple.resid,pd.DataFrame(df.spy))

In [None]:
simple.resid.shape

In [25]:
df.spy.shape

(251,)

In [6]:
# fama-french factor explore##
fama_table = quandl.get('KFRENCH/FACTORS5_M')
fama = fama_table['2008':'2016']

AuthenticationError: (Status 400) (Quandl Error QEAx01) We could not recognize your API key: _fgkxjSbt5389zGt4crC. Please check your API key and try again. You can find your API key under your account settings.

In [7]:
fama = fama.apply(lambda x: x/100)
fama.tail()

NameError: name 'fama' is not defined

In [98]:
amzn = amzn_table.loc['2008':'2016',['Close']]
amzn = amzn.resample('M').agg(lambda x: x[-1])
amzn_log = np.log(amzn.Close).diff().dropna()
amzn_log.tail()

Date
2016-08-31    0.013548
2016-09-30    0.084895
2016-10-31   -0.058389
2016-11-30   -0.050972
2016-12-31   -0.000933
Freq: M, Name: Close, dtype: float64

In [100]:
new_df = pd.concat([fama,amzn_log],axis = 1).dropna()
new_df = new_df.rename(columns = {'Mkt-RF':'MKT'})
new_df.tail()

Unnamed: 0_level_0,MKT,SMB,HML,RMW,CMA,RF,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-08-31,0.005,0.0152,0.0318,-0.0124,-0.0056,0.0002,0.013548
2016-09-30,0.0025,0.0172,-0.0134,-0.0185,-0.0005,0.0002,0.084895
2016-10-31,-0.0202,-0.0397,0.0415,0.0136,0.0022,0.0002,-0.058389
2016-11-30,0.0486,0.0703,0.0844,-0.0068,0.0383,0.0001,-0.050972
2016-12-31,0.0182,0.0032,0.0352,0.0095,-0.0021,0.0003,-0.000933


In [128]:
m = sm.ols(formula = 'Close ~ MKT + SMB + HML',data = new_df).fit()
print m.summary()

                            OLS Regression Results                            
Dep. Variable:                  Close   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     2.333
Date:                Wed, 19 Jul 2017   Prob (F-statistic):              0.130
Time:                        15:40:51   Log-Likelihood:                 102.72
No. Observations:                 107   AIC:                            -201.4
Df Residuals:                     105   BIC:                            -196.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0198      0.009      2.184      0.0