In [27]:
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm

In [28]:
# Analysis parameters: the two ticker symbols and the date window that are
# handed to the download call below.
RISKY_ASSET, MARKET_BENCHMARK = "AMZN", "^GSPC"
START_DATE, END_DATE = "2016-01-01", "2021-02-10"


Download data from Yahoo Finance

In [29]:
# Fetch daily price history for the asset and the benchmark in one call.
#
# `auto_adjust=False` is passed explicitly so the result keeps a separate
# 'Adj Close' column, which the monthly-return step reads. The earlier
# `adjusted=True` keyword is not a yf.download parameter (the real switch is
# `auto_adjust`), so the presence of 'Adj Close' depended on the library's
# default rather than on this call.
df = yf.download([RISKY_ASSET, MARKET_BENCHMARK],
                 start=START_DATE,
                 end=END_DATE,
                 auto_adjust=False,
                 progress=False)

print(f'Downloaded {df.shape[0]} rows of data.')

Downloaded 1285 rows of data.


In [30]:
# Preview the first five rows of the downloaded frame.
df.head(n=5)

Unnamed: 0_level_0,Adj Close,Adj Close,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Unnamed: 0_level_1,AMZN,^GSPC,AMZN,^GSPC,AMZN,^GSPC,AMZN,^GSPC,AMZN,^GSPC,AMZN,^GSPC
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2016-01-04,636.98999,2012.660034,636.98999,2012.660034,657.719971,2038.199951,627.51001,1989.680054,656.289978,2038.199951,9314500,4304880000
2016-01-05,633.789978,2016.709961,633.789978,2016.709961,646.909973,2021.939941,627.650024,2004.170044,646.859985,2013.780029,5822600,3706620000
2016-01-06,632.650024,1990.26001,632.650024,1990.26001,639.789978,2011.709961,620.309998,1979.050049,622.0,2011.709961,5329200,4336660000
2016-01-07,607.940002,1943.089966,607.940002,1943.089966,630.0,1985.319946,605.210022,1938.829956,621.799988,1985.319946,7074900,5076590000
2016-01-08,607.049988,1922.030029,607.049988,1922.030029,624.140015,1960.400024,606.0,1918.459961,619.659973,1945.969971,5512900,4664940000


Resample to monthly data and calculate simple returns

In [31]:
# Take the last adjusted close of each month, under friendlier column names.
monthly_close = (
    df['Adj Close']
    .rename(columns={RISKY_ASSET: 'asset', MARKET_BENCHMARK: 'market'})
    .resample('M')
    .last()
)

# Month-over-month simple returns; pct_change leaves the first row as NaN,
# and dropna removes any row containing NaN.
X = monthly_close.pct_change().dropna()
X.head()

Unnamed: 0_level_0,asset,market
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-02-29,-0.058739,-0.004128
2016-03-31,0.074423,0.065991
2016-04-30,0.111094,0.002699
2016-05-31,0.095817,0.015325
2016-06-30,-0.00992,0.000911


Calculate beta using the covariance approach, $\beta = \dfrac{\mathrm{Cov}(R_{asset}, R_{market})}{\mathrm{Var}(R_{market})}$:

In [32]:
# Beta = Cov(asset, market) / Var(market).
#
# The covariance is looked up by label rather than by position: a positional
# `.iloc[0, 1]` returns whatever the first two columns happen to be, so if X
# has been altered (e.g. 'asset' popped and a constant column added by the
# regression cell) it would silently yield a covariance of 0 instead of failing.
covariance = X.cov().loc['asset', 'market']
benchmark_variance = X['market'].var()
beta = covariance / benchmark_variance
beta

1.1358559175325675

<font color = 'red'>Note: </font>
Variance measures how widely a data set is spread around its mean, while covariance measures the directional relationship between two random variables, i.e. whether they tend to move together or in opposite directions.

Prepare the input and estimate CAPM as a linear regression

In [24]:
# Build the regression inputs WITHOUT mutating X. Popping 'asset' from X and
# rebinding X to the design matrix made this cell fail with a KeyError on a
# second run and changed the frame that the covariance cell reads.
# NOTE(review): X now keeps its 'asset' and 'market' columns after this cell;
# confirm that no later cell expects X to be the design matrix.

# separate target (the asset's returns)
y = X['asset']

# regressors: every remaining column plus a constant term for the intercept
X_capm = sm.add_constant(X.drop(columns='asset'))

# define and fit the regression model
capm_model = sm.OLS(y, X_capm).fit()

# print results
print(capm_model.summary())

                            OLS Regression Results                            
Dep. Variable:                  asset   R-squared:                       0.382
Model:                            OLS   Adj. R-squared:                  0.372
Method:                 Least Squares   F-statistic:                     36.53
Date:                Wed, 17 Feb 2021   Prob (F-statistic):           1.09e-07
Time:                        10:42:02   Log-Likelihood:                 83.059
No. Observations:                  61   AIC:                            -162.1
Df Residuals:                      59   BIC:                            -157.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0176      0.008      2.090      0.0