In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import qgrid
import statsmodels.api as sm
import warnings
from scipy import stats
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('./merged_monthlyInfo.csv', index_col=0)
df_beta = pd.read_csv('./beta.csv', index_col=0)

In [3]:
df_beta = df_beta.dropna()

## Find Stock with Min/Max Beta

Now we have the beta for each stock in each rolling window, then we search for where the minimum / maximum beta occurs

In [4]:
min_betas = df_beta.describe().loc['min']
max_betas = df_beta.describe().loc['max']

In [5]:
min_betas.min()
min_betas.argmin()
df_beta[min_betas.argmin()].argmin()

-0.0638275048724565

'600594'

'2015-01'

In [6]:
max_betas.max()
max_betas.argmax()
df_beta[max_betas.argmin()].argmax()

2.1775508197632782

'600030'

'2018-09'

Type | Stock | Date | Beta
--|--|--|--
Min Beta| 600594 | 2015-01 | -0.0638275048724565
Max Beta| 600030 | 2018-09 | 2.1775508197632782

## Test CAPM

We obtained betas of each stocks, in the format of:

In [7]:
df_beta.head()

Unnamed: 0,600000,600004,600007,600008,600009,600011,600012,600015,600016,600017,...,601601,601628,601666,601699,601808,601857,601939,601988,601991,601998
2013-01,1.107955,0.765357,0.814966,0.967411,0.86114,0.695644,0.717668,0.837064,0.899479,0.835758,...,1.169431,0.950304,1.259531,1.594732,0.922375,0.63778,0.77037,0.607325,0.74262,0.848265
2013-02,1.120298,0.765737,0.795829,0.948405,0.864654,0.655563,0.707989,0.87393,0.910414,0.861732,...,1.130272,0.892784,1.299521,1.602488,0.921121,0.616097,0.751736,0.588556,0.722824,0.857944
2013-03,1.123275,0.766405,0.795683,0.946535,0.865976,0.655074,0.708128,0.877575,0.907275,0.862143,...,1.132107,0.893426,1.302042,1.606058,0.919197,0.619001,0.752119,0.588117,0.722779,0.858126
2013-04,1.140774,0.739266,0.784726,0.88321,0.831195,0.599534,0.695682,0.872028,0.887897,0.858937,...,1.117016,0.859152,1.377841,1.669429,0.859517,0.577462,0.7499,0.59216,0.699333,0.840491
2013-05,1.129066,0.727859,0.792882,0.904688,0.842592,0.598478,0.695042,0.872416,0.886161,0.856913,...,1.10972,0.836099,1.380144,1.657459,0.848226,0.575106,0.731979,0.59025,0.69468,0.845269


And we use Fama-Macbeth Regression to test CAPM. 

First step is to do a Periodic Cross-Sectional Regression:

$$ r_{it}-r_{ft} = \delta_0+\delta_1\beta_{it} $$

Then compute the mean and standard deviation of regressed $\delta_0$, $\delta_1$:

$$ \hat \delta_0=\frac{1}{T}\sum_{t=1}^T \hat \delta_{0t}$$

$$ \hat \delta_1=\frac{1}{T}\sum_{t=1}^T \hat \delta_{1t}$$

Then we use t-test to test whether CAPM holds, that is to say, whether $$ \hat \delta_{0}\ does\ not\ significantly\ \ne 0 $$

In [8]:
Months = list(df_beta.index)

In [9]:
delta0_hist = []
delta1_hist = []

for month in Months:
    monthly_df = df[df['Month']==month]
    excess_return = monthly_df['ri']-monthly_df['rf']
    betas = df_beta.loc[month]
    y = excess_return.values
    X = betas.values
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    delta0 = results.params[0]
    delta1 = results.params[1]
    delta0_hist.append(delta0)
    delta1_hist.append(delta1)

In [10]:
delta0_hist = np.array(delta0_hist)
delta1_hist = np.array(delta1_hist)

In [11]:
delta0_hist.mean()
delta0_hist.std()

0.015182963623538282

0.07706224576729304

In [12]:
delta1_hist.mean()
delta1_hist.std()

-0.008369189075356878

0.04809331279986916

The mean of $\delta_{0}$ is 0.015182963623538282


The standard deviation of $\delta_{0}$ is 
0.07706224576729304


The mean of $\delta_{1}$ is -0.008369189075356878


The standard deviation of $\delta_{1}$ is 0.04809331279986916

In [13]:
stats.ttest_1samp(delta0_hist, 0)

Ttest_1sampResult(statistic=1.660137518947314, pvalue=0.10129746647870715)

T test result of $\delta_0$ (the constant term):

t statistic | pvalue
--|--
1.660137518947314 | 0.10129746647870715

The p-value is 10.13%, which means we fail to reject the null hypothesis (H0: $\delta_{0}$ $\ne$ 0) at 5% significance level. That is to say, $$ \hat \delta_{0}\ does\ not\ significantly\ \ne 0 $$

In [20]:
stats.ttest_1samp(delta1_hist, 0)

Ttest_1sampResult(statistic=-1.466316968482576, pvalue=0.14697719583754815)

T test result of $\delta_1$ (the constant term):

t statistic | pvalue
--|--
-1.466316968482576 | 0.14697719583754815

The p-value is 14.70%, which means we fail to reject the null hypothesis (H0: $\delta_{1}$ $\ne$ 0) at 5% significance level. That is to say, $$ \hat \delta_{1}\ does\ not\ significantly\ \ne 0 $$