# 资本资产定价模型 Capital Asset Pricing Model

## Alpha and Beta

在横截面上，所有股票预期收益率的变化只来源于股票贝塔的变化。

 - 第一类方式：检验个股贝塔预测未来的股票收益率

 - 第二类方式：检验其他能够预测个股未来回报率指标的能力

CAPM模型另一个实证的检验标准是：横截面上所有股票的变化能够完全被贝塔解释掉，所以在考虑了贝塔的影响之后，没有额外的回报率，即没有α。

## 使用贝塔构造投资组合

t 月的高贝塔股票在t+1月能否获得更高的回报率，相比于低贝塔股票

In [1]:
import numpy as np # 数据处理最重要的模块
import pandas as pd # 数据处理最重要的模块
import matplotlib.pyplot as plt  # 画图模块
import scipy.stats as stats # 统计模块
import scipy
from datetime import datetime # 时间模块
from IPython.core.interactiveshell import InteractiveShell # jupyter运行输出的模块
import statsmodels.formula.api as smf  # OLS regression

#输出矢量图 渲染矢量图 是一个魔法函数（Magic Functions）内嵌绘图
%matplotlib inline 
%config InlineBackend.figure_format = 'svg'

#显示每一个运行结果
InteractiveShell.ast_node_interactivity = 'all'

#设置行不限制数量
#pd.set_option('display.max_rows',None)

#设置列不限制数量
pd.set_option('display.max_columns', None)

In [2]:
cross = pd.read_csv('C:/Users/hp/Desktop/Python/Python-5/cross_section.csv')
cross['month'] = pd.to_datetime(cross['month'],format='%Y-%m-%d')
cross['to_v'] = pd.to_numeric(cross['to_v'])
cross['floatingvalue'] = pd.to_numeric(cross['floatingvalue'])
cross = cross.dropna(subset=['ret','next_ret'])
cross

Unnamed: 0,Stkcd,month,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1
0,1,1991-05-01,1.016010e+09,1.859497e+09,-0.122253,0.006092,2,24,-0.128345,-0.119551,20.739149,21.343572,1,38.34,4.192309,44.0,4.192309,44.0,,,,0.007087,0.000804,-0.004883
1,1,1991-06-01,9.007350e+08,1.648521e+09,-0.113459,0.006092,3,23,-0.119551,-0.137013,20.618722,21.223144,1,33.99,0.246808,67.0,0.246808,67.0,,,,0.001155,0.000895,0.004956
2,1,1991-07-01,7.828100e+08,1.432695e+09,-0.130921,0.006092,4,16,-0.137013,-0.417680,20.478401,21.082823,1,29.54,0.165424,83.0,0.165424,83.0,,,,0.000230,0.001596,-0.004853
3,1,1991-08-01,6.748338e+08,1.346275e+09,-0.411588,0.006092,5,15,-0.417680,-0.039425,20.329977,21.020607,1,15.00,1.506699,98.0,1.506699,98.0,,,,0.072087,0.240913,0.152896
4,1,1991-09-01,6.523394e+08,1.301399e+09,-0.033333,0.006092,6,24,-0.039425,0.849080,20.296075,20.986706,1,14.50,1.502630,122.0,1.502630,122.0,,,,0.160940,0.005872,0.032258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604788,605589,2021-11-01,3.192953e+09,3.051846e+10,0.319156,0.001241,4,22,0.317915,-0.028913,21.884212,24.141598,1,39.39,2.063907,73.0,2.063907,73.0,0.259870,0.429465,,1.812936,0.025807,0.077155
604790,605598,2021-10-01,5.874312e+08,2.349311e+09,-0.093937,0.001241,2,16,-0.095178,0.076700,20.191270,21.577388,1,13.60,1.221250,23.0,1.221250,23.0,0.612103,0.755971,,1.345783,0.007366,0.040237
604791,605598,2021-11-01,6.332162e+08,2.532419e+09,0.077941,0.001241,3,22,0.076700,0.085389,20.266323,21.652441,1,14.66,1.053620,45.0,1.053620,45.0,0.567844,0.701310,,1.263335,0.005363,0.031528
604793,605599,2021-10-01,9.076669e+08,9.076667e+09,-0.080378,0.001241,2,16,-0.081619,0.060456,20.626388,22.928973,1,11.67,1.339638,29.0,1.339638,29.0,0.342676,0.529242,,0.970121,0.009502,0.034691


In [3]:
cross_beta = cross[cross['N12m'] >= 120].copy()

fenweishu = pd.DataFrame(
    cross_beta.groupby(['month'])['beta_12m'].quantile([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]))
fenweishu = fenweishu.reset_index()
fenweishu = fenweishu.pivot_table(index='month',columns='level_1',values='beta_12m')
fenweishu.columns = ['one','two','three','four','five','six','seven','eight','nine']
fenweishu

Unnamed: 0_level_0,one,two,three,four,five,six,seven,eight,nine
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1991-06-01,0.629680,0.654735,0.679790,0.704846,0.729901,198.093343,395.456785,592.820227,790.183670
1991-07-01,0.350270,0.380092,0.398394,0.405176,0.411957,0.563721,0.715485,80.784079,240.769503
1991-08-01,0.075724,0.227887,0.350608,0.363817,0.402592,0.466887,0.569418,0.608483,99.488554
1991-09-01,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
1991-10-01,0.121964,0.133576,0.145975,0.166748,0.188384,0.454713,0.787636,1.210933,1.439716
...,...,...,...,...,...,...,...,...,...
2021-07-01,0.420992,0.524887,0.607201,0.682744,0.767242,0.855596,0.951368,1.072957,1.265692
2021-08-01,0.414310,0.515808,0.595169,0.668179,0.749922,0.836589,0.928121,1.042918,1.247703
2021-09-01,0.427101,0.522362,0.600581,0.674865,0.752911,0.837922,0.926860,1.039764,1.248725
2021-10-01,0.406947,0.504075,0.584442,0.657301,0.732744,0.814718,0.916062,1.027471,1.241403


In [4]:
portfolio = pd.merge(cross_beta,fenweishu,on='month')
portfolio

Unnamed: 0,Stkcd,month,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1,one,two,three,four,five,six,seven,eight,nine
0,1,1991-09-01,652339350.0,1.301399e+09,-0.033333,0.006092,6,24,-0.039425,0.849080,20.296075,20.986706,1,14.50,1.502630,122.0,1.502630,122.0,,,,0.160940,0.005872,0.032258,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
1,2,1991-09-01,265635410.0,3.617614e+08,-0.253968,0.006092,9,26,-0.260060,2.589653,19.397635,19.706495,1,4.70,0.947756,129.0,0.038375,165.0,,,,0.077189,0.080263,0.042373,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
2,4,1991-09-01,19750000.0,4.937500e+07,-0.024691,0.006092,9,26,-0.030783,1.323022,16.798664,17.714955,1,3.95,0.411523,116.0,0.547052,153.0,,,,0.146100,0.008545,0.040541,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
3,600601,1991-09-01,55419000.0,6.090000e+07,-0.004902,0.006092,10,21,-0.010994,0.176174,17.830433,17.924744,1,60.90,0.103693,126.0,0.316838,176.0,0.174742,0.332603,0.008368,0.561331,0.001873,0.009804,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
4,600602,1991-09-01,279133500.0,1.140000e+09,-0.045661,0.006092,10,21,-0.051753,0.214137,19.447201,20.851659,1,568.50,0.107799,130.0,0.332390,199.0,0.261355,1.028144,0.020343,0.358617,0.002046,0.010072,0.119252,0.279118,0.327724,0.377070,0.449710,0.514359,0.833726,1.996685,32.542532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
542362,600653,1991-07-01,22225000.0,3.175000e+07,0.254941,0.006092,7,23,0.248849,0.214380,16.916728,17.273403,1,63.50,0.039913,125.0,0.791367,141.0,,,0.003163,0.398457,0.002180,0.010889,0.350270,0.380092,0.398394,0.405176,0.411957,0.563721,0.715485,80.784079,240.769503
542363,600654,1991-07-01,10840200.0,1.084536e+08,0.231981,0.006092,8,23,0.225889,0.238774,16.198772,18.501833,1,516.20,0.059126,118.0,400.754927,145.0,0.232901,1.052468,0.003295,0.475095,0.002210,0.010092,0.350270,0.380092,0.398394,0.405176,0.411957,0.563721,0.715485,80.784079,240.769503
542364,600602,1991-06-01,240786400.0,9.808000e+08,0.196098,0.006092,7,20,0.190006,-0.029950,19.299421,20.703879,1,490.40,0.272131,125.0,0.729901,133.0,0.302978,1.191884,0.023583,0.143191,0.001931,0.010110,0.629680,0.654735,0.679790,0.704846,0.729901,198.093343,395.456785,592.820227,790.183670
542365,600651,1991-06-01,15681600.0,1.568160e+07,0.196375,0.006092,7,20,0.190283,0.082292,16.567999,16.567999,1,475.20,0.079023,120.0,0.604625,128.0,0.137145,0.372341,0.031155,0.206848,0.001930,0.010118,0.629680,0.654735,0.679790,0.704846,0.729901,198.093343,395.456785,592.820227,790.183670


In [5]:
portfolio['sort'] = np.where(
    portfolio['beta_12m'] <= portfolio['one'], 'P1',
    np.where(
        portfolio['beta_12m'] <= portfolio['two'], 'P2',
        np.where(
            portfolio['beta_12m'] <= portfolio['three'], 'P3',
            np.where(
                portfolio['beta_12m'] <= portfolio['four'], 'P4',
                np.where(
                    portfolio['beta_12m'] <= portfolio['five'], 'P5',
                    np.where(
                        portfolio['beta_12m'] <= portfolio['six'], 'P6',
                        np.where(
                            portfolio['beta_12m'] <= portfolio['seven'], 'P7',
                            np.where(
                                portfolio['beta_12m'] <= portfolio['eight'], 'P8',
                                np.where(
                                    portfolio['beta_12m'] <= portfolio['nine'],
                                    'P9', 'Pmax')))))))))

In [6]:
portfolio_beta =  pd.DataFrame(
    portfolio.groupby(['month','sort']).apply(lambda x: np.average(x['next_ret'],weights = x['floatingvalue'])))
portfolio_beta

Unnamed: 0_level_0,Unnamed: 1_level_0,0
month,sort,Unnamed: 2_level_1
1991-06-01,P1,0.082292
1991-06-01,P5,-0.029950
1991-06-01,Pmax,0.225889
1991-07-01,P1,-0.047111
1991-07-01,P3,0.237810
...,...,...
2021-11-01,P6,0.028319
2021-11-01,P7,0.010714
2021-11-01,P8,0.024583
2021-11-01,P9,0.022001


In [7]:
portfolio_beta = portfolio_beta.reset_index()
portfolio_beta.columns = ['month', 'sort', 'p']
portfolio_beta

Unnamed: 0,month,sort,p
0,1991-06-01,P1,0.082292
1,1991-06-01,P5,-0.029950
2,1991-06-01,Pmax,0.225889
3,1991-07-01,P1,-0.047111
4,1991-07-01,P3,0.237810
...,...,...,...
3641,2021-11-01,P6,0.028319
3642,2021-11-01,P7,0.010714
3643,2021-11-01,P8,0.024583
3644,2021-11-01,P9,0.022001


In [8]:
portfolio_beta = portfolio_beta.pivot_table(index='month',
                                            columns='sort',
                                            values='p')
portfolio_beta['My_portfolio'] = portfolio_beta['Pmax'] - portfolio_beta['P1']
portfolio_beta

sort,P1,P2,P3,P4,P5,P6,P7,P8,P9,Pmax,My_portfolio
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1991-06-01,0.082292,,,,-0.029950,,,,,0.225889,0.143597
1991-07-01,-0.047111,,0.237810,,0.238320,,,0.214380,,0.238774,0.285885
1991-08-01,-0.260060,0.226310,-0.010994,,-0.051753,0.190075,,0.190037,-0.030783,-0.074564,0.185496
1991-09-01,2.589653,0.202068,0.176174,0.214137,0.226707,0.190240,1.323022,0.849080,0.156142,0.118699,-2.470954
1991-10-01,0.167193,0.142519,0.130003,0.189958,0.225559,0.167821,-0.000175,0.143794,0.016213,0.172135,0.004942
...,...,...,...,...,...,...,...,...,...,...,...
2021-07-01,0.043058,0.082310,0.094548,0.072904,0.060254,0.053524,0.064011,0.030013,0.011501,0.013069,-0.029988
2021-08-01,0.066407,0.057901,0.005956,-0.005510,-0.025323,-0.025050,-0.017079,-0.009261,-0.011851,-0.010869,-0.077275
2021-09-01,-0.034691,-0.059139,-0.050441,-0.033865,-0.011551,-0.024197,-0.027590,0.013785,0.008904,0.034940,0.069631
2021-10-01,-0.022601,0.012542,0.023425,0.014095,0.068158,0.035858,0.053832,0.040056,0.022319,0.006069,0.028669


In [9]:
portfolio_beta = portfolio_beta['1995-01':'2021-11'].copy()
portfolio_beta['month'] = pd.date_range(start='1995-01', end='2021-12', freq='m')
portfolio_beta.set_index('month', inplace = True)
portfolio_beta['My_portfolio'] = portfolio_beta['My_portfolio'].shift(1)
portfolio_beta['P1'] = portfolio_beta['P1'].shift(1)
portfolio_beta['Pmax'] = portfolio_beta['Pmax'].shift(1)
portfolio_beta

sort,P1,P2,P3,P4,P5,P6,P7,P8,P9,Pmax,My_portfolio
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1995-01-31,,-0.003604,-0.017382,-0.025049,-0.015368,-0.032056,-0.029097,-0.041449,-0.036201,,
1995-02-28,-0.015230,0.083273,0.105131,0.169104,0.130281,0.164102,0.117882,0.158471,0.161724,-0.029482,-0.014252
1995-03-31,0.059054,-0.096394,-0.118613,-0.112506,-0.118591,-0.131330,-0.111892,-0.129553,-0.102839,0.175586,0.116533
1995-04-30,-0.108752,0.097845,0.118312,0.155232,0.171149,0.230445,0.244515,0.232863,0.201124,-0.132636,-0.023884
1995-05-31,0.058638,-0.094559,-0.064943,-0.103748,-0.087385,-0.095071,-0.085276,-0.108471,-0.124543,0.261958,0.203320
...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,-0.060247,0.082310,0.094548,0.072904,0.060254,0.053524,0.064011,0.030013,0.011501,-0.007484,0.052763
2021-08-31,0.043058,0.057901,0.005956,-0.005510,-0.025323,-0.025050,-0.017079,-0.009261,-0.011851,0.013069,-0.029988
2021-09-30,0.066407,-0.059139,-0.050441,-0.033865,-0.011551,-0.024197,-0.027590,0.013785,0.008904,-0.010869,-0.077275
2021-10-31,-0.034691,0.012542,0.023425,0.014095,0.068158,0.035858,0.053832,0.040056,0.022319,0.034940,0.069631


In [10]:
model = smf.ols('My_portfolio ~ 1',
                 data=portfolio_beta['2000-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           My_portfolio   R-squared:                      -0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:04   Log-Likelihood:                 314.86
No. Observations:                 263   AIC:                            -627.7
Df Residuals:                     262   BIC:                            -624.2
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0037      0.004     -0.883      0.3

In [11]:
model = smf.ols('My_portfolio ~ 1',
                 data=portfolio_beta['2010-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:           My_portfolio   R-squared:                      -0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:04   Log-Likelihood:                 171.94
No. Observations:                 143   AIC:                            -341.9
Df Residuals:                     142   BIC:                            -338.9
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0083      0.005     -1.568      0.1

## 是否存在α

In [12]:
cross_beta

Unnamed: 0,Stkcd,month,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1
4,1,1991-09-01,6.523394e+08,1.301399e+09,-0.033333,0.006092,6,24,-0.039425,0.849080,20.296075,20.986706,1,14.50,1.502630,122.0,1.502630,122.0,,,,0.160940,0.005872,0.032258
5,1,1991-10-01,1.210202e+09,2.414319e+09,0.855172,0.006092,7,25,0.849080,0.016213,20.914053,21.604683,1,26.90,1.442916,127.0,1.439716,147.0,,,,0.235418,0.161766,0.240580
6,1,1991-11-01,1.237195e+09,2.468170e+09,0.022305,0.006092,8,25,0.016213,0.061181,20.936113,21.626743,1,27.50,1.328260,128.0,1.323324,172.0,,,,0.270395,0.078992,0.109907
7,1,1991-12-01,1.320425e+09,2.634211e+09,0.067273,0.006092,9,26,0.061181,0.055237,21.001219,21.691849,1,29.35,1.352059,131.0,1.320747,198.0,,,,0.084107,0.011311,0.046211
8,1,1992-01-01,1.401405e+09,2.795764e+09,0.061329,0.006092,10,22,0.055237,0.006749,21.060741,21.751371,1,31.15,1.361207,137.0,1.314970,220.0,,,,0.089871,0.003226,0.031457
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604758,605500,2021-07-01,8.470000e+08,3.390000e+09,-0.041313,0.001241,8,22,-0.042554,0.034178,20.557211,21.943506,1,16.94,0.549457,120.0,0.577457,147.0,0.640626,0.708610,,0.567496,0.002147,0.011325
604759,605500,2021-08-01,8.770000e+08,3.510000e+09,0.035419,0.001241,9,22,0.034178,-0.016634,20.592018,21.978312,1,17.54,0.644013,127.0,0.597785,169.0,0.632851,0.719216,,0.588553,0.002303,0.020286
604760,605500,2021-09-01,8.635000e+08,3.450000e+09,-0.015393,0.001241,10,20,-0.016634,-0.033088,20.576504,21.962799,1,17.27,0.563838,124.0,0.609941,189.0,0.642745,0.730460,,1.198832,0.011876,0.077096
604761,605500,2021-10-01,8.360000e+08,3.340000e+09,-0.031847,0.001241,11,16,-0.033088,0.019094,20.544139,21.930434,1,16.72,0.594148,119.0,0.613978,205.0,0.679988,0.774058,,0.321845,0.001724,0.010423


In [13]:
cross_beta = cross[cross['N12m'] >= 120].copy()
cross_beta['month'] = pd.to_datetime(cross['month'],format='%Y-%m-%d') # 日期需设置为datetime格式
cross_beta = cross_beta.set_index(['Stkcd', 'month']) # 设置multi-index
cross_beta

Unnamed: 0_level_0,Unnamed: 1_level_0,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1
Stkcd,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,1991-09-01,6.523394e+08,1.301399e+09,-0.033333,0.006092,6,24,-0.039425,0.849080,20.296075,20.986706,1,14.50,1.502630,122.0,1.502630,122.0,,,,0.160940,0.005872,0.032258
1,1991-10-01,1.210202e+09,2.414319e+09,0.855172,0.006092,7,25,0.849080,0.016213,20.914053,21.604683,1,26.90,1.442916,127.0,1.439716,147.0,,,,0.235418,0.161766,0.240580
1,1991-11-01,1.237195e+09,2.468170e+09,0.022305,0.006092,8,25,0.016213,0.061181,20.936113,21.626743,1,27.50,1.328260,128.0,1.323324,172.0,,,,0.270395,0.078992,0.109907
1,1991-12-01,1.320425e+09,2.634211e+09,0.067273,0.006092,9,26,0.061181,0.055237,21.001219,21.691849,1,29.35,1.352059,131.0,1.320747,198.0,,,,0.084107,0.011311,0.046211
1,1992-01-01,1.401405e+09,2.795764e+09,0.061329,0.006092,10,22,0.055237,0.006749,21.060741,21.751371,1,31.15,1.361207,137.0,1.314970,220.0,,,,0.089871,0.003226,0.031457
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605500,2021-07-01,8.470000e+08,3.390000e+09,-0.041313,0.001241,8,22,-0.042554,0.034178,20.557211,21.943506,1,16.94,0.549457,120.0,0.577457,147.0,0.640626,0.708610,,0.567496,0.002147,0.011325
605500,2021-08-01,8.770000e+08,3.510000e+09,0.035419,0.001241,9,22,0.034178,-0.016634,20.592018,21.978312,1,17.54,0.644013,127.0,0.597785,169.0,0.632851,0.719216,,0.588553,0.002303,0.020286
605500,2021-09-01,8.635000e+08,3.450000e+09,-0.015393,0.001241,10,20,-0.016634,-0.033088,20.576504,21.962799,1,17.27,0.563838,124.0,0.609941,189.0,0.642745,0.730460,,1.198832,0.011876,0.077096
605500,2021-10-01,8.360000e+08,3.340000e+09,-0.031847,0.001241,11,16,-0.033088,0.019094,20.544139,21.930434,1,16.72,0.594148,119.0,0.613978,205.0,0.679988,0.774058,,0.321845,0.001724,0.010423


In [14]:
df = cross_beta.loc[pd.IndexSlice[:, '2001-01':'2021-12'], :]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1
Stkcd,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,2001-01-01,2.086901e+10,2.914842e+10,0.031680,0.001856,118,14,0.029824,-0.061269,23.761531,24.095666,1,14.98,0.778918,118.0,0.680193,233.0,0.105615,1.706175,0.019047,0.049985,0.005247,0.039286
1,2001-02-01,1.962913e+10,2.741663e+10,-0.059413,0.001856,119,18,-0.061269,0.149315,23.700280,24.034416,1,14.09,0.880670,113.0,0.690082,239.0,0.112286,1.813946,0.020250,0.026150,0.003641,0.021142
1,2001-03-01,2.259648e+10,3.156124e+10,0.151171,0.001856,120,22,0.149315,-0.044396,23.841060,24.175195,1,16.22,0.981993,114.0,0.803293,238.0,0.097541,1.575740,0.017591,0.262139,0.009797,0.052448
1,2001-04-01,2.163523e+10,3.021862e+10,-0.042540,0.001856,121,21,-0.044396,0.041286,23.797589,24.131724,1,15.53,0.930027,118.0,0.857901,239.0,0.156820,2.224705,0.016763,0.121548,0.002904,0.024939
1,2001-05-01,2.256862e+10,3.152232e+10,0.043142,0.001856,122,18,0.041286,-0.056794,23.839826,24.173962,1,16.20,1.008609,114.0,0.887494,239.0,0.150334,2.132695,0.016070,0.077083,0.002589,0.027743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605500,2021-07-01,8.470000e+08,3.390000e+09,-0.041313,0.001241,8,22,-0.042554,0.034178,20.557211,21.943506,1,16.94,0.549457,120.0,0.577457,147.0,0.640626,0.708610,,0.567496,0.002147,0.011325
605500,2021-08-01,8.770000e+08,3.510000e+09,0.035419,0.001241,9,22,0.034178,-0.016634,20.592018,21.978312,1,17.54,0.644013,127.0,0.597785,169.0,0.632851,0.719216,,0.588553,0.002303,0.020286
605500,2021-09-01,8.635000e+08,3.450000e+09,-0.015393,0.001241,10,20,-0.016634,-0.033088,20.576504,21.962799,1,17.27,0.563838,124.0,0.609941,189.0,0.642745,0.730460,,1.198832,0.011876,0.077096
605500,2021-10-01,8.360000e+08,3.340000e+09,-0.031847,0.001241,11,16,-0.033088,0.019094,20.544139,21.930434,1,16.72,0.594148,119.0,0.613978,205.0,0.679988,0.774058,,0.321845,0.001724,0.010423


windows+R,输入cmd

输入：

pip install linearmodels

In [15]:
from linearmodels import FamaMacBeth
model = FamaMacBeth.from_formula('next_ret ~ 1 + beta_12m', data=df)
## 一般fm回归结果展示的是Newey-West调整后的t值，.fit()中做如下设置
## 其中`bandwidth`是Newey-West滞后阶数，选取方式为lag = 4(T/100) ^ (2/9)
## 若不需要Newey-West调整则去掉括号内所有设置。
res = model.fit(cov_type= 'kernel',debiased = False, bandwidth = 6)
print(res.summary)

                            FamaMacBeth Estimation Summary                           
Dep. Variable:                    next_ret   R-squared:                       -0.0002
Estimator:                     FamaMacBeth   R-squared (Between):             -0.0009
No. Observations:                   497268   R-squared (Within):              -0.0002
Date:                     Sun, Dec 11 2022   R-squared (Overall):             -0.0002
Time:                             21:03:06   Log-likelihood                 2.717e+05
Cov. Estimator:    Fama-MacBeth Kernel Cov                                           
                                             F-statistic:                     -89.489
Entities:                             4182   P-value                           1.0000
Avg Obs:                            118.91   Distribution:                F(1,497266)
Min Obs:                            0.0000                                           
Max Obs:                            251.00   F-statist

## My Code for Fama MacBeth

In [16]:
cross_beta = cross[cross['N12m'] >= 120].copy()
cross_beta['month'] = pd.to_datetime(cross['month'],format='%Y-%m-%d') # 日期需设置为datetime格式
cross_beta = cross_beta.set_index(['Stkcd', 'month']) # 设置multi-index
cross_beta

Unnamed: 0_level_0,Unnamed: 1_level_0,floatingvalue,totalvalue,Return,rfmonth,Rank,Freq,ret,next_ret,sizef,sizet,ew,Clsprc,beta_6m,N6m,beta_12m,N12m,bm,am,ep,to_v,fv,max1
Stkcd,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,1991-09-01,6.523394e+08,1.301399e+09,-0.033333,0.006092,6,24,-0.039425,0.849080,20.296075,20.986706,1,14.50,1.502630,122.0,1.502630,122.0,,,,0.160940,0.005872,0.032258
1,1991-10-01,1.210202e+09,2.414319e+09,0.855172,0.006092,7,25,0.849080,0.016213,20.914053,21.604683,1,26.90,1.442916,127.0,1.439716,147.0,,,,0.235418,0.161766,0.240580
1,1991-11-01,1.237195e+09,2.468170e+09,0.022305,0.006092,8,25,0.016213,0.061181,20.936113,21.626743,1,27.50,1.328260,128.0,1.323324,172.0,,,,0.270395,0.078992,0.109907
1,1991-12-01,1.320425e+09,2.634211e+09,0.067273,0.006092,9,26,0.061181,0.055237,21.001219,21.691849,1,29.35,1.352059,131.0,1.320747,198.0,,,,0.084107,0.011311,0.046211
1,1992-01-01,1.401405e+09,2.795764e+09,0.061329,0.006092,10,22,0.055237,0.006749,21.060741,21.751371,1,31.15,1.361207,137.0,1.314970,220.0,,,,0.089871,0.003226,0.031457
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605500,2021-07-01,8.470000e+08,3.390000e+09,-0.041313,0.001241,8,22,-0.042554,0.034178,20.557211,21.943506,1,16.94,0.549457,120.0,0.577457,147.0,0.640626,0.708610,,0.567496,0.002147,0.011325
605500,2021-08-01,8.770000e+08,3.510000e+09,0.035419,0.001241,9,22,0.034178,-0.016634,20.592018,21.978312,1,17.54,0.644013,127.0,0.597785,169.0,0.632851,0.719216,,0.588553,0.002303,0.020286
605500,2021-09-01,8.635000e+08,3.450000e+09,-0.015393,0.001241,10,20,-0.016634,-0.033088,20.576504,21.962799,1,17.27,0.563838,124.0,0.609941,189.0,0.642745,0.730460,,1.198832,0.011876,0.077096
605500,2021-10-01,8.360000e+08,3.340000e+09,-0.031847,0.001241,11,16,-0.033088,0.019094,20.544139,21.930434,1,16.72,0.594148,119.0,0.613978,205.0,0.679988,0.774058,,0.321845,0.001724,0.010423


In [17]:
import statsmodels.api as sm 

def Fama_MacBeth(data, yvar, xvars):
    Y = data[yvar]
    X = data[xvars]
    X['intercept'] = 1.
    result = sm.OLS(Y, X).fit()
    return result.params

coef = df.groupby('month').apply(Fama_MacBeth, 'next_ret', ['beta_12m'])
coef

Unnamed: 0_level_0,beta_12m,intercept
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-01-01,-0.044704,-0.017280
2001-02-01,0.040169,0.036714
2001-03-01,-0.013524,0.011918
2001-04-01,0.034465,0.014742
2001-05-01,0.004950,0.000262
...,...,...
2021-07-01,-0.005698,0.071792
2021-08-01,-0.064676,0.036335
2021-09-01,0.053148,-0.064804
2021-10-01,0.014183,0.087244


In [18]:
model_alpha = smf.ols('intercept ~ 1',
                 data=coef['2000-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model_alpha.summary())

                            OLS Regression Results                            
Dep. Variable:              intercept   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:07   Log-Likelihood:                 271.30
No. Observations:                 251   AIC:                            -540.6
Df Residuals:                     250   BIC:                            -537.1
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0098      0.006      1.552      0.1

In [19]:
model_alpha = smf.ols('intercept ~ 1',
                 data=coef['2010-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model_alpha.summary())

                            OLS Regression Results                            
Dep. Variable:              intercept   R-squared:                      -0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:07   Log-Likelihood:                 153.22
No. Observations:                 143   AIC:                            -304.4
Df Residuals:                     142   BIC:                            -301.5
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0114      0.007      1.633      0.1

In [20]:
model_beta = smf.ols('beta_12m ~ 1',
                 data=coef['2000-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model_beta.summary())

                            OLS Regression Results                            
Dep. Variable:               beta_12m   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:07   Log-Likelihood:                 283.53
No. Observations:                 251   AIC:                            -565.1
Df Residuals:                     250   BIC:                            -561.5
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0008      0.004      0.211      0.8

In [21]:
model_beta = smf.ols('beta_12m ~ 1',
                 data=coef['2010-01':'2021-12']).fit(
                     cov_type='HAC', cov_kwds={'maxlags': 6})
print(model_beta.summary())

                            OLS Regression Results                            
Dep. Variable:               beta_12m   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Sun, 11 Dec 2022   Prob (F-statistic):                nan
Time:                        21:03:08   Log-Likelihood:                 194.43
No. Observations:                 143   AIC:                            -386.9
Df Residuals:                     142   BIC:                            -383.9
Df Model:                           0                                         
Covariance Type:                  HAC                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0017      0.004     -0.458      0.6