# 风险溢价

## 加载模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
from scipy.stats import mstats
import statsmodels.api as sm

## 读入文件

In [2]:
# Load the merged stock/accounting panel and parse its 'date' column
# into pandas datetimes in one chained expression.
df_merge = pd.read_csv('../data/stock-accounting.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

## Fama-MacBeth 回归

构造规模因子（市值取对数），将各因子在每只股票内滞后一期，剔除 `nan` 与 `inf` 数据，并对各因子做上下各 1% 的缩尾处理

In [3]:
# Size factor: natural logarithm of market capitalisation.
df_merge['size'] = df_merge['market-cap'].apply(np.log)

# Factor names, and the regression parameter names (intercept first).
idx_fac = ['beta', 'size', 'book-to-market', 'profit', 'investment', 'past-return']
idx_par = ['const', *idx_fac]

# Work on a copy holding only identifiers, the dependent variable and factors.
key_cols = ['PERMNO', 'date', 'stg', 'RET-RF']
df_fit = df_merge[key_cols + idx_fac].copy()
# (disabled) df_fit['investment'] = df_fit['investment'].apply(np.exp)

# Lag every factor by one row within each PERMNO so that a row's return is
# paired with the previous row's factor values.
# NOTE(review): assumes rows are in chronological order within each PERMNO
# -- TODO confirm.
lagged_factors = df_fit.groupby('PERMNO')[idx_fac].shift(1)
df_fit[idx_fac] = lagged_factors

# Treat +/-inf as missing, then drop every row that has any missing value.
df_fit = df_fit.replace([np.inf, -np.inf], np.nan).dropna()

# Winsorize each factor column at 1% in both tails (over the pooled sample).
df_fit[idx_fac] = df_fit[idx_fac].apply(
    lambda col: mstats.winsorize(col, limits=[0.01, 0.01])
)

# Number of observations left in the regression sample.
len(df_fit)

986307

In [4]:
# Sorted array of the distinct dates present in the regression sample.
time_series = np.sort(df_fit['date'].unique())

idx_stg = ['Intro', 'Growth', 'Mature', 'Shakeout', 'Decline']

# Rows are accumulated in plain lists and converted to DataFrames once at the
# end. Growing a DataFrame with `.loc[len(df)]` copies it on every insert and
# leaves object-dtype columns; the previous code also indexed df_fit_info by
# len(df_params), i.e. by the length of the wrong frame.
param_rows = []
info_rows = []

for stg in ['All'] + idx_stg:
    # 'All' pools every observation; otherwise keep only the rows of one stage.
    # The filter does not depend on the date, so it is done once per stage.
    df_stage = df_fit if stg == 'All' else df_fit[df_fit['stg'] == stg]

    # One cross-sectional regression per date; the first date is skipped.
    for t in time_series[1:]:
        df_date = df_stage[df_stage['date'] == t]
        x = df_date[idx_fac]
        # Standardize each factor within the cross-section and clip at +/-3.
        z = ((x - x.mean()) / x.std()).clip(-3, 3)
        z = sm.add_constant(z)
        y = df_date['RET-RF']
        result = sm.OLS(y, z).fit()
        # result.params is ordered like the columns of z: 'const', then idx_fac.
        param_rows.append([stg, t, *result.params])
        info_rows.append([stg, t, result.rsquared, result.nobs])

# Coefficients of every (stage, date) regression.
df_params = pd.DataFrame(param_rows, columns=['stg', 'date'] + idx_par)
# Fit diagnostics of every (stage, date) regression.
df_fit_info = pd.DataFrame(
    info_rows, columns=['stg', 'date', 'R Squared', 'Observations']
)

### EXHIBIT 8

先定义显著性标记函数：根据 p 值返回星号，`***`、`**`、`*` 分别对应 1%、5%、10% 的显著性水平

In [5]:
def significance_p(pvalue):
    """Return the significance stars for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p < 0.1, and an empty
    string otherwise (including when the comparisons are all false, e.g. NaN).
    """
    # Thresholds are checked from the strictest to the loosest.
    star_levels = ((0.01, '***'), (0.05, '**'), (0.1, '*'))
    for cutoff, stars in star_levels:
        if pvalue < cutoff:
            return stars
    return ''

In [6]:
# Exhibit 8: Fama-MacBeth summary. For every stage the table reports the
# time-series mean of each coefficient (in percent), its t-statistic, its
# significance stars, the average R squared and the total observation count.
reg_cols = ['All'] + idx_stg + ['F-Test']
df_reg = pd.DataFrame(index=idx_par, columns=reg_cols)
df_reg_t = pd.DataFrame(index=idx_par, columns=reg_cols)
df_reg_s = pd.DataFrame(index=idx_par, columns=reg_cols)
df_reg_info = pd.DataFrame(index=['R Squared', 'Observations'], columns=reg_cols)

for stg in ['All'] + idx_stg:
    # Coefficients of this stage, one row per cross-sectional regression.
    # Cast to float so the statistics below do not depend on how df_params
    # was assembled (it may hold object-dtype columns).
    df_par = df_params.loc[df_params['stg'] == stg, idx_par].astype(float)

    # Number of periods is taken from the data instead of a hard-coded 407,
    # so the t-statistics stay correct if the sample length changes.
    n_periods = df_par.count()
    coef_mean = df_par.mean()
    df_reg[stg] = coef_mean * 100

    # t = mean / (std / sqrt(T)).
    t_stat = np.sqrt(n_periods) * coef_mean / df_par.std()
    df_reg_t[stg] = t_stat

    # Two-sided p-value; the survival function avoids the cancellation in
    # 1 - cdf for large |t|.
    p_value = pd.Series(
        2 * stats.t.sf(t_stat.abs(), df=n_periods - 1), index=t_stat.index
    )
    df_reg_s[stg] = p_value.apply(significance_p)

    stage_info = df_fit_info[df_fit_info['stg'] == stg]
    df_reg_info.loc['R Squared', stg] = stage_info['R Squared'].mean()
    df_reg_info.loc['Observations', stg] = stage_info['Observations'].sum()

# Suffix the row labels so the three tables can be stacked into one.
df_reg_t = df_reg_t.rename({name: name + '-t' for name in idx_par})
df_reg_s = df_reg_s.rename({name: name + '-s' for name in idx_par})
df_reg = pd.concat([df_reg, df_reg_t, df_reg_s, df_reg_info])

# Final row order: for each factor (then the intercept) the estimate, its
# t-statistic and its stars; the fit diagnostics come last.
row_order = [
    name + suffix
    for name in idx_fac + ['const']
    for suffix in ('', '-t', '-s')
] + ['R Squared', 'Observations']
df_reg = df_reg.loc[row_order]

F-Test：对每个因子做单因素方差分析，检验其系数均值在五个阶段（`idx_stg`）之间是否存在显著差异

In [7]:
# One-way ANOVA per factor: are the coefficient series (in percent) of the
# five stages drawn from populations with the same mean? The pooled 'All'
# series is not part of the comparison.
stage_masks = {stg: df_params['stg'] == stg for stg in idx_stg}

for fac in idx_fac:
    group_coef = []
    for stg in idx_stg:
        group_coef.append(100 * df_params.loc[stage_masks[stg], fac])
    F, p = stats.f_oneway(*group_coef)
    # The F statistic goes on the factor's estimate row, its stars on the
    # matching '-s' row.
    df_reg.loc[fac, 'F-Test'] = F
    df_reg.loc[fac + '-s', 'F-Test'] = significance_p(p)

In [8]:
# Write Exhibit 8 to disk, then show the table with floats at three decimals.
df_reg.to_csv('../exhibits/Exhibit8.csv')
pd.set_option('display.float_format', '{:.3f}'.format)
df_reg

Unnamed: 0,All,Intro,Growth,Mature,Shakeout,Decline,F-Test
beta,0.109,0.133,0.086,0.163,0.110,0.277,0.401
beta-t,1.120,0.940,0.874,1.987,0.928,2.034,
beta-s,,,,**,,**,
size,-0.231,-0.544,-0.159,-0.260,-0.262,-0.371,1.542
size-t,-2.608,-3.658,-1.884,-3.474,-2.653,-2.344,
size-s,***,***,*,***,***,**,
book-to-market,0.461,0.523,0.208,0.396,0.603,1.034,5.560
book-to-market-t,6.146,3.439,2.126,4.755,4.688,6.078,
book-to-market-s,***,***,**,***,***,***,***
profit,0.294,0.182,0.122,0.213,0.140,-0.026,0.648


## 多空投资组合：全样本估值

### 全样本估算超额收益率

In [9]:
# Full-sample coefficients: time-series mean of the per-date estimates,
# one row per stage label (including 'All').
df_coef_full = df_params.groupby('stg')[idx_par].mean()

# Return panel used to evaluate the portfolios. RET, RET-RF and RF are shifted
# by -1 within each PERMNO, so every row carries the values of that stock's
# next row.
df_shift = df_merge[['PERMNO', 'date', 'EXCHCD', 'stg', 'adj-prc',
                     'RET', 'RET-RF', 'RF', 'market-cap']].copy()
df_shift[['RET', 'RET-RF', 'RF']] = df_shift.groupby('PERMNO')[['RET', 'RET-RF', 'RF']].shift(-1)
# df_shift['market-cap'] = df_shift.groupby('PERMNO')['market-cap'].shift(1)
# Keep exactly the rows that survived the cleaning of df_fit. df_fit.index
# holds index *labels*, so select with .loc; .iloc would only give the same
# rows while df_merge happens to carry a default 0..n-1 index.
df_shift = df_shift.loc[df_fit.index]

# Factors standardized within each date (unconditional model), clipped at
# +/-3, plus a constant column for the intercept.
x_mean = df_fit.groupby('date')[idx_fac].transform('mean')
x_std = df_fit.groupby('date')[idx_fac].transform('std')
z_un = (df_fit[idx_fac] - x_mean) / x_std
z_un = z_un.clip(-3, 3)
z_un['const'] = 1
# Same standardization, but within each (date, stage) cell for the
# stage-conditional model.
x_mean = df_fit.groupby(['date', 'stg'])[idx_fac].transform('mean')
x_std = df_fit.groupby(['date', 'stg'])[idx_fac].transform('std')
z_stg = (df_fit[idx_fac] - x_mean) / x_std
z_stg = z_stg.clip(-3, 3)
z_stg['const'] = 1

# Predicted excess return, unconditional model: every row uses the pooled
# 'All' coefficients.
ret_un_full = (z_un[idx_par] * df_coef_full.loc['All', idx_par]).sum(axis=1)
ret_un_full.name = 'pred-RET-RF'
# Predicted excess return, stage-conditional model: every row uses the
# coefficients of its own stage; rows labelled 'Unknown' fall back to the
# pooled 'All' coefficients.
df_stg = df_fit[['stg']].replace('Unknown', 'All')
df_coef_stg_full = df_stg.join(df_coef_full, on='stg')
ret_stg_full = (z_stg[idx_par] * df_coef_stg_full[idx_par]).sum(axis=1)
ret_stg_full.name = 'pred-RET-RF'

### 计算不同组合内月超额收益率

定义函数，根据预测的超额收益率计算不同组的超额收益率

In [10]:
def share_weight(group):
    """Return each row's share of the group's total market capitalisation.

    Parameters
    ----------
    group : DataFrame
        Must contain the columns 'PERMNO' and 'market-cap'.

    Returns
    -------
    DataFrame
        Columns 'PERMNO' and 'weight', indexed like ``group``. Weights that
        cannot be computed (missing market cap, or a zero group total) are 0.
    """
    cap = group['market-cap']
    # The result of fillna must be kept: the previous code called
    # weight.fillna(0) without assigning it, so NaN weights were never replaced.
    weight = (cap / cap.sum()).fillna(0).rename('weight')
    return pd.concat([group['PERMNO'], weight], axis=1)
def exc_ret(group):
    """Return the weighted return of ``group`` in excess of its mean RF.

    ``group`` must provide the columns 'RET', 'weight' and 'RF'. The weighted
    return is the sum of RET * weight (missing products are skipped by the
    sum); the mean of 'RF' over the group is then subtracted.
    """
    weighted_return = group['RET'].mul(group['weight']).sum()
    risk_free = group['RF'].mean()
    return weighted_return - risk_free

# Labels of the five predicted-return buckets, lowest to highest.
ret_group = ['low', '2', '3', '4', 'high']

def exc_calc(ret_exp):
    """Compute per-date excess returns of five predicted-return portfolios.

    For every date in ``time_series[1:]`` the rows of ``df_shift`` at that
    date are sorted into the ``ret_group`` buckets by their predicted return,
    weighted inside each bucket by ``share_weight`` and aggregated with
    ``exc_ret``.

    Parameters
    ----------
    ret_exp : Series
        Predicted excess return per row.
        NOTE(review): assumed to share its index with ``df_shift`` -- the
        boolean masks built from ``df_shift`` are applied to it directly.

    Returns
    -------
    DataFrame
        One row per processed date with the columns 'date' and ``ret_group``.
    """
    df_exc = pd.DataFrame(columns=['date'] + ret_group)
    ret = ret_exp.copy()
    for i, t in enumerate(time_series[1:]):
        # idx: all rows at date t; idx2: the subset with EXCHCD == 1, which
        # alone determines the breakpoints.
        # NOTE(review): EXCHCD == 1 presumably marks NYSE listings -- confirm.
        idx = (df_shift['date'] == t)
        idx2 = idx & (df_shift['EXCHCD'] == 1)
        ret_date = ret[idx].copy()
        shift_date = df_shift[idx].copy()
        # Quintile breakpoints (0%, 20%, ..., 100%) of the predicted return.
        q = ret.loc[idx2].quantile([0., 0.2, 0.4, 0.6, 0.8, 1.])
        qcut = pd.cut(ret_date, q, right=True, labels=ret_group)
        # Rows at or below the 20% breakpoint / above the 80% breakpoint are
        # forced into the extreme buckets; this also covers rows outside the
        # breakpoint range, which pd.cut leaves unassigned.
        qcut[ret_date <= q[0.2]] = 'low'
        qcut[ret_date > q[0.8]] = 'high'
        qcut.name = 'qcut'
        shift_date = pd.concat([shift_date, qcut], axis=1)
        # Market-cap weights within each bucket, merged back by PERMNO.
        weight = shift_date.groupby('qcut', observed=False)[['PERMNO', 'adj-prc', 'market-cap']].apply(share_weight).reset_index()
        shift_date = pd.merge(
            shift_date,
            weight[['PERMNO', 'weight']],
            on='PERMNO', how='left'
        )
        df_exc.loc[i, 'date'] = t
        # Weighted excess return of each bucket for this date.
        df_exc.loc[i, ret_group] = shift_date.groupby('qcut', observed=False)[['adj-prc', 'RET', 'RET-RF', 'weight', 'RF']].apply(exc_ret)
    return df_exc

In [11]:
# Unconditional model: average excess return (in percent) of each bucket,
# followed by the high-minus-low spread.
exc_un_full = exc_calc(ret_un_full)
group_un_full = 100 * exc_un_full[ret_group].mean()
print(group_un_full)
spread_un_full = group_un_full['high'] - group_un_full['low']
print(spread_un_full)

# Stage-conditional model: same two outputs.
exc_stg_full = exc_calc(ret_stg_full)
group_stg_full = 100 * exc_stg_full[ret_group].mean()
print(group_stg_full)
spread_stg_full = group_stg_full['high'] - group_stg_full['low']
print(spread_stg_full)

low    0.688
2      0.819
3      0.981
4      1.021
high   1.093
dtype: object
0.40581202029811436
low    0.657
2      0.756
3      0.944
4      1.217
high   1.160
dtype: object
0.5033801709282817


## 多空投资组合：滚动窗口估值

## EXHIBIT 9