# 风险溢价

## 加载模块

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import statsmodels.api as sm
from numpy.lib.stride_tricks import as_strided as stride

## 读入文件

In [2]:
# Load the merged stock/accounting panel and convert its 'date' column to datetimes.
df_merge = pd.read_csv('../data/stock-accounting.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [3]:
# Read the factor table: skip the first 3 lines of the file, keep 738 data rows,
# and turn the unnamed first column into an integer key called 'dint'.
# NOTE(review): 'dint' is presumably a YYYYMM stamp in the file, matching the key built below - confirm.
df_factor = (
    pd.read_csv('../data/F-F_Research_Data_5_Factors_2x3.csv', skiprows=3, nrows=738)
    .rename(columns={'Unnamed: 0': 'dint'})
    .astype({'dint': int})
)

# Sorted calendar of every distinct date in the stock panel, keyed by year * 100 + month.
time_series = np.sort(df_merge['date'].unique())
df_time = pd.DataFrame({'date': time_series})
df_time['dint'] = df_time['date'].dt.year * 100 + df_time['date'].dt.month

# Left-merge so every panel date is kept even when the factor table has no matching 'dint'.
df_factor = df_time.merge(df_factor, how='left', on='dint')

## 计算 beta

估计最大可能保留的数据行数

In [4]:
# Count the non-null dates per PERMNO and keep only stocks with at least 60 of them.
df_dura = (
    df_merge.groupby('PERMNO')['date']
    .count()
    .reset_index()
    .loc[lambda counts: counts['date'] >= 60]
)
# Rough upper bound on usable rows: total observations minus 60 per retained stock.
# NOTE(review): a 60-row rolling window yields n - 59 estimates per stock, so this
# figure may be understated by one row per stock - confirm the intended convention.
print(df_dura['date'].sum() - 60 * len(df_dura))

901786


`pandas.DataFrame.rolling().apply()` 不支持多列输入、多列输出（每次只作用于单列并返回一个标量），故使用网友提供的自定义 `roll()` 函数

来源：[DataFrame rolling apply 多列 return 多列](https://zhuanlan.zhihu.com/p/91100281)

In [71]:
def roll(df: pd.DataFrame, window: int, **kwargs):
    """Build overlapping row windows of ``df`` and return them as a groupby.

    Every window of ``window`` consecutive rows becomes its own DataFrame
    (same columns as ``df``). The windows are stacked with ``pd.concat``
    under an outer key equal to the label of the window's last row, and the
    stacked frame is grouped on that outer level.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to slide over. Its index is moved into the value array via
        ``reset_index`` so each window can recover its own row labels.
    window : int
        Number of consecutive rows per window.
    **kwargs
        Forwarded to ``DataFrame.groupby``.

    Returns
    -------
    A pandas groupby object with one group per window, keyed by
    ``df.index[window - 1:]``.
    """
    raw = df.reset_index().values
    n_rows, n_cols = raw.shape
    row_step, col_step = raw.strides
    # Reusing the row stride for the first two axes makes window i start at row i.
    windows = stride(
        raw,
        shape=(n_rows - (window - 1), window, n_cols),
        strides=(row_step, row_step, col_step),
    )

    end_labels = df.index[window - 1:]
    frames = {}
    for label, block in zip(end_labels, windows):
        # Column 0 of the block is the former index; the rest are df's columns.
        frames[label] = pd.DataFrame(
            block[:, 1:], columns=df.columns, index=block[:, 0].flatten()
        )

    stacked = pd.concat(frames)
    return stacked.groupby(level=0, **kwargs)

def roll_np(df: pd.DataFrame, apply_func: callable, window: int,
         return_col_num: int, **kwargs):
    """Apply ``apply_func`` to every rolling row window of ``df`` as a NumPy array.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to slide over. ``reset_index`` is applied first, so column 0 of
        each window holds the former index and df's columns start at 1.
    apply_func : callable
        Called as ``apply_func(values, **kwargs)`` where ``values`` is a 2-D
        array of ``window`` rows. Its result is written into one output row,
        so it must be a scalar or broadcastable to ``return_col_num`` values.
    window : int
        Number of consecutive rows per window; must be at least 1.
    return_col_num : int
        Number of columns in the returned array.
    **kwargs
        Forwarded to ``apply_func``.

    Returns
    -------
    np.ndarray
        Float array of shape ``(len(df), return_col_num)``. Row ``i`` holds the
        result for the window ending at row ``i``; the first ``window - 1``
        rows stay NaN. If ``df`` has fewer rows than ``window`` the whole
        array is NaN.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f'window must be at least 1, got {window}')

    v = df.reset_index().values
    dim0, dim1 = v.shape
    result_values = np.full((dim0, return_col_num), np.nan)

    # Too few rows for a single window: as_strided would be asked for a
    # negative dimension and raise, so return the all-NaN result instead.
    if dim0 < window:
        return result_values

    stride0, stride1 = v.strides
    # Reusing the row stride for the first two axes makes window i start at row i.
    stride_values = stride(v, (dim0 - (window - 1), window, dim1), (stride0, stride0, stride1))
    # Window k ends at row k + window - 1, hence the enumerate offset.
    for idx, values in enumerate(stride_values, window - 1):
        result_values[idx, ] = apply_func(values, **kwargs)

    return result_values

定义函数：`beta` 回归、滚动窗口回归

In [100]:
def beta_reg(df_stock):
    """Return the OLS slope of 'RET-RF' on 'Mkt-RF' for one window of rows.

    ``df_stock`` must provide the columns 'Mkt-RF' (regressor) and 'RET-RF'
    (response). A constant is added to the regressor via ``sm.add_constant``
    and the fitted coefficient on 'Mkt-RF' is returned.
    """
    design = sm.add_constant(df_stock['Mkt-RF']).astype(float)
    response = df_stock['RET-RF'].astype(float)
    fitted = sm.OLS(response, design).fit()
    return fitted.params['Mkt-RF']

def rolling_reg(df_stock, window: int = 60):
    """Rolling-window beta for one stock using the DataFrame-based ``roll`` helper.

    Parameters
    ----------
    df_stock : pd.DataFrame
        Rows of a single stock; must contain the columns read by ``beta_reg``
        ('RET-RF' and 'Mkt-RF').
    window : int, default 60
        Number of consecutive rows per regression window.

    Returns
    -------
    pd.Series
        If ``df_stock`` has fewer than ``window`` rows: an all-NaN Series
        indexed by ``df_stock.index``, so the result keeps the caller's row
        labels (for example inside ``groupby.apply``). Otherwise one beta per
        window, indexed by the label of each window's last row.
    """
    if df_stock.shape[0] < window:
        # Keep the original row labels rather than a fresh 0..n-1 range so the
        # placeholder lines up with the input rows.
        return pd.Series(np.nan, index=df_stock.index)
    # roll() builds its windows from a reset_index() copy, so no defensive copy is needed.
    return roll(df_stock, window).apply(beta_reg)

def beta_reg_np(df_stock):
    '''
    Return the OLS slope for one window given as a 2-D NumPy array.

    Column 1 of ``df_stock`` is used as the response and column 2 as the
    regressor; a column of ones is prepended to the regressor as intercept.
    When called through ``roll_np`` on a frame with columns
    ['RET-RF', 'Mkt-RF'], column 0 is the former index, column 1 is 'RET-RF'
    and column 2 is 'Mkt-RF'.
    '''
    market = df_stock[:, 2]
    design = np.column_stack([np.ones(market.size), market])
    response = df_stock[:, 1]
    fitted = sm.OLS(response, design).fit()
    # params[0] is the intercept, params[1] the slope on the regressor.
    return fitted.params[1]

def rolling_reg_np(df_stock, window: int = 60):
    '''
    Rolling-window beta for one stock using the NumPy-based ``roll_np`` helper.

    Parameters
    ----------
    df_stock : pd.DataFrame
        Rows of a single stock; must contain at least the columns
        ['RET-RF', 'Mkt-RF'].
    window : int, default 60
        Number of consecutive rows per regression window.

    Returns
    -------
    pd.Series
        Series named 'beta' indexed by ``df_stock.index``. It is all NaN when
        ``df_stock`` has fewer than ``window`` rows; otherwise it holds the
        values returned by ``roll_np``, one per input row.
    '''
    if df_stock.shape[0] < window:
        return pd.Series(np.nan, index=df_stock.index, name='beta')

    # Column order matters: beta_reg_np reads the response from column 1 and
    # the regressor from column 2 of the array that roll_np builds.
    betas = roll_np(df=df_stock[['RET-RF', 'Mkt-RF']], apply_func=beta_reg_np,
                    window=window, return_col_num=1)
    # roll_np returns shape (n_rows, 1); take the single column as a 1-D array.
    return pd.Series(betas[:, 0], index=df_stock.index, name='beta')

计算 `beta`

In [7]:
# Attach the market excess return and risk-free rate to every stock row by date;
# a left merge keeps all stock rows even when a date has no factor data.
df_merge = df_merge.merge(
    df_factor[['date', 'Mkt-RF', 'RF']],
    how='left',
    on='date',
)
# Scale both factor columns by 0.01 (presumably percent to decimal - confirm against the data file).
df_merge[['Mkt-RF', 'RF']] = df_merge[['Mkt-RF', 'RF']] * 0.01

In [92]:
%%time
df_merge['RET-RF'] = df_merge['RET'] - df_merge['RF']
df_beta = df_merge.groupby('PERMNO')[['RET-RF', 'Mkt-RF']].apply(rolling_reg_np).reset_index()

CPU times: total: 3min 3s
Wall time: 3min 4s


In [99]:
# pandas aligns the assigned Series to df_merge by index label, not by row order.
# NOTE(review): confirm df_beta's index carries df_merge's row labels.
df_merge = df_merge.assign(beta=df_beta['beta'])

## Fama-MacBeth 回归

剔除 `nan` 数据

In [104]:
# Explanatory variables used in the cross-sectional regressions.
idx_fac = ['beta', 'market-cap', 'book-to-market', 'profit', 'investment', 'past-return']
# Keep 'stg', the excess return and the factors, dropping every row with a missing value.
df_fit = df_merge.loc[:, ['stg', 'RET-RF', *idx_fac]].dropna()
# Number of complete rows left for fitting.
len(df_fit)

790998

## 风险溢价估计

## 多空投资组合

## 滚动窗口估值

## EXHIBIT 9