In [253]:
import pandas as pd
import numpy as np
import scipy.stats as st

In [256]:
def rank_data(x):
    '''
    Rank the shrunk betas of all securities within a single date.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows belonging to one date; must contain a "weight_beta" column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``x`` minus "weight_beta", plus a
        "rank" column holding ``scipy.stats.rankdata`` of "weight_beta"
        (1-based, ties receive the average of their ranks).

    Notes
    -----
    The input frame is left untouched: the result is built on a new frame
    instead of writing the "rank" column into ``x`` first.
    '''
    # Compute ranks from the original column, then attach them to a frame
    # that no longer carries "weight_beta".
    beta_rank = st.rankdata(x["weight_beta"])
    return x.drop(columns=["weight_beta"]).assign(rank=beta_rank)

In [257]:
def rank_weight(x):
    '''
    Turn the within-date beta ranks into high-/low-beta portfolio weights.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows belonging to one date; must contain a "rank" column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``x`` with two extra columns:
        "w_H" = k * max(rank - mean_rank, 0)  (above-average ranks),
        "w_L" = k * max(mean_rank - rank, 0)  (below-average ranks),
        where k = 2 / sum(|rank - mean_rank|), so that each of the two
        weight columns sums to 1.

    Notes
    -----
    * The input frame is not modified; a copy is returned.
    * If every rank equals the mean (e.g. a single security on the date, or
      an empty frame) the normalising sum is 0. Instead of dividing by zero
      and producing NaN weights, both weight columns are set to 0, i.e. no
      position is taken.
    '''
    deviation = x["rank"] - np.mean(x["rank"])
    total_abs_deviation = np.sum(np.abs(deviation))

    # Guard the degenerate cross-section: 2 / 0 would give inf, and
    # inf * 0 would turn every weight into NaN.
    if total_abs_deviation > 0:
        k = 2 / total_abs_deviation
    else:
        k = 0.0

    weighted = x.copy()
    weighted["w_H"] = k * np.maximum(deviation, 0)
    weighted["w_L"] = k * -np.minimum(deviation, 0)
    return weighted

In [259]:
# ---- Configuration ----------------------------------------------------------
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
DATA_PATH = r"D:\学习资料\实证金融学\data_replication\stock1_ex_ante_beta.csv"
# First per-security observation index (0-based) that is kept for the analysis.
# NOTE(review): 1826 presumably corresponds to a ~5-year daily warm-up window
# for the ex-ante beta -- confirm against the beta estimation step.
MIN_OBS = 1826

stock = pd.read_csv(DATA_PATH)

# ---- Shrunk beta -------------------------------------------------------------
# weight_beta is the weighted average of the security's beta and beta_xs:
#     weight_beta = w * beta + (1 - w) * beta_xs
w = 0.6
beta_xs = 1
stock["weight_beta"] = w * stock["beta"] + (1 - w) * beta_xs

# ---- Keep only the valid rows ------------------------------------------------
# `obs` is the 0-based running count of each security's rows, in file order.
# cumcount() is aligned on the row index, so the numbering stays correct even
# when the rows of one id are not stored contiguously / sorted by id.
stock["obs"] = stock.groupby("id").cumcount()
stock_val = stock.loc[stock["obs"] >= MIN_OBS, :]

# ---- Ranks and portfolio weights per date ------------------------------------
# Rank the betas of all securities within each date (rows without a beta are
# left out here and handled after the merge below).
# group_keys=False keeps "date" as a plain column only, so the second groupby
# on "date" is unambiguous; columns are selected with a list, not a tuple.
ranks = (
    stock_val.loc[pd.notna(stock_val["weight_beta"]), :]
    .groupby("date", group_keys=False)[["id", "date", "weight_beta"]]
    .apply(rank_data)
)
# Convert the ranks into the investment weights of each security within a date.
ranks = (
    ranks.groupby("date", group_keys=False)[["id", "date", "rank"]]
    .apply(rank_weight)
)
# Merge the weights back into stock_val to obtain stock_fin.
stock_fin = pd.merge(stock_val, ranks, on=["id", "date"], how="left")
stock_fin = stock_fin.sort_values(by=["id", "date"])

# ---- Missing-value handling --------------------------------------------------
# The few missing returns are filled with 0.
stock_fin.loc[pd.isna(stock_fin["ret"]), "ret"] = 0
# If a security's beta could not be computed on a date, its weights are set to
# 0 (it is not invested in). Its beta is then set to 1; because the weight is
# 0 the value chosen does not affect the BAB factor.
# The mask is taken once, before weight_beta itself is filled.
no_beta = pd.isna(stock_fin["weight_beta"])
stock_fin.loc[no_beta, ["w_H", "w_L"]] = 0
stock_fin.loc[no_beta, "weight_beta"] = 1

# ---- Returns and betas of the high-/low-beta portfolios -----------------------
# Weights applied to a row's return are the same security's weights from its
# previous row (shift within id).
stock_fin["w_H1"] = stock_fin.groupby("id")["w_H"].shift(1)
stock_fin["w_L1"] = stock_fin.groupby("id")["w_L"].shift(1)


def _dot_by_date(frame, value_col, weight_col, name):
    """Per-date dot product of `value_col` and `weight_col`, as a Series named `name`."""
    result = (
        frame.groupby("date")[[value_col, weight_col]]
        .apply(lambda g: np.dot(g[value_col], g[weight_col]))
    )
    result.name = name
    return result


# Returns need the lagged weights, which do not exist on a security's first
# kept row (obs == MIN_OBS), hence the strict inequality.
with_lagged_weights = stock_fin.loc[stock_fin["obs"] > MIN_OBS, :]
r_L = _dot_by_date(with_lagged_weights, "ret", "w_L1", "r_L")
r_H = _dot_by_date(with_lagged_weights, "ret", "w_H1", "r_H")

# Portfolio betas use the contemporaneous weights.
with_weights = stock_fin.loc[stock_fin["obs"] >= MIN_OBS, :]
beta_L = _dot_by_date(with_weights, "weight_beta", "w_L", "beta_L")
beta_H = _dot_by_date(with_weights, "weight_beta", "w_H", "beta_H")

# ---- BAB factor ----------------------------------------------------------------
BAB_factor_L = pd.merge(beta_L, r_L, how="outer", on="date")
BAB_factor_H = pd.merge(beta_H, r_H, how="outer", on="date")
BAB_factor = pd.merge(BAB_factor_L, BAB_factor_H, how="outer", on="date")
market = stock_fin[["date", "mr"]]
market = market.drop_duplicates(subset=["date", "mr"])
market = market.set_index("date")
BAB_factor = pd.merge(BAB_factor, market, how="outer", on="date")
# Each leg is scaled by its own portfolio beta from the previous date.
BAB_factor["beta_L1"] = BAB_factor["beta_L"].shift(1)
BAB_factor["beta_H1"] = BAB_factor["beta_H"].shift(1)
# The high-beta leg is divided by beta_H1 (not beta_L1).
# NOTE(review): the published BAB definition subtracts the risk-free rate from
# each leg; confirm that `mr` is the intended series here.
BAB_factor["BAB"] = (
    (BAB_factor["r_L"] - BAB_factor["mr"]) / BAB_factor["beta_L1"]
    - (BAB_factor["r_H"] - BAB_factor["mr"]) / BAB_factor["beta_H1"]
)

In [260]:
# -----------------------------------------以下为测试代码----------------------------------------------------