In [26]:
import pandas as pd
from pathlib import Path

# 1. 基础配置

In [27]:
# Project root, relative to the notebook's working directory.
root_path = Path('../')
data_dir = root_path / 'data'

# Input pickles. NOTE: `BM_mcap` holds a *path* at this point; a later cell
# rebinds the same name to the DataFrame loaded from it.
mom_path = data_dir / 'momentum_factors.pkl'
BM_mcap = data_dir / 'BM_mcap.pkl'
csi500_path = data_dir / 'csi500_mask_monthly.pkl'

# Bounds of the backtest window, used to slice the CSI500 mask by date label.
backtest, end = (pd.to_datetime(day) for day in ('2014-01-01', '2024-01-01'))

# 2. 根据 CSI500 掩码筛选样本股票
# - 仅保留在回测区间内曾进入 CSI500 的股票；
# - 记录样本股票数量，便于检查。

In [28]:
# Load the CSI500 membership mask and cut it down to the backtest window.
# Assumes the mask is indexed by date (rows) with one column per stock -- TODO confirm.
mask = pd.read_pickle(csi500_path)
mask_slice = mask.loc[backtest:end]  # label-based slice over the time window

# A stock joins the sample if it is flagged at least once inside the window.
in_window = mask_slice.any(axis=0)

# De-duplicate first and sort last: sorted(set(...)) gives the same ticker
# order (and therefore the same column order of `mask`) on every run.
# Sorting and then calling list(set(...)) throws the sort away, because set
# iteration order for strings is not stable across interpreter sessions.
tickers = sorted(set(mask_slice.columns[in_window].tolist()))

mask = mask[tickers]
print(len(tickers))  # sample size, as a quick sanity check

1261


# 3. 读取动量与 LOGBM/LOGME 数据

In [29]:
# Momentum factors.
mom = pd.read_pickle(mom_path)

# LOGBM / LOGME table. The path is built here instead of being read from the
# `BM_mcap` name, because this cell rebinds `BM_mcap` to the loaded DataFrame:
# passing `BM_mcap` itself to read_pickle only works on the first execution
# and breaks as soon as the cell is re-run in the same kernel.
bm_mcap_path = root_path / 'data' / 'BM_mcap.pkl'
BM_mcap = pd.read_pickle(bm_mcap_path)

# 4. 依据样本股票过滤数据
# 确保两个数据集仅包含目标股票，避免无效索引。

In [30]:
# Row masks: True where the "asset" index level belongs to the sample universe.
mom_mask = mom.index.get_level_values("asset").isin(tickers)
BM_mcap_mask = BM_mcap.index.get_level_values("asset").isin(tickers)

# Apply both masks; each table keeps only rows for the selected tickers.
mom, BM_mcap = mom.loc[mom_mask], BM_mcap.loc[BM_mcap_mask]

# 5. 因子合并

In [33]:
# Place the LOGBM/LOGME columns and the momentum columns side by side,
# aligned on their row index. pd.concat's default outer join keeps rows that
# exist in only one table and fills the missing side with NaN.
factor_frames = [BM_mcap, mom]
data = pd.concat(factor_frames, axis="columns")
data

Unnamed: 0_level_0,Unnamed: 1_level_0,LOGBM,LOGME,mom_minus1_0,mom_minus12_minus1,mom_minus36_minus12,mom_minus11_minus2
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-31,000630.SZ,-0.144187,23.266707,-0.106786,-0.486667,-0.291048,-0.400022
2014-01-31,000830.SZ,0.013693,22.429445,-0.089588,-0.107991,-0.171462,-0.063966
2014-01-31,600183.SH,-0.463860,22.673411,0.000000,0.121703,-0.391172,0.188129
2014-01-31,600797.SH,-1.101209,22.424620,0.181004,0.273973,-0.316381,0.421296
2014-01-31,002573.SZ,-1.687602,23.192397,0.000000,1.084805,,0.967867
...,...,...,...,...,...,...,...
2023-12-31,600337.SH,-0.041526,22.130229,0.029851,-0.049645,-0.382977,-0.143345
2023-12-31,600859.SH,0.082621,23.620654,-0.080069,-0.380146,-0.116800,-0.370823
2023-12-31,600655.SH,0.407367,23.909402,-0.037209,-0.109750,-0.089189,-0.136875
2023-12-31,600636.SH,-0.538538,22.254360,0.018357,0.101871,-0.283442,-0.104571


In [35]:
# Write the merged factor table to CSV in the project's data folder.
merged_csv_path = root_path / 'data' / 'LOGBM_LOGME_MOM.csv'
data.to_csv(merged_csv_path)