In [90]:
import pandas as pd
import numpy as np
from pathlib import Path

# 1. 基础环境与路径配置

In [91]:
# Project root (relative to the notebook's working directory) and input data files
root_path = Path('../')
returns_path = root_path.joinpath('data', 'monthly_returns.pkl')
csi500_path = root_path.joinpath('data', 'csi500_mask_monthly.pkl')

# Date boundaries used by the cells below:
#   start    - first date of return history that is loaded (look-back data)
#   backtest - first date kept in every factor table
#   end      - upper bound of every date slice
start, backtest, end = (
    pd.Timestamp(day) for day in ('2009-01-01', '2014-01-01', '2024-01-01')
)

# for test
# start = pd.to_datetime('2023-01-03')
# backtest = pd.to_datetime('2023-06-01')

# 2. 读取收益率数据并截取区间

In [92]:
# Load the pickled return table and keep only the rows labelled within [start, end].
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
returns = pd.read_pickle(returns_path).loc[start:end]
returns.head()

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,0.230444,0.091473,0.0,0.182584,0.108871,0.111562,0.111111,0.208145,0.467337,0.116279,...,,,,,,,,,,0.0
2009-02-28,0.185567,0.015625,0.0,0.054632,0.076364,0.175182,0.180556,0.073034,0.356164,0.098958,...,,,,,,,,,,0.0
2009-03-31,0.155072,0.158042,0.0,0.240991,0.277027,0.397516,0.072941,0.240838,0.233586,0.35703,...,,,,,,,,,,0.0
2009-04-30,0.023839,0.024155,0.0,0.15245,-0.071429,0.09,0.059211,-0.053446,0.0,0.122235,...,,,,,,,,,,0.0
2009-05-31,0.093137,0.148585,0.0,0.070866,0.156695,0.160041,0.016563,0.02526,0.032753,-0.045643,...,,,,,,,,,,0.0


In [93]:
# Preview the first five rows of the sliced return table (same view as the previous cell)
returns.head(n=5)

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,0.230444,0.091473,0.0,0.182584,0.108871,0.111562,0.111111,0.208145,0.467337,0.116279,...,,,,,,,,,,0.0
2009-02-28,0.185567,0.015625,0.0,0.054632,0.076364,0.175182,0.180556,0.073034,0.356164,0.098958,...,,,,,,,,,,0.0
2009-03-31,0.155072,0.158042,0.0,0.240991,0.277027,0.397516,0.072941,0.240838,0.233586,0.35703,...,,,,,,,,,,0.0
2009-04-30,0.023839,0.024155,0.0,0.15245,-0.071429,0.09,0.059211,-0.053446,0.0,0.122235,...,,,,,,,,,,0.0
2009-05-31,0.093137,0.148585,0.0,0.070866,0.156695,0.160041,0.016563,0.02526,0.032753,-0.045643,...,,,,,,,,,,0.0


# 3. 计算动量因子
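# - `mom_minus1_0`：当月（t 月）收益
# - `mom_minus12_minus1`：t-12 月末至 t-1 月末的累积收益（覆盖 t-11 至 t-1 月，共 11 个月）
# - `mom_minus36_minus12`：t-36 月末至 t-12 月末的累积收益（覆盖 t-35 至 t-12 月，共 24 个月）
# - `mom_minus11_minus2`：t-11 月末至 t-2 月末的累积收益（覆盖 t-10 至 t-2 月，共 9 个月）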
# 注：多月累积收益按复利计算：先用 `np.log1p` 将月收益转为对数收益并滚动求和，再用 `np.exp(...) - 1` 还原为简单收益。

In [94]:
# One-period factor: the return table itself, restricted to [backtest, end]
mom_minus1_0 = returns.loc[backtest:end]
mom_minus1_0.head()

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-31,-0.069388,-0.080946,0.0,0.055794,-0.06,-0.11359,-0.028818,-0.047778,-0.008466,-0.100391,...,,,,,,,,,,0.0
2014-02-28,-0.023684,-0.089431,0.0,0.146341,0.0,-0.016018,-0.041543,0.121354,0.19317,0.056522,...,,,,,,,,,,0.0
2014-03-31,-0.032345,0.203869,0.0,-0.094326,0.076596,0.253488,-0.059598,-0.011446,-0.057245,-0.133059,...,,,,,,,,,,0.0
2014-04-30,0.034355,-0.023486,0.0,0.054033,-0.094862,-0.100186,-0.005761,-0.023158,-0.045541,-0.129747,...,,,,,,,,,,0.0
2014-05-31,0.032316,0.141757,0.0,-0.023031,-0.017467,-0.028866,-0.047185,-0.057112,0.014911,-0.047273,...,,,,,,,,,,0.0


In [95]:
# Compounded return over the 11 rows ending one row before t (t-11 .. t-1):
# sum log(1 + r) over an 11-row window, lag the result by one row, map back with
# exp(.) - 1, then keep only [backtest, end]. Windows with fewer than 11 valid
# observations yield NaN (rolling's default min_periods equals the window size).
mom_minus12_minus1 = (
    returns
    .pipe(np.log1p)
    .rolling(window=11)
    .sum()
    .shift(1)
    .pipe(np.exp)
    .sub(1)
    .loc[backtest:end]
)
mom_minus12_minus1.head()

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-31,-0.062901,-0.320578,0.0,0.25,-0.193548,-0.044386,-0.241115,0.462226,0.121388,0.640489,...,,,,,,,,,,0.0
2014-02-28,-0.199966,-0.375054,0.0,0.223881,-0.219269,-0.100907,-0.313646,0.24293,0.185126,0.475799,...,,,,,,,,,,0.0
2014-03-31,-0.107108,-0.365365,0.0,0.477987,-0.12963,0.024275,-0.181242,0.301286,0.293379,0.559214,...,,,,,,,,,,0.0
2014-04-30,-0.070379,-0.254684,0.0,0.200188,-0.030651,0.313097,-0.25,0.386861,0.224256,0.058972,...,,,,,,,,,,0.0
2014-05-31,-0.143348,-0.340568,0.0,0.147485,-0.167273,0.039759,-0.288994,0.06422,-0.10628,-0.078427,...,,,,,,,,,,0.0


In [96]:
# Compounded return over the 24 rows t-35 .. t-12:
# rolling(window=24) takes 24 consecutive rows; shift(12) lags the window so that
# the 12 most recent rows (t-11 .. t) are excluded. Result is limited to [backtest, end].
mom_minus36_minus12 = (
    returns
    .pipe(np.log1p)
    .rolling(window=24)
    .sum()
    .shift(12)
    .pipe(np.exp)
    .sub(1)
    .loc[backtest:end]
)
mom_minus36_minus12.head()

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-31,0.388432,0.504895,0.0,-0.232922,-0.114286,0.333161,1.190419,0.02073,-0.452702,0.278195,...,,,,,,,,,,0.0
2014-02-28,0.454553,0.511013,0.0,-0.269091,-0.291765,0.23688,1.265283,0.052672,-0.583209,0.278195,...,,,,,,,,,,0.0
2014-03-31,0.260548,0.272242,0.0,-0.276176,-0.333333,-0.016162,0.803429,0.25382,-0.451882,0.278195,...,,,,,,,,,,0.0
2014-04-30,0.035113,0.336463,0.0,-0.152191,-0.323834,-0.029359,0.770492,0.118367,-0.431584,0.631579,...,,,,,,,,,,0.0
2014-05-31,0.1974,0.575249,0.0,-0.044788,-0.287565,0.215648,1.16986,0.54473,-0.199545,0.631579,...,,,,,,,,,,0.0


In [97]:
# Compounded return over the 9 rows t-10 .. t-2:
# a 9-row rolling sum of log(1 + r), lagged by two rows so the two most recent
# rows (t-1 and t) are skipped, mapped back with exp(.) - 1 and cut to [backtest, end].
mom_minus11_minus2 = (
    returns
    .pipe(np.log1p)
    .rolling(window=9)
    .sum()
    .shift(2)
    .pipe(np.exp)
    .sub(1)
    .loc[backtest:end]
)
mom_minus11_minus2.head()

asset,000001.SZ,000002.SZ,000003.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,...,688788.SH,688789.SH,688793.SH,688798.SH,688799.SH,688800.SH,688819.SH,688981.SH,689009.SH,T00018.SH
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-31,-0.045573,-0.258194,0.0,0.244776,-0.116279,0.191247,-0.279022,0.457578,0.238248,0.612685,...,,,,,,,,,,0.0
2014-02-28,-0.017257,-0.241649,0.0,0.221174,-0.074074,0.174344,-0.120406,0.218687,0.093241,0.640489,...,,,,,,,,,,0.0
2014-03-31,-0.016001,-0.320095,0.0,0.156015,-0.099617,0.064608,-0.167901,0.251095,0.088357,0.156156,...,,,,,,,,,,0.0
2014-04-30,-0.144117,-0.439065,0.0,0.202046,-0.145455,-0.078152,-0.239553,0.102064,-0.006781,0.221504,...,,,,,,,,,,0.0
2014-05-31,0.080241,-0.17868,0.0,0.277,0.109649,0.265258,-0.083019,0.155718,0.104056,0.058972,...,,,,,,,,,,0.0


# 4. 辅助函数：宽表转长表

In [98]:
def wide_to_long_with_colname(wide_df, col_name):
    # stack() 将资产列转为行，得到 (date, asset) 双索引的Series
    long_df = wide_df.stack(dropna=False, future_stack=False).to_frame(name=col_name)
    long_df.index.set_names(["date", "asset"], inplace=True)
    return long_df

# 5. 合并长表数据

In [99]:
# Reshape each wide factor table into a one-column long table whose column
# name equals the factor name.
long_mom1, long_mom12, long_mom36, long_mom11 = (
    wide_to_long_with_colname(factor_wide, factor_name)
    for factor_wide, factor_name in (
        (mom_minus1_0, 'mom_minus1_0'),
        (mom_minus12_minus1, 'mom_minus12_minus1'),
        (mom_minus36_minus12, 'mom_minus36_minus12'),
        (mom_minus11_minus2, 'mom_minus11_minus2'),
    )
)

  long_df = wide_df.stack(dropna=False, future_stack=False).to_frame(name=col_name)
  long_df = wide_df.stack(dropna=False, future_stack=False).to_frame(name=col_name)
  long_df = wide_df.stack(dropna=False, future_stack=False).to_frame(name=col_name)
  long_df = wide_df.stack(dropna=False, future_stack=False).to_frame(name=col_name)


In [100]:
# Align the four long factor tables on their shared (date, asset) index and
# place them side by side, one column per factor.
final_df = pd.concat(
    objs=[long_mom1, long_mom12, long_mom36, long_mom11],
    axis='columns',
)
# Display the combined table
final_df

Unnamed: 0_level_0,Unnamed: 1_level_0,mom_minus1_0,mom_minus12_minus1,mom_minus36_minus12,mom_minus11_minus2
date,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-31,000001.SZ,-0.069388,-0.062901,0.388432,-0.045573
2014-01-31,000002.SZ,-0.080946,-0.320578,0.504895,-0.258194
2014-01-31,000003.SZ,0.000000,0.000000,0.000000,0.000000
2014-01-31,000004.SZ,0.055794,0.250000,-0.232922,0.244776
2014-01-31,000005.SZ,-0.060000,-0.193548,-0.114286,-0.116279
...,...,...,...,...,...
2023-12-31,688800.SH,-0.081600,-0.421846,,-0.472909
2023-12-31,688819.SH,-0.056757,-0.180683,,-0.172940
2023-12-31,688981.SH,-0.012663,0.305299,-0.287619,0.367654
2023-12-31,689009.SH,-0.103927,0.085602,-0.644722,-0.012052


# 6. 导出结果

In [101]:
# Persist the combined factor table next to the input data files
final_df.to_pickle(path=root_path.joinpath('data', 'momentum_factors.pkl'))