In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from tqdm import tqdm
import akshare as ak



# 数据处理

读取收盘价数据，并进行检查

In [2]:
# Field management: maps the raw CSV column names to the short names used below
fields = {
    'S_DQ_ADJCLOSE': 'close',
    'S_INFO_WINDCODE': 'stkcd',
    'TRADE_DT': 'date',
    'S_DQ_VOLUME': 'volume'
}

# Preprocessing applied to a frame right after it is read from disk
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalise a freshly loaded raw frame.

    Steps:
    1. drop the 'Unnamed: 0' column if present (it is simply skipped when
       the column does not exist, instead of raising KeyError),
    2. rename the raw columns according to ``fields``,
    3. cast the 'date' column to str.

    The input frame is left untouched; a new DataFrame is returned.

    Raises
    ------
    KeyError
        If the frame has no 'date' column after renaming.
    """
    return (
        df
        .drop(columns=['Unnamed: 0'], errors='ignore')
        .rename(columns=fields)
        # assign() works on a copy, so the caller's frame is never modified
        .assign(date=lambda frame: frame['date'].astype(str))
    )
    

In [3]:
# Read the close-price CSV and run it straight through preprocess()
close_df = preprocess(pd.read_csv("./MD/CLOSE.csv"))
close_df.head()

Unnamed: 0,close,stkcd,date
0,2001.16,000001.SZ,20210901
1,3388.19,000002.SZ,20210901
2,75.51,000004.SZ,20210901
3,21.87,000005.SZ,20210901
4,174.99,000006.SZ,20210901


某些股票股价在很长一段时间内不变，且成交量为0  
例子如下

In [4]:
# Read the volume CSV the same way, then join it onto the prices;
# only (stkcd, date) pairs present in both frames are kept (inner join)
volume_df = preprocess(pd.read_csv('./MD/S_DQ_VOLUME.csv'))
close_df = close_df.merge(volume_df, how='inner', on=['stkcd', 'date'])

In [5]:
# Local data: positional rows 20-39 of the example stock
close_df.loc[close_df['stkcd'].eq('873169.BJ')].iloc[20:40]

Unnamed: 0,close,stkcd,date,volume
98215,10.75,873169.BJ,20211008,0.0
102913,10.75,873169.BJ,20211011,0.0
107610,10.75,873169.BJ,20211012,0.0
112309,10.75,873169.BJ,20211013,0.0
117009,10.75,873169.BJ,20211014,0.0
121711,10.75,873169.BJ,20211015,0.0
126415,10.75,873169.BJ,20211018,0.0
131123,10.75,873169.BJ,20211019,0.0
135834,10.75,873169.BJ,20211020,0.0
140548,10.75,873169.BJ,20211021,0.0


In [6]:
# Historical data for the same stock from the akshare Eastmoney interface,
# shown for comparison with the local data
ak.stock_zh_a_hist('873169', 'daily', '20210901', '20220501')

Unnamed: 0,日期,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率
0,2021-09-07,4.65,4.65,4.65,4.65,1,465.0,0.0,-33.57,-2.35,0.0
1,2021-09-10,5.28,5.28,5.28,5.28,2,1056.0,0.0,13.55,0.63,0.0
2,2021-09-13,5.51,5.51,5.51,5.51,18,9918.0,0.0,4.36,0.23,0.04
3,2021-09-22,11.0,11.0,11.0,11.0,22,24200.0,0.0,99.64,5.49,0.04
4,2021-09-23,10.0,10.6,10.6,10.0,14,14600.0,5.45,-3.64,-0.4,0.03
5,2021-09-24,10.66,10.75,10.75,10.66,70,74950.0,0.85,1.42,0.15,0.14
6,2022-04-15,6.81,7.32,8.86,6.61,118142,89187165.0,37.5,22.0,1.32,49.09
7,2022-04-18,6.8,6.36,6.8,6.29,69164,44989380.0,6.97,-13.11,-0.96,28.74
8,2022-04-19,6.4,6.29,6.4,6.15,28918,18056879.0,3.93,-1.1,-0.07,12.02
9,2022-04-20,6.3,6.13,6.34,6.09,23386,14444087.0,3.97,-2.54,-0.16,9.72


无论是由于停牌，还是流动性过差，成交量为0的股票是比较异常的；而东财的接口也未统计成交量为0时的交易数据  
综合考虑，我剔除了成交量为0的数据

In [7]:
# Keep only the rows whose volume is not zero
close_df = close_df.loc[close_df['volume'].ne(0)]

构造因子，需要计算月度收益率  
可能由于停牌，某些股票某些月份的交易日太少，处于异常状态，下面剔除月内交易日过少的数据

In [8]:
# Month key = first six characters of the date string.
# assign() returns a new frame instead of writing a column into close_df,
# which avoids SettingWithCopyWarning when close_df is a filtered slice,
# and .str[:6] replaces the per-row Python lambda with a vectorised slice.
close_df = close_df.assign(month=close_df['date'].str[:6])
close_df.head()

Unnamed: 0,close,stkcd,date,volume,month
0,2001.16,000001.SZ,20210901,2316894.09,202109
1,3388.19,000002.SZ,20210901,1251790.18,202109
2,75.51,000004.SZ,20210901,40542.0,202109
3,21.87,000005.SZ,20210901,608189.79,202109
4,174.99,000006.SZ,20210901,150387.04,202109


In [9]:
# Number of rows per (stock, month) pair.
# size() gives the same counts as apply(len) without calling a Python
# function once per group.
groups = close_df.groupby(['stkcd', 'month'])
counts = groups.size()
print(counts.describe())
# 5% quantile of the per-month row counts
counts.quantile(0.05)

count    124329.000000
mean         19.220463
std           4.651075
min           1.000000
25%          19.000000
50%          20.000000
75%          22.000000
max          23.000000
dtype: float64


10.0

剔除月内交易日小于10的数据

In [11]:
# Turn the (stkcd, month)-indexed counts into a flat frame whose value column is 'days'
counts = counts.reset_index(name='days')
counts

Unnamed: 0,stkcd,month,days
0,000001.SZ,202109,20
1,000001.SZ,202110,16
2,000001.SZ,202111,22
3,000001.SZ,202112,23
4,000001.SZ,202201,19
...,...,...,...
124324,873593.BJ,202305,20
124325,873593.BJ,202306,20
124326,873593.BJ,202307,21
124327,873593.BJ,202308,23


In [12]:
# Attach the per-(stock, month) day count to every daily row, then drop the
# months with fewer than 10 rows. The comparison is >= so that a month with
# exactly 10 days is kept: "fewer than 10" is the stated rule, and a strict
# > would also discard the 10-day months.
close_df = pd.merge(close_df, counts, on=['stkcd', 'month'])
close_df = close_df[close_df['days'] >= 10]
close_df.head()

Unnamed: 0,close,stkcd,date,volume,month,days
0,2001.16,000001.SZ,20210901,2316894.09,202109,20
1,2059.36,000001.SZ,20210902,2422603.54,202109,20
2,2019.07,000001.SZ,20210903,1394818.71,202109,20
3,2064.96,000001.SZ,20210906,1515225.56,202109,20
4,2153.38,000001.SZ,20210907,1622344.16,202109,20


转换成矩阵

In [14]:
# Reshape to a matrix: one row per date, one column per stock, values = close
close_df = (
    close_df
    .loc[:, ['date', 'stkcd', 'close']]
    .pivot(index='date', columns='stkcd', values='close')
)
close_df.head()

stkcd,000001.SZ,000002.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,000011.SZ,...,873122.BJ,873152.BJ,873167.BJ,873169.BJ,873223.BJ,873305.BJ,873339.BJ,873527.BJ,873576.BJ,873593.BJ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20210901,2001.16,3388.19,75.51,21.87,174.99,33.96,51.31,224.84,39.97,47.67,...,,21.85,,,,,,,11.36,
20210902,2059.36,3450.03,75.79,22.06,176.89,33.88,51.09,225.73,40.51,47.87,...,,22.06,,,,,,,,
20210903,2019.07,3406.09,75.67,21.69,177.27,33.96,51.31,203.15,40.62,48.45,...,,22.09,,,,,,10.32,12.38,
20210906,2064.96,3344.25,76.77,20.94,178.4,33.63,51.31,193.37,40.94,48.8,...,,32.77,,,,,,8.63,14.45,
20210907,2153.38,3350.76,77.9,21.22,179.92,33.63,51.76,208.57,41.16,49.45,...,,34.78,,,,,,,,


In [15]:
# Per column: True if the column contains at least one missing value
close_df.isnull().any(axis=0)

stkcd
000001.SZ    False
000002.SZ     True
000004.SZ     True
000005.SZ     True
000006.SZ    False
             ...  
873305.BJ     True
873339.BJ     True
873527.BJ     True
873576.BJ     True
873593.BJ     True
Length: 5346, dtype: bool

经检查，某些股票存在数据缺失  
可能原因是：停牌、未上市  

In [36]:
def calcu_monthly_return(close_df: pd.DataFrame, method: str,
                         min_valid_days: int = 10,
                         drop_last_row: bool = True) -> pd.DataFrame:
    """
    Compute monthly returns from a matrix of daily close prices.

    Parameters
    ----------
    close_df : pd.DataFrame
        One row per trading day, one column per stock. The index holds date
        labels whose first six characters identify the month; rows are
        expected to be in chronological order.
    method : str
        How missing values are handled:
        'drop' - remove every stock (column) that contains any missing
                 value, then use the first and last row of each month.
        'fill' - per stock and month, use the first non-NaN value as the
                 opening price and the last non-NaN value as the closing
                 price; a month with fewer than ``min_valid_days`` non-NaN
                 prices yields NaN.
    min_valid_days : int, default 10
        Minimum number of non-NaN prices a stock needs within a month for
        the 'fill' method to produce a return.
    drop_last_row : bool, default True
        Discard the last row before computing. The default matches the
        dataset used here, whose final row is the only trading day of its
        month.

    Returns
    -------
    pd.DataFrame
        Indexed by month (index name 'month'), one column per stock, each
        value being ``last_price / first_price - 1``.

    Raises
    ------
    ValueError
        If ``method`` is neither 'drop' nor 'fill'.
    """
    # Validate first so a bad argument fails before any work is done
    if method not in ('drop', 'fill'):
        raise ValueError("method参数仅能为 'drop' 或 'fill'")

    data = close_df.iloc[:-1] if drop_last_row else close_df
    if method == 'drop':
        data = data.dropna(axis=1)

    # Group by a key derived from the index rather than by a helper column:
    # a string 'month' column inside the frame would take part in the
    # price arithmetic below.
    month_key = data.index.astype(str).str[:6]
    groups = data.groupby(month_key)

    # GroupBy.first()/last() return the first/last non-null value of each
    # column per group; with no NaN left ('drop') these are simply the
    # first and last rows of the month.
    monthly_return = groups.last() / groups.first() - 1
    if method == 'fill':
        # count() is the number of non-null prices per stock and month
        monthly_return = monthly_return.where(groups.count() >= min_valid_days)

    monthly_return.index.name = 'month'
    return monthly_return
        

In [38]:
# Monthly returns using only the stocks that have no missing prices
m_return1 = calcu_monthly_return(close_df, method='drop')
m_return1

stkcd,000001.SZ,000006.SZ,000011.SZ,000012.SZ,000014.SZ,000019.SZ,000021.SZ,000025.SZ,000026.SZ,000027.SZ,...,688800.SH,688819.SH,830832.BJ,831726.BJ,832000.BJ,832566.BJ,832885.BJ,834599.BJ,835640.BJ,839167.BJ
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
202109,0.002798,-0.028116,-0.017831,-0.099824,-0.016684,0.007157,-0.127329,-0.049879,-0.073337,0.194413,...,-0.253098,-0.060747,0.356935,0.632856,0.655473,0.460606,0.369373,0.217815,0.071278,0.077238
202110,0.046136,-0.08952,-0.107554,-0.111983,-0.055308,-0.090968,0.005467,-0.050049,-0.080963,-0.159425,...,0.172786,-0.070369,-0.016511,-0.01291,-0.169349,-0.06075,-0.276684,-0.017544,-0.134555,-0.127031
202111,-0.100568,-0.014406,-0.01117,0.085173,0.034474,0.007647,0.048883,0.037132,0.060326,0.008757,...,0.472192,0.124056,-0.049067,-0.000329,-0.072653,-0.019868,0.183566,0.044579,0.058734,0.124797
202112,-0.06576,0.072095,0.091525,0.006067,0.099949,0.031207,0.0141,-0.002676,-0.004115,-0.002413,...,0.067689,-0.099896,-0.023844,0.181519,0.0891,0.063193,-0.106336,0.031375,-0.048317,-0.080161
202201,-0.049822,-0.053033,-0.075274,-0.10305,-0.006074,-0.062031,-0.176061,-0.05,-0.124663,-0.145461,...,-0.157088,-0.128045,0.020255,-0.163288,-0.117264,-0.105556,-0.068552,-0.0695,-0.108402,-0.130927
202202,-0.039048,-0.011366,-0.024254,-0.071532,0.057303,0.129915,0.008151,-0.021761,-0.020683,0.013988,...,0.026004,-0.045134,-0.119048,-0.061502,-0.134668,-0.058038,-0.101937,-0.112472,-0.051852,-0.102713
202203,-0.033921,0.175829,0.188849,-0.129073,0.393494,0.093583,-0.1662,0.031362,-0.116425,-0.108119,...,0.024824,-0.126836,-0.045492,-0.159799,-0.115789,-0.070652,-0.14507,-0.112449,-0.144623,-0.168576
202204,-0.027304,-0.195567,-0.160682,-0.239496,-0.123511,-0.127051,-0.136372,-0.249567,-0.084269,-0.097357,...,-0.107257,-0.205674,-0.314286,-0.122044,-0.186207,0.001719,-0.16935,0.008424,-0.103673,-0.404305
202205,-0.075719,0.032692,0.087658,0.076102,0.060788,0.22956,0.114858,0.747331,0.17312,0.060365,...,-0.012239,0.166545,0.041667,0.176617,0.009986,0.020529,0.058091,0.033952,0.069479,0.411909
202206,0.063921,0.066585,0.025923,0.238851,-0.064597,-0.093956,0.062707,0.087022,0.011194,0.04309,...,0.186031,0.082171,0.062954,0.009322,0.021362,-0.012645,0.117573,-0.033749,0.002542,0.181618


In [39]:
# Monthly returns using each month's first/last available price per stock
m_return2 = calcu_monthly_return(close_df, method='fill')
m_return2

stkcd,000001.SZ,000002.SZ,000004.SZ,000005.SZ,000006.SZ,000007.SZ,000008.SZ,000009.SZ,000010.SZ,000011.SZ,...,873122.BJ,873152.BJ,873167.BJ,873169.BJ,873223.BJ,873305.BJ,873339.BJ,873527.BJ,873576.BJ,873593.BJ
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
202109,0.002798,0.023535,0.021454,-0.106081,-0.028116,0.031802,-0.017346,-0.241594,-0.008006,-0.017831,...,,0.989474,,,,,,-0.012597,0.362676,
202110,0.046136,-0.142654,-0.123684,0.009448,-0.08952,0.009179,-0.080211,0.181968,0.034105,-0.107554,...,,,,,,,,-0.107814,,
202111,-0.100568,0.003851,0.117475,0.013986,-0.014406,0.280577,0.0,-0.135137,-0.042254,-0.01117,...,,0.917078,,,,,,-0.00884,,
202112,-0.06576,0.069843,-0.003535,0.036461,0.072095,-0.001583,0.342572,-0.170235,-0.040624,0.091525,...,,-0.248448,,,,,0.103738,-0.046615,,
202201,-0.049822,-0.007809,0.043944,-0.093278,-0.053033,0.022476,-0.135552,-0.085,-0.12285,-0.075274,...,,0.046703,,,,,-0.036437,0.105137,,
202202,-0.039048,-0.081779,0.002401,0.023081,-0.011366,-0.031022,0.083748,0.033888,0.054989,-0.024254,...,,-0.160728,,,,,,0.162248,,
202203,-0.033921,-0.011868,-0.075699,-0.013195,0.175829,-0.003232,-0.082971,-0.202421,-0.073392,0.188849,...,,-0.443145,,,,,0.076274,-0.138336,,
202204,-0.027304,-0.061957,-0.3569,-0.173141,-0.195567,0.047295,-0.125481,-0.206308,-0.169102,-0.160682,...,,-0.117007,,-0.151639,,,0.001945,-0.056085,,
202205,-0.075719,-0.083378,-0.191516,0.05036,0.032692,0.125532,0.247828,0.28168,0.25146,0.087658,...,,0.05061,,-0.033742,,,,-0.067027,,
202206,0.063921,0.146531,-0.076747,-0.026544,0.066585,-0.025545,-0.06368,0.079187,0.129673,0.025923,...,,0.024708,,0.021705,-0.115556,,,0.017045,,


# TSMOM

参考：

- https://zhuanlan.zhihu.com/p/60802784
- https://zhuanlan.zhihu.com/p/120835716

# FF3MOM
参考：

- https://zhuanlan.zhihu.com/p/95259084
- Fama E F, French K R. The cross‐section of expected stock returns[J]. the Journal of Finance, 1992, 47(2): 427-465.
- Blitz, D., J. Huij, and M. Martens (2011). Residual momentum. Journal of Empirical Finance, Vol. 18(3), 506 – 521.