# 因子回测 - Gitlab

## 导入模块

In [3]:
import matplotlib
import pandas as pd
import numpy as np
import sunlandsdatasdk as sd
import feather
from datetime import datetime, timedelta
import sys

sys.path.append('../../public-Git/')
from alphalens_custom.tears import sd_get_Analysis_results
from alphalens_custom.utils import get_clean_factor_and_forward_returns

import warnings
warnings.filterwarnings("ignore")

## 数据准备, 行情 / 因子值

### 读入行情

In [4]:
# Load the daily bar file and keep only rows dated 2019-01-01 or later.
prices = feather.read_dataframe('../data/StockPriceK1d_20240630.feather')
# .copy() detaches the filtered rows from the unfiltered frame, so the column
# assignment below is not a write onto a slice (SettingWithCopy pattern).
prices = prices[prices['date'] >= '2019-01-01'].copy()
# 'adj' is presumably the price-adjustment factor -- confirm against the data
# dictionary; the product is stored as the adjusted close.
prices['close_adj'] = prices['close'] * prices['adj']
# Wide table: one row per date, one column per issue.
close = prices.pivot(index='date', columns='issue', values='close_adj')

In [5]:
close

issue,000001,000002,000004,000005,000006,000007,000008,000009,000010,000011,...,688787,688788,688789,688793,688798,688799,688800,688819,688981,689009
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,992.808454,3409.765172,65.265624,24.744500,176.264678,67.262247,86.425781,31.664860,35.340521,32.608339,...,,,,,,,,,,
2019-01-03,1002.531279,3434.018732,66.037757,24.837176,176.264678,64.860024,85.534793,31.223025,35.340521,32.852989,...,,,,,,,,,,
2019-01-04,1053.306032,3556.713211,67.460109,25.485908,180.029554,66.268224,87.094021,32.180334,35.771503,33.412189,...,,,,,,,,,,
2019-01-07,1052.225718,3573.833371,69.979704,26.319993,182.767646,66.765236,90.657971,32.769448,36.633467,34.285939,...,,,,,,,,,,
2019-01-08,1043.583207,3566.699971,68.313520,26.227316,182.083123,65.025695,92.217199,33.284922,36.310230,34.041289,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-24,1247.992367,1270.109471,38.891159,,151.408162,38.187064,43.023923,77.172065,20.902625,33.786478,...,74.576040,33.280618,160.773136,41.754995,80.695248,45.151187,39.882813,26.061313,47.26,39.083379
2024-06-25,1260.497300,1291.913926,38.362857,,150.613368,38.932582,43.023923,77.707982,18.855461,33.786478,...,72.287912,33.266328,155.230246,41.740797,79.542057,44.921037,38.364144,25.985960,45.54,39.546619
2024-06-26,1260.497300,1306.450229,39.094352,,152.202955,39.098252,43.472088,78.958456,17.454769,34.007016,...,83.601436,33.937942,154.746343,42.138328,82.031262,45.370875,39.627336,26.448841,46.47,39.274717
2024-06-27,1266.749767,1271.926509,42.995660,,148.228988,39.098252,43.023923,77.707982,16.808296,33.874693,...,78.968682,33.009115,153.573244,41.527834,79.893639,44.084129,38.562848,25.673784,45.71,37.431826


### 读入因子文件

In [48]:
# Read the long-format factor file, spread it into a date x issue table and
# align it to the axes of the price table so both frames share one grid.
factor = (
    feather.read_dataframe('../data/ret_jump.feather')
    .pivot(index='date', columns='issue', values='ret_jump')
    .reindex(index=close.index, columns=close.columns)
)

In [49]:
factor

issue,000001,000002,000004,000005,000006,000007,000008,000009,000010,000011,...,688787,688788,688789,688793,688798,688799,688800,688819,688981,689009
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,,,,,,,,,,,...,,,,,,,,,,
2019-01-03,,,,,,,,,,,...,,,,,,,,,,
2019-01-04,,,,,,,,,,,...,,,,,,,,,,
2019-01-07,,,,,,,,,,,...,,,,,,,,,,
2019-01-08,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-24,,,,,,,,,,,...,,,,,,,,,,
2024-06-25,,,,,,,,,,,...,,,,,,,,,,
2024-06-26,,,,,,,,,,,...,,,,,,,,,,
2024-06-27,,,,,,,,,,,...,,,,,,,,,,


## 因子分析前置处理

### 上市日期限制标志

In [24]:
def filter_stock_byDate(close, filterdays_pre, filterdays_end):
    """
    Flag the cells usable for analysis based on listing / delisting windows.

    A cell is 1 when the price is present on that row, on the row
    ``filterdays_pre`` positions earlier and on the row ``filterdays_end``
    positions later; otherwise it is NaN. Prices below 0.01 count as missing.
    Where the look-back (look-ahead) would run off the top (bottom) of the
    table, the first (last) row of ``close`` is used instead.

    Parameters
    ----------
    close : DataFrame
        Wide price table, one row per date and one column per issue.
        The input is not modified.
    filterdays_pre : int
        Number of rows after first availability (listing) that stay unflagged.
        0 disables the look-back check.
    filterdays_end : int
        Number of rows before last availability (delisting) that stay
        unflagged. 0 disables the look-ahead check.

    Returns
    -------
    DataFrame
        float64 frame with the same index and columns as ``close``,
        holding 1 for usable cells and NaN elsewhere.
    """
    close = close.astype('float64')
    if close.empty:
        # Nothing to flag; also avoids IndexError from close.iloc[0] below.
        return close
    close[close < 0.01] = np.nan

    filterFlag_pre = close.shift(filterdays_pre)
    if filterdays_pre > 0:
        # The first rows have no look-back value: judge them by the first row.
        filterFlag_pre.iloc[:filterdays_pre] = close.iloc[0]

    filterFlag_end = close.shift(-filterdays_end)
    if filterdays_end > 0:
        # Guard is required: with 0, iloc[-0:] would select *every* row and
        # overwrite the whole frame with the last row.
        filterFlag_end.iloc[-filterdays_end:] = close.iloc[-1]

    filterFlag = close * np.nan
    filterFlag[close.notna()] = 1
    filterFlag[filterFlag_pre.isna()] = np.nan
    filterFlag[filterFlag_end.isna()] = np.nan

    return filterFlag

In [51]:
# Build the 1/NaN availability mask with a 90-row window on each side, then
# multiply it in so factor values on unflagged cells become NaN.
filterFlag = filter_stock_byDate(close, filterdays_pre=90, filterdays_end=90)
factor = factor.mul(filterFlag)

### 去除涨跌停股票

In [52]:
# Spread both limit indicators into date x issue tables.
is_limit_buy = prices.pivot(index='date', columns='issue', values='is_limit_buy')
is_limit_sell = prices.pivot(index='date', columns='issue', values='is_limit_sell')
# Blank the factor wherever either indicator equals 1.
hit_limit = (is_limit_buy == 1) | (is_limit_sell == 1)
factor[hit_limit] = np.nan

### 调整因子矩阵

In [56]:
# Collapse the wide table into long format indexed by (date, issue), with a
# single column named 'factor'.
factor = factor.stack().to_frame(name='factor')

In [57]:
factor

Unnamed: 0_level_0,Unnamed: 1_level_0,factor
date,issue,Unnamed: 2_level_1
2019-01-31,000001,0.066792
2019-01-31,000002,0.074818
2019-01-31,000004,0.028603
2019-01-31,000005,0.282652
2019-01-31,000006,0.059905
...,...,...
2024-06-28,688799,0.045917
2024-06-28,688800,0.003513
2024-06-28,688819,-0.039342
2024-06-28,688981,0.113162


## 因子分析模块