# 因子回测

## 导入模块

In [8]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import feather
import os

## 日线数据读入与预处理

### 读入日线数据

In [2]:
# Load the daily price table from its Feather file. Later cells read the
# issue / date / close / ret / is_limit_buy / is_limit_sell columns from it.
price_1d_path = '../data/StockPriceK1d_20240630.feather'
price_1d = feather.read_dataframe(price_1d_path)

### 计算调仓日

In [49]:
# Sorted unique trading days present in the daily price table.
trade_date = price_1d['date'].sort_values().unique()
# Calendar month-ends starting with August 2019. The range end is pushed one
# month past the last trading day so that the month containing the last
# trading day still contributes a month-end.
mes = pd.date_range(start='2019-08-01', end=trade_date[-1] + pd.DateOffset(months=1), freq='1ME')
# Rebalance date for each month-end = the last trading day on or before it
# (raises IndexError if a month-end precedes every trading day).
# Built in a single pass instead of np.append per iteration, which re-copies
# the array each time. The object dtype is explicit so that the elements keep
# their scalar type: they must support .year/.month below and .strftime in
# the factor-loading loop.
adj_date = np.array([trade_date[trade_date <= me][-1] for me in mes], dtype=object)
# Lookup table: YYYYMM integer -> rebalance date that falls in that month.
map_mon_adj = {ad.year * 100 + ad.month: ad for ad in adj_date}

### 剔除股票

In [4]:
def IPO_time(price_1d, timedelta='180D'):
    """Drop the rows that fall within `timedelta` of the frame's earliest date.

    Parameters
    ----------
    price_1d : DataFrame with a 'date' column (intended to be called per stock
        through groupby-apply).
    timedelta : anything accepted by pd.Timedelta; length of the initial
        period to discard, measured from the earliest 'date' in the frame.

    Returns
    -------
    The rows whose 'date' is on or after (earliest date + timedelta).
    """
    dates = price_1d['date']
    cutoff = dates.min() + pd.Timedelta(timedelta)
    keep = dates >= cutoff
    return price_1d[keep]

# Step 1: per stock, discard the rows dated less than 180 days (IPO_time's
# default) after that stock's first row.
ipo_columns = ['issue', 'date', 'close', 'ret', 'is_limit_buy', 'is_limit_sell']
price_1d = price_1d.groupby('issue')[ipo_columns].apply(IPO_time).reset_index(drop=True)

# Step 2: keep only rows where neither limit flag is set
# (written as "not (buy flag or sell flag)").
# NOTE(review): these rows are removed before daily returns are compounded
# into monthly returns, so their `ret` never enters the monthly figure;
# confirm that this is intended.
any_limit_flag = price_1d['is_limit_buy'].astype(bool) | price_1d['is_limit_sell'].astype(bool)
price_1d = price_1d[~any_limit_flag]

# Step 3: restrict to the backtest window (both ends inclusive) and keep only
# the columns needed downstream.
on_or_after_start = price_1d['date'] >= '2019-08-01'
on_or_before_end = price_1d['date'] <= '2024-11-30'
price_1d = price_1d[on_or_after_start & on_or_before_end]
price_1d = price_1d[['issue', 'date', 'ret']]

### 计算每两个调仓日之间的收益

In [55]:
# Integer month key in YYYYMM form (year * 100 + month) for every daily row.
date_parts = price_1d['date'].dt
price_1d['year_mon'] = date_parts.year * 100 + date_parts.month

def ret_acc(ret):
    """Compound a sequence of simple returns into one cumulative return.

    Computes prod(1 + r) - 1 over `ret` using the object's own .prod().
    """
    growth_factors = ret + 1
    cumulative_growth = growth_factors.prod()
    return cumulative_growth - 1

def ret_acc_2(ret):
    """Compound simple returns through log space: exp(sum(log(1 + r))) - 1.

    Uses np.log1p / np.expm1 rather than log(1 + r) and exp(s) - 1. Both
    avoid the rounding that happens when a small number is added to or
    subtracted from 1, so very small returns are no longer flushed to zero.

    Parameters
    ----------
    ret : pandas Series (or array-like accepted by NumPy ufuncs) of simple
        returns.

    Returns
    -------
    The cumulative simple return over all entries of `ret`.
    """
    # log1p is a ufunc and is applied to the whole input at once; for a
    # Series the result is a Series, so .sum() is the pandas sum.
    sum_log_ret = np.log1p(ret).sum()
    return np.expm1(sum_log_ret)

# Compound the daily returns of each (stock, calendar month) into one return.
price_adj = price_1d.groupby(['issue', 'year_mon'])['ret'].apply(ret_acc).reset_index()
# Stamp every monthly return with the rebalance date of its month.
# The lookup is the YYYYMM -> date dict `map_mon_adj`; the previously
# referenced name `map_mon` is not defined in the visible notebook and raises
# NameError on a fresh kernel. Series.map with a dict leaves a missing value
# for any month that has no entry.
price_adj['date'] = price_adj['year_mon'].map(map_mon_adj)

CPU times: total: 28.7 s
Wall time: 31.5 s
CPU times: total: 49.2 s
Wall time: 52.7 s
         issue  year_mon       ret
0       000001    201908  0.002123
1       000001    201909  0.100989
2       000001    201910  0.042976
3       000001    201911 -0.059656
4       000001    201912  0.075867
...        ...       ...       ...
256147  689009    202402  0.298201
256148  689009    202403 -0.011551
256149  689009    202404  0.242404
256150  689009    202405  0.015163
256151  689009    202406 -0.018662

[256152 rows x 3 columns]
         issue  year_mon       ret
0       000001    201908  0.002123
1       000001    201909  0.100989
2       000001    201910  0.042976
3       000001    201911 -0.059656
4       000001    201912  0.075867
...        ...       ...       ...
256147  689009    202402  0.298201
256148  689009    202403 -0.011551
256149  689009    202404  0.242404
256150  689009    202405  0.015163
256151  689009    202406 -0.018662

[256152 rows x 3 columns]


## IC 测试

### 读入测试数据

In [22]:
# Collect one frame per rebalance date and concatenate once after the loop.
# Calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration.
num_frames = []
size_frames = []

for ad in adj_date:
    ad_str = ad.strftime('%Y%m%d')
    file_num = f'../data/N_connect/N_connect_num_{ad_str}.feather'
    file_size = f'../data/N_connect/N_connect_size_{ad_str}.feather'
    # A date is used only when both of its files exist.
    if (not os.path.exists(file_num)) or (not os.path.exists(file_size)):
        continue
    num_daily = feather.read_dataframe(file_num)
    num_daily['date'] = ad
    size_daily = feather.read_dataframe(file_size)
    size_daily['date'] = ad
    # Frames without rows contribute nothing to the result, so leave them out
    # of the concatenation.
    if not num_daily.empty:
        num_frames.append(num_daily)
    if not size_daily.empty:
        size_frames.append(size_daily)

# Each file's own row index is kept (no ignore_index). When nothing was
# loaded, fall back to an empty frame with the expected columns.
if num_frames:
    N_connect_num = pd.concat(num_frames)
else:
    N_connect_num = pd.DataFrame(columns=['issue', 'date', 'N_connect'])
if size_frames:
    N_connect_size = pd.concat(size_frames)
else:
    N_connect_size = pd.DataFrame(columns=['issue', 'date', 'N_connect'])

In [23]:
# Use the "num" table as the factor under test. DataFrame.copy() makes an
# independent copy, so later changes to `factor` leave N_connect_num untouched.
factor = N_connect_num.copy()

## 分组测试

## 点度中心性回测