# 因子回测

## 导入模块

In [65]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
import feather
import os

## 读入日线数据

### 读入日线数据

In [66]:
# Load the daily price table from a Feather file. Code further down in this
# file reads its 'issue', 'date', 'preclose' and 'close' columns.
price_1d = feather.read_dataframe('../data/StockPriceK1d_20240630.feather')

### 计算调仓日

In [67]:
# Sorted unique trading days found in the daily price table.
trade_date = price_1d['date'].sort_values().unique()
# Calendar month-ends from 2019-08 up to one month past the last trading day.
# pd.Timestamp() keeps the offset arithmetic independent of whether unique()
# handed back numpy datetime64 values or pandas Timestamps.
mes = pd.date_range(start='2019-08-01', end=pd.Timestamp(trade_date[-1])+pd.DateOffset(months=1), freq='1ME')
# Rebalance date for each month-end = last trading day on or before it.
# Built in a single pass (np.append inside a loop re-copies the whole array on
# every iteration) and stored explicitly as Timestamp objects, so the
# `.year` / `.month` attributes used below are always available.
adj_date = np.array(
    [pd.Timestamp(trade_date[trade_date <= me][-1]) for me in mes],
    dtype=object,
)
# Lookup table: integer YYYYMM key -> rebalance date of that month.
map_mon_adj = {ad.year * 100 + ad.month: ad for ad in adj_date}

### 剔除股票

In [68]:
# def IPO_time(price_1d, timedelta='180D'):
#     start_date = price_1d['date'].min()
#     idx_ipo = price_1d['date'] >= start_date + pd.Timedelta(timedelta)
#     return price_1d[idx_ipo]

# price_1d = (
#     price_1d
#         .groupby('issue')[['issue', 'date', 'preclose', 'close', 'ret', 'is_limit_buy', 'is_limit_sell']]
#         .apply(IPO_time)
#         .reset_index(drop=True)
# )
# price_1d = price_1d[(~price_1d['is_limit_buy'].astype(bool)) & (~price_1d['is_limit_sell'].astype(bool))]
# Keep only the rows inside the backtest window (both bounds inclusive).
# The explicit .copy() makes the result an independent frame, so adding
# columns to it later does not trigger pandas' SettingWithCopyWarning.
price_1d = price_1d[(price_1d['date'] >= '2019-08-01') & (price_1d['date'] <= '2024-12-31')].copy()
# price_1d = price_1d[['issue', 'date', 'preclose', 'close', 'ret']]

### 计算每两个调仓日之间的收益

In [73]:
# Integer month key in YYYYMM form (year * 100 + month); the daily rows are
# grouped by this key, together with 'issue', further down.
price_1d['year_mon'] = price_1d['date'].dt.year * 100 + price_1d['date'].dt.month

def ret_acc_temp(ret):
    """Compound a sequence of simple returns into one cumulative return.

    Args:
        ret: Array-like of per-period simple returns that supports
            element-wise addition and ``.prod()`` (e.g. a pandas Series
            or a numpy array).

    Returns:
        The product of ``(1 + r)`` over all periods, minus 1.
    """
    growth_factors = 1 + ret
    compounded = growth_factors.prod()
    return compounded - 1

def ret_acc(price_1d):
    """Return the simple return spanned by a block of daily rows.

    The return runs from the 'preclose' of the first row to the 'close'
    of the last row; rows are taken by position, so the caller decides
    their order.

    Args:
        price_1d: DataFrame with 'preclose' and 'close' columns and at
            least one row.

    Returns:
        ``(last close - first preclose) / first preclose``.
    """
    first_row, last_row = price_1d.iloc[0], price_1d.iloc[-1]
    start_price = first_row['preclose']
    end_price = last_row['close']
    return (end_price - start_price) / start_price

# Monthly return per stock: one value per (issue, year_mon) group.
# ret_acc picks the first and last row of each group by position, so the rows
# are put into chronological order first; groupby keeps that row order inside
# every group.
price_adj = (
    price_1d
        .sort_values(['issue', 'date'])
        .groupby(['issue', 'year_mon'])[['preclose', 'close']]
        .apply(ret_acc)
        .reset_index()
)
# apply() produced one scalar per group, which reset_index() exposes as column 0.
price_adj = price_adj.rename(columns={0: 'ret'})
# Stamp each month with its rebalance date; dict.get yields None for months
# that are missing from the map.
price_adj['date'] = price_adj['year_mon'].apply(map_mon_adj.get)
# Forward return: the return of the same stock's next row (its next available
# month), aligned to the current rebalance date.
price_adj['ret_next'] = price_adj.groupby('issue')['ret'].shift(-1)
price_adj = price_adj[['issue', 'date', 'ret_next']]
# The last month of every stock has no following row, so its ret_next is NaN.
price_adj = price_adj.dropna(subset='ret_next')
price_adj

Unnamed: 0,issue,date,ret_next
0,000001,2019-08-30,0.100989
1,000001,2019-09-30,0.042976
2,000001,2019-10-31,-0.059656
3,000001,2019-11-29,0.075867
4,000001,2019-12-31,-0.055319
...,...,...,...
266533,689009,2024-01-31,0.298201
266534,689009,2024-02-29,-0.011551
266535,689009,2024-03-29,0.242404
266536,689009,2024-04-30,0.008062


## 市值行业中心化

### 读入申万行业

### 读入市值

## IC 测试

In [28]:
def IC_calc_once(factor, factor_col, ret_col):
    """Return the Spearman rank correlation between two columns.

    Args:
        factor: DataFrame holding both columns.
        factor_col: Name of the factor column.
        ret_col: Name of the return column.

    Returns:
        The correlation coefficient reported by ``scipy.stats.spearmanr``;
        the accompanying p-value is discarded.
    """
    result = stats.spearmanr(factor[factor_col], factor[ret_col])
    # The result is a (correlation, p-value) pair; only the first is needed.
    return result[0]

def IC_calc(factor, factor_col):
    """Compute the per-date rank IC of a factor against next-period returns.

    The factor table is inner-joined with the module-level ``price_adj``
    table on ('issue', 'date'). For every date, ``IC_calc_once`` then
    computes the Spearman correlation between ``factor_col`` and
    'ret_next' across the rows of that date.

    Args:
        factor: DataFrame with 'issue', 'date' and ``factor_col`` columns.
        factor_col: Name of the factor column to evaluate.

    Returns:
        The result of the per-date ``groupby(...).apply(...)``: one IC
        value per date.
    """
    # merge() builds a new frame and leaves both inputs untouched, so neither
    # a defensive copy nor pre-sorting of the inputs is needed.
    merged = pd.merge(factor, price_adj, on=['issue', 'date'], how='inner')
    IC = (
        merged
            .groupby('date')[[factor_col, 'ret_next']]
            # Forward the caller's column name; a hard-coded 'N_connect' here
            # would raise KeyError for any factor stored under another name.
            .apply(IC_calc_once, factor_col=factor_col, ret_col='ret_next')
    )
    return IC

## 分组测试

## 点度中心性回测

### 读入数据

In [29]:
# Load the two saved factor tables from Feather files. The IC test below reads
# the 'N_connect' column of each and joins them to returns on 'issue' / 'date'.
N_connect_num = feather.read_dataframe('../data/N_connect_bak/N_connect_num.feather')
N_connect_size = feather.read_dataframe('../data/N_connect_bak/N_connect_size.feather')

### IC 测试

In [35]:
# Per-date IC series of the two factor tables.
IC_num = IC_calc(N_connect_num, factor_col='N_connect')
IC_size = IC_calc(N_connect_size, factor_col='N_connect')

# Summary table: one row per factor with mean IC, IC information ratio
# (mean / std), t-statistic of the mean IC, and the extreme IC values.
df_IC = pd.DataFrame(columns=['factor', 'IC_mean', 'IC_IR', 't', "max", 'min'])
for row, (label, ic) in enumerate([('N_connect_num', IC_num), ('N_connect_size', IC_size)]):
    # mean() and std() skip NaN ICs, so the t-statistic must be scaled by the
    # number of non-NaN observations too; len() would also count the NaN ones.
    n_obs = ic.count()
    df_IC.loc[row] = [
        label,
        ic.mean(),
        ic.mean() / ic.std(),
        np.sqrt(n_obs) * ic.mean() / ic.std(),
        ic.max(),
        ic.min(),
    ]
df_IC

Unnamed: 0,factor,IC_mean,IC_IR,t,max,min
0,N_connect_num,0.021078,0.290565,2.21288,0.134607,-0.172123
1,N_connect_size,0.029021,0.351431,2.676418,0.179029,-0.207791


### 分组测试