In [None]:
import os
from os import path
import pandas as pd
import sys
basepath = path.join(os.getcwd(), "export_cn_ex")

In [2]:
file_names = os.listdir(basepath)
print(file_names)

['SZ000511.csv', 'SH600312.csv', 'SH601319.csv', 'SZ300769.csv', 'SH600566.csv', 'SZ300207.csv', 'SH600581.csv', 'SH601788.csv', 'SH600887.csv', 'SZ000800.csv', 'SH600601.csv', 'SZ000627.csv', 'SZ002371.csv', 'SH601360.csv', 'SH600161.csv', 'SZ002183.csv', 'SZ002602.csv', 'SH600535.csv', 'SH600198.csv', 'SH601916.csv', 'SH601666.csv', 'SZ000060.csv', 'SZ002120.csv', 'SZ000962.csv', 'SH603392.csv', 'SH600027.csv', 'SZ000027.csv', 'SH603260.csv', 'SH600760.csv', 'SH600035.csv', 'SZ000066.csv', 'SZ300919.csv', 'SZ000961.csv', 'SH601991.csv', 'SH600221.csv', 'SZ002230.csv', 'SZ000758.csv', 'SZ000937.csv', 'SH601555.csv', 'SH600121.csv', 'SH600276.csv', 'SH600895.csv', 'SH601236.csv', 'SH603185.csv', 'SH601699.csv', 'SH600961.csv', 'SZ002146.csv', 'SH600118.csv', 'SZ002110.csv', 'SZ000686.csv', 'SH601162.csv', 'SH601607.csv', 'SZ002916.csv', 'SH600616.csv', 'SZ300002.csv', 'SH600282.csv', 'SH601933.csv', 'SH601558.csv', 'SZ000703.csv', 'SH601728.csv', 'SZ002292.csv', 'SZ002174.csv', 'SH6005

In [3]:
import os
import pandas as pd


# 读取csi300.txt中的股票代码
with open("csi300.txt", "r") as f:
    stock_codes = [line.strip() for line in f.readlines()]

dfs = []
for stock_code in stock_codes:
    fname = f"{stock_code[:8]}.csv"
    fpath = os.path.join(basepath, fname)
    if os.path.exists(fpath):
        df = pd.read_csv(fpath, parse_dates=['date'])
        df['stock_code'] = stock_code[:8]
        dfs.append(df)

all_df = pd.concat(dfs, ignore_index=True)
all_df.set_index(['stock_code', 'date'], inplace=True)
all_df.sort_index(inplace=True)

print(all_df.head())

                            open       high        low      close   preclose  \
stock_code date                                                                
SH600000   2015-05-04  67.162262  67.162262  65.669767  66.453327  67.423448   
           2015-05-05  66.154828  66.229452  62.871339  63.729524  66.453327   
           2015-05-06  63.729524  65.333956  63.319088  63.654899  63.729524   
           2015-05-07  63.542962  64.475771  63.356400  63.692211  63.654899   
           2015-05-08  64.139960  64.550396  62.759402  63.841461  63.692211   

                             volume        amount      turn  tradestatus  \
stock_code date                                                            
SH600000   2015-05-04  6.916328e+07  4.597699e+09  1.729333          1.0   
           2015-05-05  1.006064e+08  6.483576e+09  2.515527          1.0   
           2015-05-06  8.136923e+07  5.225100e+09  2.034527          1.0   
           2015-05-07  5.745835e+07  3.674226e+09  1.436668

In [4]:
all_df.to_csv("all_stock_data.csv")

In [None]:
all_df_loaded = pd.read_csv("all_stock_data.csv", parse_dates=['date'])
all_df_loaded.set_index(['stock_code', 'date'], inplace=True)
all_df_loaded.sort_index(inplace=True)

In [7]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr


# 读取合并后的数据
all_df_loaded = pd.read_csv("all_stock_data.csv", parse_dates=['date'])
all_df_loaded.set_index(['stock_code', 'date'], inplace=True)
all_df_loaded.sort_index(inplace=True)


# 定义你的因子函数，输入为单个股票的DataFrame
def factor_func(df, lookback_period_vwma=20, lookback_period_momentum=10, market_regime='Neutral'):
    close = df['close']
    volume = df['volume']

    # VWMA Calculation
    vwma = (close * volume).rolling(window=lookback_period_vwma, min_periods=lookback_period_vwma//2).sum() / \
           volume.rolling(window=lookback_period_vwma, min_periods=lookback_period_vwma//2).sum()

    # VWMA Smoothed
    vwma_smoothed = vwma.ewm(span=lookback_period_vwma, adjust=False).mean()

    # Price Momentum
    returns = close.pct_change(lookback_period_momentum)
    momentum_smoothed = returns.ewm(span=lookback_period_momentum, adjust=False).mean()

    # Rolling Normalization Parameters
    rolling_window = 252
    minp = 60

    # Normalize VWMA and Momentum (no lookahead)
    vwma_mean = vwma_smoothed.rolling(rolling_window, min_periods=minp).mean().shift(1)
    vwma_std = vwma_smoothed.rolling(rolling_window, min_periods=minp).std().shift(1)
    vwma_normalized = (vwma_smoothed - vwma_mean) / vwma_std

    momentum_mean = momentum_smoothed.rolling(rolling_window, min_periods=minp).mean().shift(1)
    momentum_std = momentum_smoothed.rolling(rolling_window, min_periods=minp).std().shift(1)
    momentum_normalized = (momentum_smoothed - momentum_mean) / momentum_std

    # Combine Factors Based on Market Regime
    if market_regime.lower() == 'bullish':
        factor = 0.7 * vwma_normalized + 0.3 * momentum_normalized
    elif market_regime.lower() == 'bearish':
        factor = 0.3 * vwma_normalized + 0.7 * momentum_normalized
    else:  # Neutral or Sideways
        factor = 0.5 * vwma_normalized + 0.5 * momentum_normalized

    return factor.shift(1)  # Shift to avoid lookahead

# 计算每只股票的因子值
all_df_loaded['factor'] = all_df_loaded.groupby('stock_code').apply(lambda g: factor_func(g)).reset_index(level=0, drop=True)

# 计算未来6日收益率
all_df_loaded['future_return_6d'] = all_df_loaded.groupby('stock_code')['close'].shift(-6) / all_df_loaded['close'] - 1

# 取所有日期
start_date = pd.Timestamp('2021-01-01')
end_date = pd.Timestamp('2024-01-01')
all_dates = all_df_loaded.index.get_level_values('date').unique()
all_dates = all_dates[(all_dates >= start_date) & (all_dates <= end_date)]
ic_values = []

for date in all_dates:
    daily = all_df_loaded.xs(date, level='date')
    factors = daily['factor']
    returns = daily['future_return_6d']
    mask = factors.notna() & returns.notna()
    if mask.sum() >= 10:
        ic, _ = spearmanr(factors[mask], returns[mask])
        if not np.isnan(ic):
            ic_values.append(ic)

mean_ic = np.mean(ic_values)
print(f"Mean IC: {mean_ic:.10f}")

Mean IC: -0.0144855946
