In [4]:
from ginkgo import services
from ginkgo.enums import FREQUENCY_TYPES, ADJUSTMENT_TYPES
import pandas as pd
from scipy.stats import spearmanr

In [2]:
# Obtain the bar service object; it is used in the next cell to fetch price bars via get_bars().
bar_service = services.data.bar_service()

In [3]:
# Load daily bars for calendar year 2023 as a DataFrame.
# NOTE(review): ADJUSTMENT_TYPES.FORE is presumably forward price adjustment — confirm against ginkgo.
all_data = bar_service.get_bars(
    start_date="2023-01-01",
    end_date="2023-12-31",
    frequency=FREQUENCY_TYPES.DAY,
    adjustment_type=ADJUSTMENT_TYPES.FORE,
    as_dataframe=True,
)

缓存结果


KeyboardInterrupt: 

In [None]:
import pandas as pd
from scipy.stats import spearmanr
import matplotlib.pyplot as plt

# Order rows by instrument code, then by time, so the per-code shifts below
# operate on chronologically ordered prices.
all_data = all_data.sort_values(['code', 'timestamp'])

# Close prices grouped per instrument; shared by both derived columns.
close_by_code = all_data.groupby('code')['close']

# 5-period momentum factor: percentage change of close over 5 rows, per code.
all_data['momentum_5'] = close_by_code.pct_change(5)

# Forward 5-period return: close 5 rows ahead divided by the current close, minus 1.
all_data['future_ret_5'] = close_by_code.shift(-5) / all_data['close'] - 1

# 计算每日 IC
def calc_daily_ic(df, factor_col, ret_col='future_ret_5', min_obs=5):
    """Compute the per-timestamp Spearman rank IC of a factor.

    For every distinct value of the ``timestamp`` column, the Spearman rank
    correlation between ``factor_col`` and ``ret_col`` is computed over the
    rows in which both values are non-null.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``timestamp``, ``factor_col`` and ``ret_col``.
    factor_col : str
        Name of the factor column.
    ret_col : str, default 'future_ret_5'
        Name of the forward-return column the factor is correlated with.
    min_obs : int, default 5
        A timestamp is scored only when it has strictly more than this many
        valid rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp`` and ``IC``, sorted by ``timestamp``. The frame
        is empty (but still has both columns) when no timestamp qualifies.
    """
    records = []
    for ts, group in df.groupby('timestamp'):
        valid = group.dropna(subset=[factor_col, ret_col])
        if len(valid) > min_obs:
            ic = spearmanr(valid[factor_col], valid[ret_col])[0]
            records.append({'timestamp': ts, 'IC': ic})

    # Explicit columns keep the frame well-formed when `records` is empty;
    # without them the sort below raises KeyError on the missing column.
    ic_df = pd.DataFrame(records, columns=['timestamp', 'IC'])
    return ic_df.sort_values('timestamp')

ic_5 = calc_daily_ic(all_data, 'momentum_5')

# Summary statistics of the daily IC series.
print("5日动量平均IC:", ic_5['IC'].mean())
print("IC 标准差:", ic_5['IC'].std())

# Plot the daily IC over time with a dashed zero reference line.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(ic_5['timestamp'], ic_5['IC'], label='Momentum 5-day IC')
ax.axhline(0, color='black', linestyle='--', linewidth=1)
ax.set_title('Daily Rank IC of 5-day Momentum Factor')
ax.set_xlabel('Date')
ax.set_ylabel('Spearman Rank IC')
ax.legend()
plt.show()


In [None]:
import pandas as pd
from scipy.stats import spearmanr
import itertools
import matplotlib.pyplot as plt
from tqdm import tqdm  # 需要先安装：pip install tqdm

def _plot_best_ic(ic_series, n, m):
    """Draw the IC time series and the IC histogram for one (n, m) combination."""
    fig, (ax_ts, ax_hist) = plt.subplots(1, 2, figsize=(14, 6))

    ic_series.plot(ax=ax_ts, title=f'IC Time Series (Momentum {n} / Future {m})')
    ax_ts.axhline(0, color='black', linestyle='--')
    ax_ts.set_ylabel('Spearman Rank IC')

    ic_series.hist(bins=40, ax=ax_hist)
    ax_hist.set_title('IC Distribution')
    ax_hist.set_xlabel('IC Value')
    ax_hist.set_ylabel('Frequency')

    fig.tight_layout()
    plt.show()


def find_best_momentum_ic_with_log(df, n_list, m_list, min_obs=5, plot=True):
    """Grid-search momentum look-back / forward-return horizon pairs by rank IC.

    For every pair ``(n, m)`` from ``n_list`` x ``m_list`` the function
    computes, per ``code``, the ``n``-row percentage change of ``close`` (the
    factor) and the ``m``-row-ahead return of ``close``, then the Spearman
    rank correlation between the two for each ``timestamp``. Each pair is
    summarised by the mean IC, the IC standard deviation and their ratio
    (``score``); progress and per-pair statistics are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``code``, ``timestamp`` and ``close``. It is not modified;
        the function works on a sorted copy.
    n_list : iterable of int
        Momentum look-back lengths (rows).
    m_list : iterable of int
        Forward-return horizons (rows).
    min_obs : int, default 5
        A timestamp contributes an IC value only when it has strictly more
        than this many rows with both factor and forward return present.
    plot : bool, default True
        When true and at least one pair produced IC values, plot the IC time
        series and histogram of the top-scoring pair.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated pair with columns ``momentum_n``, ``future_m``,
        ``mean_IC``, ``std_IC`` and ``score``, sorted by ``score`` descending.
        Empty (with those columns) when every pair was skipped.
    """
    work = df.sort_values(['code', 'timestamp']).copy()
    results = []
    ic_time_series = {}
    # Derived columns already (re)computed during this call. A factor column
    # depends only on n and a forward-return column only on m, so each one is
    # computed once instead of once per (n, m) pair.
    computed_cols = set()

    combos = list(itertools.product(n_list, m_list))
    print(f"Total combinations to process: {len(combos)}")

    for n, m in tqdm(combos, desc="Processing (n,m) combos"):
        factor_col = f'momentum_{n}'
        fut_ret_col = f'future_ret_{m}'

        # Assigning on first use also overwrites any same-named column that
        # came in with the input frame.
        if factor_col not in computed_cols:
            work[factor_col] = work.groupby('code')['close'].pct_change(n)
            computed_cols.add(factor_col)
        if fut_ret_col not in computed_cols:
            work[fut_ret_col] = work.groupby('code')['close'].shift(-m) / work['close'] - 1
            computed_cols.add(fut_ret_col)

        ic_list = []
        dates = []
        for ts, group in work.groupby('timestamp'):
            valid = group.dropna(subset=[factor_col, fut_ret_col])
            if len(valid) > min_obs:
                ic_list.append(spearmanr(valid[factor_col], valid[fut_ret_col])[0])
                dates.append(ts)

        if not ic_list:
            print(f"Skipping (n={n}, m={m}): no valid IC values")
            continue

        ic_series = pd.Series(ic_list, index=dates)
        mean_ic = ic_series.mean()
        std_ic = ic_series.std()
        score = mean_ic / std_ic if std_ic != 0 else 0

        print(f"(n={n}, m={m}) mean IC: {mean_ic:.4f}, std IC: {std_ic:.4f}, score: {score:.4f}")

        results.append({
            'momentum_n': n,
            'future_m': m,
            'mean_IC': mean_ic,
            'std_IC': std_ic,
            'score': score
        })
        ic_time_series[(n, m)] = ic_series

    # Explicit columns keep the sort valid when every pair was skipped;
    # a column-less empty frame would raise KeyError on 'score'.
    res_df = pd.DataFrame(
        results,
        columns=['momentum_n', 'future_m', 'mean_IC', 'std_IC', 'score'],
    ).sort_values('score', ascending=False).reset_index(drop=True)

    # Plot the top-scoring pair.
    if plot and not res_df.empty:
        # Column-wise access keeps the integer dtype of n and m; taking the
        # whole row with .iloc[0] would upcast them to float in the title.
        best_n = res_df.at[0, 'momentum_n']
        best_m = res_df.at[0, 'future_m']
        _plot_best_ic(ic_time_series[(best_n, best_m)], best_n, best_m)

    return res_df


In [None]:
# Usage example: search a grid of momentum look-backs (n) and forward-return horizons (m).
n_range = [3, 5, 7, 10, 20]
m_range = [3, 5, 7, 10, 14, 30]

best_ic_df = find_best_momentum_ic_with_log(
    all_data,
    n_list=n_range,
    m_list=m_range,
)
# Show the ten highest-scoring combinations.
print(best_ic_df.head(10))