In [7]:
# Please install OpenAI SDK first: `pip3 install openai`
from openai import OpenAI
from machine_lib import * 
import pandas as pd
import json

# Build the chat client. The API key comes from the project helper
# `get_deepseek_api` (star-imported from machine_lib); the OpenAI SDK is pointed
# at the DeepSeek endpoint through `base_url`.
my_api_key = get_deepseek_api()
client = OpenAI(api_key=my_api_key, base_url="https://api.deepseek.com")

# Log in to WorldQuant; the returned object `s` is used later for backtesting
# (it is passed to `get_datafields` in the next cell).
s = login()


In [8]:
# Fetch the data fields of dataset 'pv1' (region USA, universe TOP3000, delay 1).
# NOTE(review): presumably returns a DataFrame (judging by the `df_` prefix) — confirm in machine_lib.
df_datafields = get_datafields(s, dataset_id = 'pv1', region='USA', universe='TOP3000', delay=1)

# Convert the data fields to text so they can be embedded in a prompt.
len_datafield, text_datafields = convert_datafields_to_text(df_datafields)

# Read the operators.xlsx file (path is relative to the notebook's working directory).
df_operators = pd.read_excel('operators.xlsx')

# Convert the operator table to text.
operators = convert_operator_to_text(df_operators)
# print(operators)

In [9]:
# System prompt for the first round: sets the model's role (quant expert) and
# lists the three tasks it must perform.
system_prompt_round1 = """
你是一个金融量化领域专家，你首先需要完成以下工作：
1. 理解发送给你的operators的含义及使用方法；
2. 理解发送给你的金融数据的字段含义、描述、使用方法；
3. 根据operators和金融数据，构造alpha表达式。
"""

# User prompt for the first round.
# NOTE: this is a plain string, not an f-string. The `{operators}` and
# `{text_datafields}` placeholders stay literal until the template is filled,
# e.g. with `user_prompt_round1.format(operators=..., text_datafields=...)`.
user_prompt_round1 = """
你需要根据以下数据, 构造1个alpha表达式:
operators: {operators}
financial_data: {text_datafields}
注意：
1、alpha表达式中的操作符和数据字段必须是operators和text_datafields中的内容,并保持一致。
2、构造完alpha表达式后, 你需要把第一步中用到的数据字段进行增强, 增强方法是: winsorize(ts_backfill(field, 120), std=4)
3、这是一个alpha表达式的例子: "ts_mean(winsorize(ts_backfill(close, 120), std=4), 5) / ts_mean(winsorize(ts_backfill(volume, 120), std=4), 20)"
4、返回alpha结果时, 只返回alpha表达式, 不要返回其他内容。
"""

def user_prompt_roundn(alpha_list, test_result):
    """Build the refinement prompt used from the second round onwards.

    Args:
        alpha_list: The alpha expression(s) produced in the previous round;
            interpolated into the prompt with its default string form.
        test_result: The backtest metrics of the previous round; interpolated
            into the prompt with its default string form.

    Returns:
        str: The prompt text asking the model to optimise the previous alpha
        towards the stated sharpe / fitness / drawdown / turnover targets.
    """
    # The template is returned directly; its indentation is part of the text sent to the model.
    return f"""
    一、上一轮的alpha表达式为{alpha_list}, 测试结果为{test_result}, 请根据上一轮的测试结果, 优化alpha表达式。
    二、最终的要求如下:
        1、alpha表达式中的操作符和数据字段必须在operators:和financial_data中;
        2、最终要求: sharpe>1.5, fitness>1.25, max_drawdown<0.1, turnover<0.1;
        3、返回结果只需包含alpha表达式, 不要返回其他内容。
    """

In [10]:
# 三、可学习下面的alpha构造形式:
#         1、trade_when(group_rank(ts_std_dev(returns,60), sector) > 0.7, trade_when(ts_regression(returns, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(group_rank(cap, sector),range='0.1, 1, 0.1'))),densify(bucket(rank(cap), range='0.1, 1, 0.1'))), 20, lag = 0, rettype = 2) > 0, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(group_rank(cap, sector),range='0.1, 1, 0.1'))),densify(bucket(rank(cap), range='0.1, 1, 0.1'))), -1), abs(returns) > 0.1);
#         2、trade_when(ts_arg_max(close, 5) == 0, trade_when(ts_regression(returns, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(group_rank(cap, sector),range='0.1, 1, 0.1'))),densify(bucket(rank(cap), range='0.1, 1, 0.1'))), 20, lag = 0, rettype = 2) > 0, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(group_rank(cap, sector),range='0.1, 1, 0.1'))),densify(bucket(rank(cap), range='0.1, 1, 0.1'))), -1), -1)
#         3、group_neutralize(ts_sum(winsorize(ts_backfill(vec_sum(nws12_prez_short_interest), 120), std=4), 240),densify(sector))
#         4、trade_when(ts_regression(returns, group_neutralize(group_zscore(group_neutralize(trade_when(ts_arg_max(close, 20) == 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), -1),densify(market)),densify(market)),densify(market)), 20, lag = 0, rettype = 2) > 0, group_neutralize(group_zscore(group_neutralize(trade_when(ts_arg_max(close, 20) == 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), -1),densify(market)),densify(market)),densify(market)), abs(returns) > 0.1)
#         5、trade_when(ts_regression(returns, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(rank(cap), range='0.1, 1, 0.1'))),densify(industry)), 20, lag = 0, rettype = 2) > 0, group_neutralize(group_zscore(trade_when(ts_corr(close, volume, 20) > 0, group_rank(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_newqeventv110_optfvgrq), 120), std=4), 22),densify(sector)), abs(returns) > 0.1),densify(bucket(rank(cap), range='0.1, 1, 0.1'))),densify(industry)), -1)
#         6、trade_when(ts_corr(close, volume, 5) < 0, group_zscore(-ts_std_dev(winsorize(ts_backfill(vec_avg(fnd6_eventv110_optlifeq), 120), std=4), 66),densify(bucket(group_rank(assets, sector),range='0.1, 1, 0.1'))), -1)

In [11]:
def generate_alpha(system_prompt, user_prompt, client):
    """Ask the chat model for one alpha expression.

    Args:
        system_prompt: Text sent as the "system" message.
        user_prompt: Text sent as the "user" message.
        client: An OpenAI-SDK-compatible client exposing
            `chat.completions.create(...)`.

    Returns:
        list: A one-element list holding the alpha. If the reply is valid JSON
        (e.g. a double-quoted string) the decoded value is used; otherwise the
        stripped reply text is used as-is.

    Raises:
        ValueError: If the model reply is empty.
    """
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        stream=False,
    )

    # `content` may be None (e.g. an empty completion); normalise before parsing.
    content = (response.choices[0].message.content or "").strip()
    if not content:
        raise ValueError("model returned an empty reply; no alpha expression to use")

    try:
        # The prompt's example shows the expression in double quotes, which is
        # a valid JSON string, so try JSON first.
        alpha = json.loads(content)
    except json.JSONDecodeError:
        # The prompt asks for "only the alpha expression", so a bare expression
        # such as `rank(close)` is a legitimate reply even though it is not JSON.
        alpha = content
        # Drop one pair of matching wrapping quotes (e.g. 'expr').
        if len(alpha) >= 2 and alpha[0] == alpha[-1] and alpha[0] in "'\"":
            alpha = alpha[1:-1].strip()

    return [alpha]

def generate_test_result(alpha_list):
    """Backtest the given alpha expressions and return key metrics of the newest alpha.

    The expressions are submitted through the machine_lib helpers, then the
    alpha records are fetched and the one with the largest `dateCreated`
    value is taken as the result of this run.

    Args:
        alpha_list: List of alpha expression strings to simulate.

    Returns:
        dict: The 'sharpe', 'turnover', 'fitness', 'returns' and 'drawdown'
        values of the most recently created alpha record.

    Raises:
        IndexError: If no alpha records are returned, so there is no result to read.
    """
    fo_alpha_list = create_alpha_list(alpha_list)
    # Submit to WorldQuant for backtesting.
    fo_pools = load_task_pool_single(fo_alpha_list, 3)
    single_simulate(fo_pools, "SUBINDUSTRY", "USA", "TOP3000", 0)
    # Fetch the alpha records. Record layout:
    # [alpha_id, exp, sharpe, turnover, fitness, returns, drawdown, margin, dateCreated, decay]
    # NOTE(review): "05-30" / "06-30" look like a hard-coded date window — confirm
    # against machine_lib.get_alphas; runs outside that window may find nothing.
    th_tracker = get_alphas("05-30", "06-30", -20, -20, "USA", 200, "submit")
    # x[-2] is the `dateCreated` column, so the last item after sorting is the newest record.
    sorted_th_tracker = sorted(th_tracker, key=lambda x: x[-2])
    if not sorted_th_tracker:
        # Same exception type the bare `[-1]` would raise, but with an actionable message.
        raise IndexError("get_alphas returned no records; cannot read the backtest result")
    array = sorted_th_tracker[-1]
    # Convert to a dict so metrics can be looked up by name.
    keys = ['alpha_id', 'exp', 'sharpe', 'turnover', 'fitness', 'returns', 'drawdown','margin','dateCreated', 'decay']
    analysis_result = dict(zip(keys, array))
    # Only these metrics are fed back to the model.
    keys_need = ['sharpe', 'turnover', 'fitness','returns', 'drawdown']
    analysis_result_need = {key: analysis_result[key] for key in keys_need}
    return analysis_result_need

In [12]:
# Maximum number of generate -> backtest -> refine rounds.
MAX_ROUNDS = 10
# Number of most recent refinement messages kept as context (3 rounds = 6 messages).
MAX_HISTORY_MESSAGES = 6


def _extract_alpha_expression(reply_text):
    """Pull a single alpha expression out of the model's reply text.

    Accepts a Python-style list/str literal (e.g. "['expr']" or "'expr'") as
    well as a bare expression. Expressions that themselves contain quotes
    (e.g. range='0.1, 1, 0.1') are kept intact.

    Raises:
        ValueError: If the reply is empty or contains no expression.
    """
    import ast  # local import keeps this cell self-contained

    text = (reply_text or "").strip()
    if not text:
        raise ValueError("model returned an empty reply; no alpha expression to backtest")

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (a bare expression, or a quoted one with inner quotes).
        parsed = None

    if isinstance(parsed, str) and parsed.strip():
        return parsed.strip()
    if isinstance(parsed, (list, tuple)) and parsed and isinstance(parsed[0], str) and parsed[0].strip():
        return parsed[0].strip()

    # Fallback: peel one layer of list brackets and one pair of matching quotes.
    if text.startswith("[") and text.endswith("]"):
        text = text[1:-1].strip()
    if len(text) >= 2 and text[0] == text[-1] and text[0] in "'\"":
        text = text[1:-1].strip()
    if not text:
        raise ValueError("model reply did not contain an alpha expression")
    return text


# Conversation history of the refinement rounds (plain role/content dicts).
history_messages = []
round_num = 0
alpha_list = []
analysis_result = {}

# `user_prompt_round1` is a template: fill in the operator and data-field text,
# otherwise the model only ever sees the literal placeholders.
first_user_prompt = user_prompt_round1.format(operators=operators, text_datafields=text_datafields)

# Fixed context sent with every refinement request. The refinement prompt refers
# to "operators" and "financial_data", which only the first-round prompt contains.
base_messages = [
    {"role": "system", "content": system_prompt_round1},
    {"role": "user", "content": first_user_prompt},
]

while round_num < MAX_ROUNDS:
    if round_num == 0:
        # First round: construct the initial alpha expression.
        alpha_list = generate_alpha(system_prompt_round1, first_user_prompt, client)
        # Record the model's first answer so later requests alternate user/assistant turns.
        base_messages.append({"role": "assistant", "content": str(alpha_list[0])})
    else:
        # From the second round on: optimise the previous alpha using its backtest result.
        user_prompt = user_prompt_roundn(alpha_list, analysis_result)
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                *base_messages,
                *history_messages,
                {"role": "user", "content": user_prompt},
            ],
            stream=False,
        )
        reply_text = response.choices[0].message.content
        alpha_list = [_extract_alpha_expression(reply_text)]

        # Update the history with message *text* (not the response object) and
        # keep only the most recent rounds to bound the context length.
        history_messages.extend([
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": reply_text},
        ])
        history_messages = history_messages[-MAX_HISTORY_MESSAGES:]

    # Backtest this round's alpha and report the metrics.
    analysis_result = generate_test_result(alpha_list)
    print(f"这是第{round_num}轮，alpha表达式为{alpha_list}, 测试结果为{analysis_result}")

    round_num += 1
    

这是第0轮，alpha表达式为['ts_corr(winsorize(ts_backfill(close, 120), std=4), winsorize(ts_backfill(volume, 120), std=4), 10)'], 测试结果为{'sharpe': -0.04, 'turnover': 0.5773, 'fitness': -0.0, 'returns': -0.0026, 'drawdown': 0.1968}
这是第1轮，alpha表达式为['ts_corr(winsorize(ts_backfill(close, 60), std=2), winsorize(ts_backfill(volume, 60), std=2), 20)'], 测试结果为{'sharpe': -1.18, 'turnover': 0.1827, 'fitness': -0.68, 'returns': -0.0607, 'drawdown': 0.3392}
这是第2轮，alpha表达式为['ts_corr(ts_mean(close, 10), ts_mean(volume, 10), 30)'], 测试结果为{'sharpe': -0.67, 'turnover': 0.1211, 'fitness': -0.32, 'returns': -0.0281, 'drawdown': 0.1979}
这是第3轮，alpha表达式为['ts_corr(ts_rank(close, 15), ts_rank(volume, 15), 5)'], 测试结果为{'sharpe': -1.46, 'turnover': 0.406, 'fitness': -0.56, 'returns': -0.0604, 'drawdown': 0.3053}
这是第4轮，alpha表达式为['ts_corr(ts_rank(close, 10), ts_rank(volume, 10), 3)'], 测试结果为{'sharpe': -1.23, 'turnover': 0.6745, 'fitness': -0.32, 'returns': -0.0457, 'drawdown': 0.2531}
这是第5轮，alpha表达式为['ts_corr(ts_rank(close, 5), 