In [1]:
def read_json_file(file_path):
    """
    Load a JSON file from disk and return the decoded object.

    :param file_path: path of the JSON file to read (opened as UTF-8 text)
    :return: the parsed JSON value (usually a dict or a list)
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

In [3]:
def write_json_file(file_path, data):
    """
    Serialize ``data`` as JSON and write it to ``file_path``.

    The data is serialized to a string *before* the target file is opened.
    Opening with mode ``'w'`` truncates the file immediately, so serializing
    first guarantees that a value that cannot be encoded (which raises
    ``TypeError``/``ValueError``) leaves any existing file untouched instead
    of replacing it with an empty or half-written document.

    :param file_path: path of the JSON file to write (UTF-8 text)
    :param data: the value to serialize (usually a dict or a list)
    :raises TypeError: if ``data`` contains a value that is not JSON serializable
    :raises ValueError: if ``data`` contains a circular reference
    """
    # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable in the file.
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(serialized)

In [33]:
def deepseek_qa(question):
    """
    Send one question to the chat model and collect its streamed response.

    The request is issued through the module-level ``client`` with
    ``stream=True``. Each streamed chunk is echoed to stdout as it arrives and
    appended to either the reasoning text or the answer text. After the stream
    ends, both complete texts are printed once more under their own headers.

    :param question: the user message content to send to the model
    :return: a tuple ``(reasoning_content, answer_content)`` of two strings
    """
    reasoning_content = ""  # accumulated reasoning ("thinking") text
    answer_content = ""     # accumulated final answer text
    is_answering = False    # flips to True when the first answer chunk arrives

    # Create the streaming chat completion request.
    stream = client.chat.completions.create(
        model="deepseek-r1",  # change the model name here if needed
        messages=[
            {"role": "user", "content": question}
        ],
        stream=True
        # Uncomment the following to receive token usage in the final chunk
        # stream_options={
        #     "include_usage": True
        # }
    )

    for chunk in stream:
        # A chunk without choices carries no delta; print its usage field and
        # move on. (Previously this referenced an undefined name and raised
        # NameError as soon as such a chunk arrived.)
        if not getattr(chunk, 'choices', None):
            print(getattr(chunk, 'usage', None))
            continue

        delta = chunk.choices[0].delta

        # Deltas that do not expose a reasoning_content attribute are skipped.
        if not hasattr(delta, 'reasoning_content'):
            continue

        reasoning_piece = getattr(delta, 'reasoning_content', None)
        answer_piece = getattr(delta, 'content', None)

        # Nothing to record when both fields are empty.
        if not reasoning_piece and not answer_piece:
            continue

        # The first chunk with no reasoning text marks the start of the answer.
        if not reasoning_piece and not is_answering:
            print("\n" + "=" * 20 + "完整回复" + "=" * 20 + "\n")
            is_answering = True

        if reasoning_piece:
            # Reasoning text takes priority when a chunk carries both fields.
            print(reasoning_piece, end='', flush=True)
            reasoning_content += reasoning_piece
        elif answer_piece:
            print(answer_piece, end='', flush=True)
            answer_content += answer_piece

    # Print the complete reasoning and the complete answer once more.
    print("=" * 20 + "完整思考过程" + "=" * 20 + "\n")
    print(reasoning_content)
    print("=" * 20 + "完整回复" + "=" * 20 + "\n")
    print(answer_content)

    return reasoning_content, answer_content

In [37]:


def process_json_file(jsonfile, jsondir):
    """
    Query the model for one JSON file and store the response back in that file.

    Reads ``jsonfile`` from ``jsondir``, sends the value under its
    ``'message_1'`` key to ``deepseek_qa``, stores the returned reasoning text
    and answer under the ``'reasoning_content'`` and ``'content'`` keys, and
    overwrites the same file with the updated data.

    :param jsonfile: file name (not a full path) inside ``jsondir``
    :param jsondir: directory that contains ``jsonfile``
    :raises KeyError: if the loaded JSON has no ``'message_1'`` key
    """
    # os.path.join uses the platform's separator instead of a hard-coded "\\",
    # so the same code works on Windows and on POSIX systems.
    filepath = os.path.join(jsondir, jsonfile)
    json_data = read_json_file(filepath)
    print(f"{jsonfile} is running")
    reasoning_content, answer = deepseek_qa(json_data['message_1'])

    json_data["reasoning_content"] = reasoning_content
    json_data["content"] = answer

    write_json_file(filepath, json_data)
    print(f"{jsonfile} is done")
    
def main(jsondir):
    """
    Run ``process_json_file`` on every entry of ``jsondir``.

    Each directory entry is submitted to a single-worker thread pool. Once all
    tasks are submitted, every future is checked and any exception raised by a
    task is printed together with the file name. Without this check a failed
    task would vanish silently, because an executor stores the exception on
    the future and never reports it on its own. Failures do not stop the
    remaining files and are not re-raised.

    :param jsondir: directory whose entries are passed to ``process_json_file``
    """
    with ThreadPoolExecutor(max_workers=1) as executor:
        submitted = [
            (jsonfile, executor.submit(process_json_file, jsonfile, jsondir))
            for jsonfile in os.listdir(jsondir)
        ]

        for jsonfile, future in submitted:
            # Future.exception() blocks until the task has finished and returns
            # the raised exception, or None when the task succeeded.
            error = future.exception()
            if error is not None:
                print(f"{jsonfile} failed: {error!r}")
            

In [41]:
import json
import os
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor

client = OpenAI(
    # Read the API key from the DASHSCOPE_API_KEY environment variable so that
    # no secret is stored in the notebook (an empty string cannot authenticate).
    # How to obtain an API key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# Directory whose entries are processed; each one is overwritten in place
# with the model's reasoning and answer added.
jsondir = "F:\\Working\\ModelDistillation\\Chemistry"
main(jsondir)

024_008_014.json is running
