In [1]:
def read_json_file(file_path):
    """
    Read a JSON file from the specified path and return the parsed object.
    
    :param file_path: Path to the JSON file
    :return: Parsed JSON data (usually a dict or list)
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


In [3]:
def write_json_file(file_path, data):
    """
    Write the given data to a JSON file at the specified path.
    
    :param file_path: Path to the JSON file
    :param data: Data to be written (usually a dict or list)
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


In [33]:
def deepseek_qa(question):
    reasoning_content = ""  # define the complete reasoning process
    answer_content = ""     # define the complete response
    is_answering = False    # determine whether the reasoning has ended and the response has started

    # create chat completion request
    stream = client.chat.completions.create(
        model="deepseek-r1",  # using deepseek-r1 here as an example; replace with another model if needed
        messages=[
            {"role": "user", "content": question}
        ],
        stream=True
        # Uncomment the following to include token usage in the final chunk
        # stream_options={
        #     "include_usage": True
        # }
    )

    # print("\n" + "=" * 20 + "Reasoning Process" + "=" * 20 + "\n")

    for chunk in stream:
        # handle usage info
        if not getattr(chunk, 'choices', None):
            # print("\n" + "=" * 20 + "Token Usage" + "=" * 20 + "\n")
            print(chunk.usage)
            continue

        delta = chunk.choices[0].delta

        # check if reasoning_content attribute exists
        if not hasattr(delta, 'reasoning_content'):
            continue

        # handle empty content case
        if not getattr(delta, 'reasoning_content', None) and not getattr(delta, 'content', None):
            continue

        # handle the start of the answer
        if not getattr(delta, 'reasoning_content', None) and not is_answering:
            print("\n" + "=" * 20 + "Complete Response" + "=" * 20 + "\n")
            is_answering = True

        # handle reasoning process
        if getattr(delta, 'reasoning_content', None):
            print(delta.reasoning_content, end='', flush=True)
            reasoning_content += delta.reasoning_content
        # handle response content
        elif getattr(delta, 'content', None):
            print(delta.content, end='', flush=True)
            answer_content += delta.content

    # If you need to print the complete content, leave the following uncommented

    print("=" * 20 + "Complete Reasoning Process" + "=" * 20 + "\n")
    print(reasoning_content)
    print("=" * 20 + "Complete Response" + "=" * 20 + "\n")
    print(answer_content)

    return reasoning_content, answer_content


In [37]:


def process_json_file(jsonfile, jsondir):
    filepath = f"{jsondir}\\{jsonfile}"
    json_data = read_json_file(filepath)
    print(f"{jsonfile} is running")
    reasoning_content, answer = deepseek_qa(json_data['message_1'])
    
    json_data["reasoning_content"] = reasoning_content
    json_data["content"] = answer
    
    write_json_file(filepath, json_data)
    print(f"{jsonfile} is done")
    
def main(jsondir):
    with ThreadPoolExecutor(max_workers=1) as executor:
        for jsonfile in os.listdir(jsondir):
            
            executor.submit(process_json_file, jsonfile, jsondir)
            

In [41]:
import json
import os
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor

client = OpenAI(
    # If the environment variable is not configured, replace the following line with your Bailian API Key, e.g., api_key="sk-xxx",
    api_key="",  # How to get an API Key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

jsondir = "F:\\Working\\ModelDistillation\\Chemistry"
main(jsondir)


024_008_014.json is running
