In [None]:
import json
import os
import time
from typing import Literal, Tuple, Optional, Dict
from datetime import datetime

In [None]:
# Verdict type shared by the judge helpers in this file: the only accepted
# values are the literal strings "yes" and "no".
YesNo = Literal["yes", "no"]


# System message sent to the judge model by judge_answer_yesno(). It tells the
# judge to compare the source material ("raw") with the generated answer and to
# reply with a single JSON object holding the keys "verdict" and "reason".
# This string is sent as-is (it is never passed through str.format), so the
# literal braces below need no escaping.
JUDGE_SYSTEM_PROMPT = """You are a validation model designed for dataset quality control.

Your task is to rigorously evaluate the reliability of a large language model (LLM) response by comparing the provided original information (raw) with the model-generated answer (answer).

Follow these principles during evaluation:
1. You will be given both the original information (raw) and the model output (answer).
2. If the original information contains explicit facts, standard answers, or verifiable content, assess whether the answer is consistent with them.
3. If the original information is extracted from literature or long-form text without a unique standard answer, focus on evaluating the logical soundness of the reasoning process in the answer.
4. Identify any clear logical errors, hallucinations, unsupported claims, contradictions, or misinterpretations of the original information.
5. If the answer is logically coherent and reasonably supported by the original information, it may be considered reliable even if the wording is not identical.

You must output only a single JSON object and no additional text. The output format must be exactly:
{
  "verdict": "yes" or "no",
  "reason": "a brief explanation of the key reason for your judgment"
}
"""

# User-message template for the judge model; judge_answer_yesno() renders it
# with str.format(raw=..., answer=...).
# The braces of the JSON example are doubled ({{ }}) so that str.format emits
# them literally. Left single, str.format treats the JSON example as a
# replacement field and raises KeyError before any request is sent.
JUDGE_USER_TEMPLATE = """Please evaluate the following content.

【Original Information (raw)】
{raw}

【Model Output (answer)】
{answer}

Determine whether the model output is reliable based on the original information.

Return strictly a JSON object in the following format and do not include any additional text:
{{
  "verdict": "yes" or "no",
  "reason": "one concise sentence explaining the reason for your decision"
}}
"""


def _extract_json_from_text(text: str) -> dict:
    """
    Parse the judge model's reply into a JSON object.

    The judge occasionally wraps the JSON in extra prose or a code fence, so
    when the reply as a whole is not a JSON object, the span between the first
    `{` and the last `}` is parsed instead.

    :param text: Raw reply text from the judge model (None is treated as empty)
    :return: The decoded JSON object; always a dict
    :raises ValueError: If the reply is empty, contains no JSON object, or the
        extracted span is not valid JSON (json.JSONDecodeError is a subclass
        of ValueError)
    """
    text = (text or "").strip()
    if not text:
        raise ValueError("Empty judge response")

    # Fast path: the whole reply is already valid JSON.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None

    # Valid JSON that is not an object (list, string, number, null) is useless
    # to callers that do obj.get(...), so it goes through the fallback too.
    if not isinstance(parsed, dict):
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            parsed = json.loads(text[start:end + 1])

    if not isinstance(parsed, dict):
        raise ValueError(f"Judge response is not valid JSON: {text[:200]}...")

    return parsed


def judge_answer_yesno(
    client_check,
    raw_info: str,
    answer_content: str,
    *,
    judge_model: str,
    temperature: float = 0.9,
    max_retries: int = 3,
    retry_sleep_sec: float = 1.5,
) -> Tuple[YesNo, str, dict]:
    """
    Ask the judge model whether `answer_content` is reliable given `raw_info`.

    The request is attempted up to `max_retries` times; after a failed attempt
    that is not the last one, the function sleeps `retry_sleep_sec * attempt`
    seconds before trying again.

    Returns a tuple (verdict, reason, full_json):
    - verdict: 'yes' or 'no'
    - reason: the judge's explanation
    - full_json: the decoded JSON object the judge returned

    If every attempt fails, nothing is raised; the result is
    ('no', 'judge_failed: <last error>', {...}) so the pipeline keeps running.
    """
    user_prompt = JUDGE_USER_TEMPLATE.format(raw=raw_info, answer=answer_content)

    failure: Optional[Exception] = None
    for attempt in range(1, max_retries + 1):
        is_last_attempt = attempt >= max_retries
        try:
            completion = client_check.chat.completions.create(
                model=judge_model,
                messages=[
                    {"role": "system", "content": JUDGE_SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=temperature,
            )
            parsed = _extract_json_from_text(completion.choices[0].message.content)

            verdict = str(parsed.get("verdict", "")).strip().lower()
            reason = str(parsed.get("reason", "")).strip()

            # Anything other than a clean yes/no counts as a failed attempt.
            if verdict not in ("yes", "no"):
                raise ValueError(f"Invalid verdict: {verdict}")

            return verdict, reason, parsed
        except Exception as exc:
            failure = exc
            if not is_last_attempt:
                time.sleep(retry_sleep_sec * attempt)

    # All attempts failed: fall back to a conservative 'no' carrying the last error.
    fallback_reason = f"judge_failed: {failure}"
    return "no", fallback_reason, {"verdict": "no", "reason": fallback_reason}

In [None]:
def read_txt_file(file_path):
    """
    Load a UTF-8 text file and return everything in it.

    :param file_path: Path to the text file
    :return: The whole file as one string (line breaks included)
    """
    # The context manager closes the handle even if reading fails.
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()


In [3]:
def write_txt_file(file_path, data):
    """
    Save a string to a UTF-8 text file, replacing any existing content.

    :param file_path: Path to the text file
    :param data: Data to be written (string)
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        sink.write(data)


In [None]:
import threading

# deepseek_qa() runs inside a thread pool and every worker updates the same
# summary file; the lock keeps one worker's read-modify-write from discarding
# another's entry.
_CHECK_JSON_LOCK = threading.Lock()


def update_check_json(checkdir: str, filename: str, verdict: "YesNo") -> Dict[str, "YesNo"]:
    """
    Record `verdict` for `filename` in the summary JSON file and return the full mapping.

    :param checkdir: Path of the summary JSON file (despite the name, a file
        path, not a directory); missing parent directories are created
    :param filename: Key to store the verdict under
    :param verdict: 'yes' or 'no'
    :return: The complete {filename: verdict} mapping as written to disk

    An existing file that cannot be read, is not valid JSON, or does not hold
    a JSON object is treated as empty and overwritten (best-effort behaviour).
    """
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    parent = os.path.dirname(checkdir)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with _CHECK_JSON_LOCK:
        data = {}
        if os.path.exists(checkdir):
            try:
                with open(checkdir, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except (OSError, ValueError):
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                loaded = None
            if isinstance(loaded, dict):
                data = loaded

        data[filename] = verdict

        with open(checkdir, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    return data

In [None]:
import threading

# deepseek_qa() runs inside a thread pool and every worker updates the same
# detail file; the lock keeps one worker's read-modify-write from discarding
# another's entry.
_CHECK_DETAIL_JSON_LOCK = threading.Lock()


def update_check_detail_json(
    checkdir_detail: str,
    filename: str,
    verdict: "YesNo",
    reason: str,
    judge_obj: dict,
    *,
    source_path: str = "",
    txt_path: str = "",
    raw_info: str = "",
    answer_content: str = "",
    preview_len: int = 30000,
):
    """
    Store the detailed judge result for `filename` in the detail JSON file.

    :param checkdir_detail: Path of the detail JSON file; missing parent
        directories are created
    :param filename: Key to store the record under
    :param verdict: 'yes' or 'no'
    :param reason: Explanation for the verdict
    :param judge_obj: JSON object the judge returned, stored under "judge"
    :param source_path: Stored under "source_path"
    :param txt_path: Stored under "txt_path"
    :param raw_info: Source text; only the first `preview_len` characters are kept
    :param answer_content: Answer text; only the first `preview_len` characters are kept
    :param preview_len: Maximum length of the two stored previews

    Each record also gets a "ts" field with the local time in ISO format
    (seconds precision). An existing file that cannot be read, is not valid
    JSON, or does not hold a JSON object is treated as empty and overwritten.
    """
    # dirname is '' for a bare file name, and os.makedirs('') raises.
    parent = os.path.dirname(checkdir_detail)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with _CHECK_DETAIL_JSON_LOCK:
        data = {}
        if os.path.exists(checkdir_detail):
            try:
                with open(checkdir_detail, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except (OSError, ValueError):
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                loaded = None
            if isinstance(loaded, dict):
                data = loaded

        data[filename] = {
            "verdict": verdict,
            "reason": reason,
            "judge": judge_obj,
            "source_path": source_path,
            "txt_path": txt_path,
            "ts": datetime.now().isoformat(timespec="seconds"),
            "raw_preview": (raw_info[:preview_len] if raw_info else ""),
            "answer_preview": (answer_content[:preview_len] if answer_content else ""),
        }

        with open(checkdir_detail, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

In [5]:
import re

def extract_qa_pairs(text):
    """
    Extract question/answer pairs from the section delimited by
    `<|begin_of_questions_and_answers|>` and `<|end_of_questions_and_answers|>`.

    Two item formats are recognised:
        `1. Question: ... Answer: ...`
        `1. **Question:** ... **Answer:** ...`  (`**` is stripped from the result)

    Args:
        text (str): The text containing questions and answers.

    Returns:
        list: `[question, answer]` sublists in the order they appear in the text.

    Raises:
        ValueError: If the begin marker is missing.
    """
    begin_marker = '<|begin_of_questions_and_answers|>'
    end_marker = '<|end_of_questions_and_answers|>'

    # Preferred form: the marker is directly followed by a newline. Fall back
    # to the bare marker so that e.g. a trailing space does not reject the text.
    sections = text.split(begin_marker + '\n')
    if len(sections) < 2:
        sections = text.split(begin_marker)
    if len(sections) < 2:
        raise ValueError("The text does not contain the expected begin and end markers.") from None
    # A missing end marker is tolerated: everything after the begin marker is used.
    clean_text = sections[1].split(end_marker)[0]

    # An item runs until the next line that starts with "<number>." or the end of the text.
    pattern_no_markdown = r'\d+\.\sQuestion:\s(.*?)\s+Answer:\s(.*?)(?=\n\d+\.|\Z)'
    pattern_markdown = r'\d+\.\s\*\*Question:\*\*\s(.*?)\s+\*\*Answer:\*\*\s(.*?)(?=\n\d+\.|\Z)'
    flags = re.DOTALL | re.MULTILINE

    # Collect (position, pair) so that mixed-format documents keep document order.
    located = []
    for match in re.finditer(pattern_no_markdown, clean_text, flags):
        question, answer = match.groups()
        located.append((match.start(), [question.strip(), answer.strip()]))
    for match in re.finditer(pattern_markdown, clean_text, flags):
        question, answer = match.groups()
        located.append(
            (match.start(), [question.replace("**", "").strip(), answer.replace("**", "").strip()])
        )

    located.sort(key=lambda item: item[0])
    return [pair for _, pair in located]


In [None]:
def deepseek_qa(txtfile, txtdir):
    """
    Generate and judge a reasoning chain for every QA pair belonging to one review article.

    For each pair extracted from the QA file of the same name in `qadir`:
    1. the generator (`client`) streams a reasoning chain for the question,
       the answer and the article;
    2. the judge (`client_check`) rates the combined question/answer/reasoning
       text against the article;
    3. the verdict is recorded in `checkdir` and `checkdir_detail`;
    4. the combined text is written to `<article stem>_<n>.txt` next to the article.

    Relies on the module-level names `qadir`, `client`, `client_check`,
    `checkdir` and `checkdir_detail`.

    :param txtfile: File name of the article; the QA file in `qadir` has the same name
    :param txtdir: Directory containing the article
    :return: The string "<txtfile> is done"
    """
    # os.path.join instead of a hard-coded "\\" keeps this working off Windows.
    file_path = os.path.join(txtdir, txtfile)
    txt = read_txt_file(file_path)
    qa_text = read_txt_file(os.path.join(qadir, txtfile))
    qa_list = extract_qa_pairs(qa_text)

    # splitext removes whatever extension is present; slicing off the last
    # four characters is only right for three-letter extensions.
    stem = os.path.splitext(file_path)[0]

    print(f"{txtfile} is running")

    for count, qa in enumerate(qa_list, start=1):
        # create chat completion request
        stream = client.chat.completions.create(
            model="qwen-turbo-latest",  # using qwen-turbo-latest here as an example; replace with another model if needed
            messages=[
                {"role": "user", "content": f"""Based on a comprehensive review in the field of Metal-Organic Frameworks (MOFs) and related questions, generate a detailed and complete chain of scientific reasoning. Ensure that your reasoning process is rigorous and logically coherent, utilizing scientific theories and facts for analysis. The chain of reasoning can be open and flexible, not confined to a rigid structure, but it should clearly indicate the beginning and end of the reasoning.

Please use `<|begin_of_thought|>` to mark the start of the reasoning chain and `<|end_of_thought|>` to mark the end.

Don't mention "this literature show" or "this review show" or anything like that. This is very important. Even if you use the literature, your answer should still give the other person a style of thinking that is all about you.

The thought chain is as detailed as possible.
---

**Example Structure:**

1. **Understanding the Background:** Briefly explain the background information and main questions.
  
2. **Application of Knowledge:** Invoke relevant scientific principles and known facts related to the problem.
  
3. **Analysis Integration:** Integrate key information from the review into the analysis process.
  
4. **Reasoning Expansion:** Use logical reasoning to explore potential paths to a solution.
  
5. **Solution Evaluation:** Assess the plausibility and feasibility of different solutions.
  
6. **Conclusion Formation:** Draw clear scientific conclusions or hypotheses.
  
7. **Open Exploration:** Suggest possible future research directions or applications.
  

**Open Thought Chain Template:**

<|begin_of_thought|>

1. Preliminary Analysis: Clarify the subject and background information.
  
2. Theoretical Application: Identify and apply relevant scientific theories to support the analysis.
  
3. Logical Step-by-Step Reasoning: Gradually expand the reasoning, using review information to deepen the analysis.
  
4. Possibility Discussion: Explore potential conclusions and hypotheses, considering various scientific perspectives.
  
5. Result Summary: Summarize analysis results to form clear scientific conclusions.
  
6. Exploration Directions: Propose possible future research directions or application areas.
  

<|end_of_thought|>

Question:
{qa[0]}

Answer:
{qa[1]}

There is Artical:
{txt}"""}
            ],
            stream=True,
            # To receive token usage in the final chunk, also pass
            # stream_options={"include_usage": True}
        )

        # Collect the streamed pieces and join once at the end.
        answer_parts = []
        for chunk in stream:
            # A chunk without choices carries only usage information.
            if not getattr(chunk, 'choices', None):
                print("\n" + "=" * 20 + "Token Usage" + "=" * 20 + "\n")
                print(chunk.usage)
                continue

            delta = chunk.choices[0].delta
            if getattr(delta, 'content', None):
                answer_parts.append(delta.content)
        answer_content = "".join(answer_parts)

        content = f"""<|begin_of_question|>\n\n{qa[0]}\n\n<|end_of_question|>\n\n<|begin_of_answer|>\n\n{qa[1]}\n\n<|end_of_answer|>\n\n{answer_content}"""
        print(content)
        print("+++++++++++++++++++")
        print(txt)
        verdict, reason, judge_obj = judge_answer_yesno(
            client_check,
            raw_info=txt,
            answer_content=content,
            judge_model="ModelName",   # Replace with your reviewer model name
        )

        # One record per QA pair, keyed by the output file it belongs to.
        # Keying by the article name alone makes every pair of an article
        # overwrite the previous pair's verdict.
        out_path = f"{stem}_{count}.txt"
        record_key = os.path.basename(out_path)

        # Summary: output filename -> yes/no
        update_check_json(checkdir, record_key, verdict)

        # Details: output filename -> verdict / reason / original judge JSON
        update_check_detail_json(
            checkdir_detail,
            record_key,
            verdict,
            reason,
            judge_obj,
            source_path=file_path,
            txt_path=out_path,
            raw_info=txt,
            answer_content=content,
        )
        write_txt_file(out_path, content)
    return f"{txtfile} is done"

In [11]:
def main(txtdir):
    """
    Run deepseek_qa over every `.txt` file in `txtdir` using a pool of five threads.

    Each job's return value is printed as it completes; a job that raises is
    reported with a printed message and does not stop the remaining jobs.

    :param txtdir: Directory whose `.txt` files are processed
    """
    with ThreadPoolExecutor(max_workers=5) as pool:
        # Only text files are submitted.
        pending = [
            pool.submit(deepseek_qa, name, txtdir)
            for name in os.listdir(txtdir)
            if name.endswith('.txt')
        ]

        # Report outcomes in completion order, not submission order.
        for finished in as_completed(pending):
            try:
                print(finished.result())
            except Exception as err:
                print(f"An exception occurred: {err}")


In [None]:
import os
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed

# Generator client: produces the reasoning chains in deepseek_qa().
client = OpenAI(
    # Reads the key from the DASHSCOPE_API_KEY environment variable.
    # If the environment variable is not configured, replace the following line with your Bailian API Key, e.g., api_key="sk-xxx",
    api_key=os.getenv("DASHSCOPE_API_KEY"),  # How to get an API Key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
# Judge client: passed to judge_answer_yesno() to rate each generated answer.
client_check = OpenAI(
    # Reads the key from the DASHSCOPE_API_KEY environment variable.
    # If the environment variable is not configured, replace the following line with your Bailian API Key, e.g., api_key="sk-xxx",
    api_key=os.getenv("DASHSCOPE_API_KEY"),  # How to get an API Key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# Raw strings keep the Windows paths readable and free of doubled separators.
qadir = r"F:\Working\ModelDistillation\qa"            # QA files, one per review article (same file name)
txtdir = r"F:\Working\ModelDistillation\review"       # review articles to process
checkdir = r"F:\Working\ModelDistillation\DDrevise\test2\check.json"                # summary file: name -> yes/no
checkdir_detail = r"F:\Working\ModelDistillation\DDrevise\test2\check_detail.json"  # detail file: name -> full judge record

if __name__ == "__main__":
    main(txtdir)
