In [77]:
# Sample LLM reply used to exercise _find_all_blocks: a ```problog fence holding a JSON object
# whose "Code" string contains raw (unescaped) newline characters.
model = 'Here is the completed code segment with all population data filled in:\n\n```problog\n{\n  "HASH": "23C2CC6E",\n  "Code": "pop(china, 8250).\npop(india, 5863).\npop(ussr, 2521).\npop(usa, 2119).\npop(indonesia, 1276).\npop(japan, 1097).\npop(brazil, 1042).\npop(bangladesh, 750).\npop(pakistan, 682).\npop(w_germany, 613).\npop(nigeria, 613).\npop(mexico, 581).\npop(uk, 559).\npop(italy, 554).\npop(france, 525).\npop(philippines, 415).\npop(thailand, 410).\npop(turkey, 383).\npop(egypt, 364).\npop(spain, 352).\npop(poland, 337).\npop(s_korea, 335).\npop(iran, 320).\npop(ethiopia, 272).\npop(argentina, 251)."\n}\n```'


In [85]:
import re
import json
import uuid
import hashlib
from problog.program import PrologString, Clause, AnnotatedDisjunction, Term
from typing import Literal, List, Union, Tuple, Dict, Any
from langgraph.graph import END, StateGraph
from state import BasicState, Mode
from config import paths
def _find_all_blocks(type:Literal["report","code","other"], text:str, ext_mark:str="") -> List[dict]:
    """
    find block with certain patterns in json form.
    """
    blocks:List[dict] = []
    
    if type == "other" or type == "report":
        pattern = r"```(?:report|[a-z]*)?\n(.*?)```"
    elif type == "code":
        pattern = r"```(?:problog|[a-z]*)?\n(.*?)```"
    else:
        raise ValueError("you must choose from ['report','code','other']")
    
    matches = re.findall(pattern, text, re.DOTALL)
    if not matches:
        pattern = r"```(?:json|[a-z]*)?\n(.*?)```"
        matches = re.findall(pattern, text, re.DOTALL)

    if matches:
        for match in matches:
            try:
                # 直接尝试解析 JSON
                match_json = json.loads(match)
            except json.JSONDecodeError:
                # 对于 ProbLog 代码，需要特殊处理转义
                # 保护 ProbLog 特殊操作符
                protected_match = match
                
                # 创建特殊操作符的映射
                special_patterns = {
                    r'\\\\=': '<<<BACKSLASH_BACKSLASH_EQUALS>>>',
                    r'\\\\\\\\=': '<<<QUAD_BACKSLASH_EQUALS>>>',  # 处理已经有四个反斜杠的情况
                    r'\\n'        : '<<<ESCAPED_NEWLINE>>>',      # 新增这一行
                }
                # 临时替换特殊操作符
                for pattern, placeholder in special_patterns.items():
                    protected_match = re.sub(pattern, placeholder, protected_match)
                
                # 修复其他无效的转义序列
                fixed_code = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', protected_match)

                # 还原特殊操作符
                for pattern, placeholder in special_patterns.items():
                    fixed_code = fixed_code.replace(placeholder, pattern)
                
                try:
                    match_json = json.loads(fixed_code)
                except json.JSONDecodeError as e:
                    print(f"Failed to parse JSON: {e}")
                    print(f"Original match: {match}")
                    print(f"Fixed code: {fixed_code}")
                    continue

            # 处理不同类型的块
            if type == "other":
                try:
                    blocks.append({ext_mark:match_json})
                except ValueError:
                    blocks.append({ext_mark:None})

            elif type == "code":
                try:
                    blocks.append({match_json["HASH"]:match_json["Code"]})
                except (ValueError, KeyError):
                    blocks.append({match_json.get("HASH", "unknown"):f"could not parse the code block of {match_json}"})
            
            elif type == "report":
                try:
                    blocks.append({match_json["HASH"]:match_json})
                except (ValueError, KeyError):
                    blocks.append({match_json.get("HASH", "unknown"):f"could not parse the report block of {match_json}"})

        return blocks
    else:
        return None

In [84]:
import json
# Run the parser on the sample reply and show the extracted blocks.
matches = _find_all_blocks("code",model)
print(matches)
# data is now a plain dict and can be used directly


fixed_code {
  "HASH": "23C2CC6E",
  "Code": "pop(china, 8250).
pop(india, 5863).
pop(ussr, 2521).
pop(usa, 2119).
pop(indonesia, 1276).
pop(japan, 1097).
pop(brazil, 1042).
pop(bangladesh, 750).
pop(pakistan, 682).
pop(w_germany, 613).
pop(nigeria, 613).
pop(mexico, 581).
pop(uk, 559).
pop(italy, 554).
pop(france, 525).
pop(philippines, 415).
pop(thailand, 410).
pop(turkey, 383).
pop(egypt, 364).
pop(spain, 352).
pop(poland, 337).
pop(s_korea, 335).
pop(iran, 320).
pop(ethiopia, 272).
pop(argentina, 251)."
}

Failed to parse JSON: Invalid control character at: line 3 column 29 (char 52)
Original match: {
  "HASH": "23C2CC6E",
  "Code": "pop(china, 8250).
pop(india, 5863).
pop(ussr, 2521).
pop(usa, 2119).
pop(indonesia, 1276).
pop(japan, 1097).
pop(brazil, 1042).
pop(bangladesh, 750).
pop(pakistan, 682).
pop(w_germany, 613).
pop(nigeria, 613).
pop(mexico, 581).
pop(uk, 559).
pop(italy, 554).
pop(france, 525).
pop(philippines, 415).
pop(thailand, 410).
pop(turkey, 383).
pop(egypt, 364).

In [65]:
# Debug cell: print each extracted match and try to decode it as JSON.
# NOTE(review): relies on whatever `matches` currently holds in the kernel. json.loads needs
# str/bytes, so this presumably ran against raw fence bodies rather than parsed dicts — confirm.
for match in matches:
    print(match)
    res = json.loads(match)
    print(res)

In [6]:
def _find_all_blocks(type: Literal["report", "code", "other"], text: str, ext_mark: str = "") -> List[dict]:
    """
    修复版的解析函数，正确处理ProbLog代码中的转义序列
    """
    blocks: List[dict] = []
    
    # 根据类型选择正则表达式
    if type == "other" or type == "report":
        pattern = r"```(?:report|[a-z]*)?\n(.*?)```"
    elif type == "code":
        pattern = r"```(?:problog|[a-z]*)?\n(.*?)```"
    else:
        raise ValueError("you must choose from ['report','code','other']")
    
    matches = re.findall(pattern, text, re.DOTALL)
    
    if not matches:
        pattern = r"```(?:json|[a-z]*)?\n(.*?)```"
        matches = re.findall(pattern, text, re.DOTALL)
    
    for match in matches:
        match = match.strip()
        
        try:
            # 直接尝试解析JSON
            match_json = json.loads(match)
        except json.JSONDecodeError:
            # 智能处理转义序列
            try:
                # 创建一个映射来临时替换特殊序列
                replacements = {
                    r'\\\\=': '___DOUBLE_BACKSLASH_EQUALS___',
                    r'\\=': '___BACKSLASH_EQUALS___',
                    r'\\n': '___BACKSLASH_N___',
                    '\n': '\\n',  # 将实际的换行符替换为转义的换行符
                }
                
                processed_match = match
                
                # 首先处理实际的换行符
                processed_match = processed_match.replace('\n', '\\n')
                
                # 然后处理其他特殊序列（按长度降序，确保先处理长的）
                for pattern, replacement in sorted(replacements.items(), key=lambda x: len(x[0]), reverse=True):
                    if pattern != '\n':  # 跳过已处理的换行符
                        processed_match = processed_match.replace(pattern, replacement)
                
                # 尝试解析JSON
                match_json = json.loads(processed_match)
                
                # 还原特殊序列
                if isinstance(match_json, dict) and "Code" in match_json:
                    code = match_json["Code"]
                    # 还原顺序也很重要
                    for pattern, replacement in replacements.items():
                        if pattern != '\n':  # 不需要还原换行符
                            code = code.replace(replacement, pattern)
                    match_json["Code"] = code
                
            except json.JSONDecodeError as e:
                print(f"JSON解析失败: {e}")
                print(f"原始内容: {repr(match)}")
                print(f"处理后内容: {repr(processed_match)}")
                continue
        
        # 根据类型处理解析结果
        if type == "other":
            blocks.append({ext_mark: match_json})
        elif type == "code":
            if isinstance(match_json, dict) and "HASH" in match_json and "Code" in match_json:
                blocks.append({match_json["HASH"]: match_json["Code"]})
            else:
                blocks.append({"unknown": f"code: invalid structure - {match_json}"})
        elif type == "report":
            if isinstance(match_json, dict) and "HASH" in match_json:
                blocks.append({match_json["HASH"]: match_json})
            else:
                blocks.append({"unknown": f"report: invalid structure - {match_json}"})
    
    return blocks

In [7]:

# Sample LLM reply: the fenced JSON "Code" value holds raw newlines and a doubled-backslash `\\=`.
model = 'Here is the completed code segment for the {LANGDA} placeholder:\n\n```problog\n{"HASH": "CD6DE9CB","Code": "query_pop([C1,D1,C2,D2]) :- \n    density(C1,D1),\n    density(C2,D2),\n    C1 \\\\= C2,\n    abs(D1 - D2) =< max(D1,D2)*0.05."}\n```\n\nThis implementation:\n1. Uses the `density/2` predicate to get population densities for two countries\n2. Ensures they are different countries with `C1 \\= C2`\n3. Checks if their densities are within 5% of each other using arithmetic comparison\n4. Returns the country names and densities in the format [C1,D1,C2,D2]\n\nThe 5% threshold can be adjusted by changing the 0.05 factor as needed.'

# Parse the reply and show the {HASH: Code} entries that were extracted.
rep = _find_all_blocks("code", model)
print(rep)

[{'CD6DE9CB': 'query_pop([C1,D1,C2,D2]) :- \n    density(C1,D1),\n    density(C2,D2),\n    C1 \\\\= C2,\n    abs(D1 - D2) =< max(D1,D2)*0.05.'}]


In [19]:
def _replace_placeholder(template: str, replacement_list, placeholder="{{LANGDA}}") -> str:
    """
    Replaces placeholders in a template with items from a replacement list.
    
    args:
        template: template with placeholders
        replacement_list: the list of content that will fit into the placeholder.
        placeholder: default as {{LANGDA}}
        - if the value is None, the corresponding placeholder remains unchanged. 
        - valid input forms: List[str] or List[dict]
    """
    # Extract values from replacement items
    replace_str_list = []
    if replacement_list and all(isinstance(item, dict) for item in replacement_list):
        for item in replacement_list:
            _, value = next(iter(item.items()))
            replace_str_list.append(value)
    else:
        replace_str_list = replacement_list

    # Split the template by placeholder
    segments = template.split(placeholder)
    result = segments[0]
    
    # Process each segment after the first
    for i, seg in enumerate(segments[1:]):
        # Check if we have a replacement for this placeholder
        if i < len(replace_str_list) and replace_str_list[i] is not None:
            replace_text = replace_str_list[i]
            
            # Check for duplicate punctuation at the boundaries
            if replace_text and seg:
                replace_ends_with_punct = replace_text.rstrip()[-1:] in ".,;" if replace_text.rstrip() else False
                seg_starts_with_punct = seg.lstrip()[:1] in ".,;" if seg.lstrip() else False
                
                # Handle duplicate punctuation
                if replace_ends_with_punct and seg_starts_with_punct:
                    # Find position where the actual text ends in replace_text
                    replace_text_end = len(replace_text.rstrip())
                    # Find position where the actual text starts in seg
                    seg_text_start = len(seg) - len(seg.lstrip())
                    
                    # Add replace_text without the trailing punctuation
                    result += replace_text[:replace_text_end-1] 
                    # Add seg with its leading whitespace and punctuation
                    result += seg[:seg_text_start+1] + seg[seg_text_start+1:]
                    continue
            
            # Normal case: just add the replacement and segment
            result += replace_text
        else:
            # No replacement available, keep the placeholder
            result += placeholder
        
        result += seg
        
    return result

In [20]:
# Template with one {{LANGDA}} placeholder that is followed by its own clause-terminating "."
model = """
 
digit(O), all_different([O,R,N,Y,E,D]),
 sumdigit(C2, E, O, N, C3),
 
 digit(M), all_different([M,O,R,N,Y,E,D]),
 sumdigit(C3, S, M, O, C4),
 
{{LANGDA}}
  . 
 all_different([S,E,N,D,M,O,R,Y]).
sumdigit(C, A, B, S, 0) :-
 X is C + A + B,
 X < 10,
 S = X.
sumdigit(C, A, B, S, 1) :-
 X is C + A + B,
 X >= 10,
 S is X - 10.
"""
# Replacement that also ends with "." (plus trailing spaces), to exercise the duplicate-punctuation handling.
match = """digit(S), leftdigit(S), all_different([S,M,O,R,N,Y,E,D]),
 sumdigit(C4, 0, 0, M, _).  """

print(_replace_placeholder(model,[match]))




digit(O), all_different([O,R,N,Y,E,D]),
 sumdigit(C2, E, O, N, C3),

 digit(M), all_different([M,O,R,N,Y,E,D]),
 sumdigit(C3, S, M, O, C4),

digit(S), leftdigit(S), all_different([S,M,O,R,N,Y,E,D]),
 sumdigit(C4, 0, 0, M, _)
  . 
 all_different([S,E,N,D,M,O,R,Y]).
sumdigit(C, A, B, S, 0) :-
 X is C + A + B,
 X < 10,
 S = X.
sumdigit(C, A, B, S, 1) :-
 X is C + A + B,
 X >= 10,
 S is X - 10.



In [4]:
# Sample text containing a "*** split ***" marker line between two sections.
model = """
//the completed original code here
```
</Final_Answer>

*** split ***
In section <origin_code> and <generated_code> you will be give two codes,
- in <origin_code> there's incomplete code with {{LANGDA}} blocks.
- in <generated_
"""


# Split on the marker and print the two halves separated by a ruler line.
lines = model.split("*** split ***")
print(lines[0])
print("======================")
print(lines[1])



//the completed original code here
```
</Final_Answer>



In section <origin_code> and <generated_code> you will be give two codes,
- in <origin_code> there's incomplete code with <langda> blocks.
- in <generated_



In [14]:
def merge_with_overlap_tokens(s1: str, s2: str) -> str:
    """
    Join two strings on whitespace-separated tokens, collapsing the longest run of
    tokens that ends `s1` and also starts `s2`.

    The result is the tokens joined by single spaces, so the original whitespace
    of both inputs is not preserved.
    """
    left, right = s1.split(), s2.split()
    # Longest overlap first; 0 means the strings share no boundary tokens.
    shared = next(
        (size for size in range(min(len(left), len(right)), 0, -1) if left[-size:] == right[:size]),
        0,
    )
    return " ".join(left + right[shared:])

# Test
a = """This new 
cat 
 
"""

b = """  new   cat is cute,
but I dont like it
"""
print( merge_with_overlap_tokens(a, b) )
# Output: This new cat is cute, but I dont like it


This new cat is cute, but I dont like it


In [30]:
# ProbLog rock-paper-scissors template; each {{LANGDA}} marks a spot to be filled with generated code.
model = """
% -------------------------
% Basic rules of rock-paper-scissors
% -------------------------
% Three gestures
move(rock).
move(paper).
move(scissor).
% Win-lose relationship: X beats Y
beats(rock, scissor).
beats(scissor, paper).
beats(paper, rock).
% -------------------------
% Calculate the result of the game
% -------------------------
result(X, X, draw) :-

{{LANGDA}}
.
result(X, Y, win) :-
{{LANGDA}}
.
result(X, Y, lose) :-
{{LANGDA}}
.
% End of recursion: empty list corresponds to empty result
play([], [], []).
% Recursive advancement: take out each round of gestures, calculate the results, and continue
play([P1|P1T], [P2|P2T], [R|Rs]) :-
% The correct call is result(P1,P2,R), not semicolon
result(P1, P2, R),
% (Optional) Update the score according to R
play(P1T, P2T, Rs).
compute_score([], 0).
{{LANGDA}}
.
{{LANGDA}}
.
{{LANGDA}}
.
compute_score([draw | Rs], S) :- compute_score(Rs, S1), S is S1.
determine_winner(P1Moves,P2Moves,Winner) :- 
 
{{LANGDA}}
,
compute_score(Results,S), 
( S > 0, Winner = player1 
; S < 0, Winner = player2 
; S = 0, Winner = draw 
).
query(determine_winner([rock,rock,rock],[paper,paper,scissor],W)).
"""

In [36]:
import re
import json
import uuid
import hashlib
from problog.program import PrologString, Clause, AnnotatedDisjunction, Term
from typing import Literal, List, Union, Tuple, Dict, Any
from langgraph.graph import END, StateGraph
from state import BasicState, Mode
from config import paths

# Replacement snippets as single-entry {HASH: code} dicts; _replace_placeholder consumes them by position.
codes = [
{"3F277A35": "move(X)."},{"D91BB7A0": "beats(X, Y)."},{"0940BB04": "beats(Y, X)."},
{"583E41B6": "compute_score([win | Rs], S) :- compute_score(Rs, S1), S is S1 + 1."},
{"D324979D": "compute_score([lose | Rs], S) :- compute_score(Rs, S1), S is S1 - 1."},
{"FD850DEC": "compute_score([draw | Rs], S) :- compute_score(Rs, S1), S is S1."},
{"3FEB17D7": "play(P1Moves, P2Moves, Results),"}]

import re

def _tokenize_problog(s):
    # Tokenize Prolog code into meaningful units, including punctuation
    pattern = r":-|\w+|[^\w\s]"
    return [(m.group(), m.start(), m.end()) for m in re.finditer(pattern, s)]

def _merge_problog_preserve(s1, s2):
    """
    Concatenate `s1` and `s2`, removing from the front of `s2` the longest token
    sequence that already ends `s1`.

    Overlap is detected on the token texts produced by `_tokenize_problog`, so
    whitespace differences do not matter; `s1` is kept verbatim and `s2` is
    resumed right after the last overlapping token.
    """
    head_tokens = [tok for tok, _, _ in _tokenize_problog(s1)]
    tail_spans = _tokenize_problog(s2)
    tail_tokens = [tok for tok, _, _ in tail_spans]

    # Try the largest possible overlap first and shrink until one matches.
    overlap = min(len(head_tokens), len(tail_tokens))
    while overlap > 0:
        if head_tokens[-overlap:] == tail_tokens[:overlap]:
            # End offset of the last duplicated token inside s2.
            resume_at = tail_spans[overlap - 1][2]
            return s1 + s2[resume_at:]
        overlap -= 1

    # No shared boundary tokens: plain concatenation.
    return s1 + s2

def _replace_placeholder(template:str, replacement_list:Union[List[str],List[dict]], placeholder="{{LANGDA}}") -> str:
    """
    Replaces placeholders in a template with items from a replacement list.

    args:
        template: template with placeholders
        replacement_list: the list of content that will fit into the placeholder.
        placeholder: default as {{LANGDA}}
        - if the value is None, the corresponding placeholder remains unchanged.
        - if the list has fewer items than there are placeholders (or is None/empty),
          the remaining placeholders remain unchanged.
        - valid input forms: List[str] or List[dict]

    Text is stitched together with `_merge_problog_preserve`, so tokens duplicated at the
    seam between template and replacement (for example a trailing ".") appear only once.
    """

    # Extract values from replacement items
    if replacement_list and all(isinstance(item, dict) for item in replacement_list):
        replace_str_list = []
        for item in replacement_list:
            _, value = next(iter(item.items()))
            replace_str_list.append(value)
    else:
        # Tolerate None so a template without replacements is returned unchanged.
        replace_str_list = list(replacement_list or [])

    # Split the template by placeholder
    segments = template.split(placeholder)
    result = segments[0]

    # Process each segment after the first
    for i, seg in enumerate(segments[1:]):
        # Bounds check BEFORE indexing: extra placeholders must not raise IndexError.
        replace_text = replace_str_list[i] if i < len(replace_str_list) else None

        if replace_text is not None:
            # deal with overlap: segment[0] & "overlap text" + "overlap text" & replace_text
            result = _merge_problog_preserve(result, replace_text)
        else:
            # No replacement available, keep the placeholder
            result += placeholder

        # deal with overlap: replace_text & "overlap text" + "overlap text" & segment[1]
        result = _merge_problog_preserve(result, seg)

    return result

# Fill the template's placeholders with the snippets from `codes` and show the merged program.
print(_replace_placeholder(model,codes))


% -------------------------
% Basic rules of rock-paper-scissors
% -------------------------
% Three gestures
move(rock).
move(paper).
move(scissor).
% Win-lose relationship: X beats Y
beats(rock, scissor).
beats(scissor, paper).
beats(paper, rock).
% -------------------------
% Calculate the result of the game
% -------------------------
result(X, X, draw) :-

move(X).
result(X, Y, win) :-
beats(X, Y).
result(X, Y, lose) :-
beats(Y, X).
% End of recursion: empty list corresponds to empty result
play([], [], []).
% Recursive advancement: take out each round of gestures, calculate the results, and continue
play([P1|P1T], [P2|P2T], [R|Rs]) :-
% The correct call is result(P1,P2,R), not semicolon
result(P1, P2, R),
% (Optional) Update the score according to R
play(P1T, P2T, Rs).
compute_score([], 0).
compute_score([win | Rs], S) :- compute_score(Rs, S1), S is S1 + 1.
compute_score([lose | Rs], S) :- compute_score(Rs, S1), S is S1 - 1.
compute_score([draw | Rs], S) :- compute_score(Rs, S1)

In [16]:
# Sample ```report reply; the "Report" text contains `\+` (a single backslash), which is not a valid JSON escape.
evaluated_result = '```report\n{"HASH": "E09B4F54","Report": "The code block implements a noisy-or combination for infection probabilities but contains logical errors in probability accumulation. The exclusionary conditions (`\\+`) prevent proper probability combination from multiple sources, and the `inf(Y)` predicate is not properly defined. This results in incorrect probability calculations (0.0316 vs expected higher values). The implementation needs to be revised to properly combine probabilities using ProbLog\'s noisy-or semantics and ensure all dependencies are correctly defined.","Need_regenerate": true,"Dependencies": []}\n```'

# Parse it as a report block; the bare last expression is displayed as the cell output.
_find_all_blocks("report",evaluated_result)

{"HASH": "E09B4F54","Report": "The code block implements a noisy-or combination for infection probabilities but contains logical errors in probability accumulation. The exclusionary conditions (`\+`) prevent proper probability combination from multiple sources, and the `inf(Y)` predicate is not properly defined. This results in incorrect probability calculations (0.0316 vs expected higher values). The implementation needs to be revised to properly combine probabilities using ProbLog's noisy-or semantics and ensure all dependencies are correctly defined.","Need_regenerate": true,"Dependencies": []}

{"HASH": "E09B4F54","Report": "The code block implements a noisy-or combination for infection probabilities but contains logical errors in probability accumulation. The exclusionary conditions (`\+`) prevent proper probability combination from multiple sources, and the `inf(Y)` predicate is not properly defined. This results in incorrect probability calculations (0.0316 vs expected higher va

[]

In [1]:
import re
import json
import uuid
import hashlib
from typing import Literal, List, Union, Tuple, Dict, Any

def _robust_find_block(text:str, block_type:str="report") -> List[str]:
    """Manually find all ``` blocks, this is essential, because we need to ignore ``` blocks in quote"""
    blocks = []
    i = 0
    while i < len(text):
        # Find Start pattern:
        start_pattern = f"```{block_type}"
        start_pos = text.find(start_pattern, i)
        # print("**start_pos",start_pos)
        if start_pos == -1:
            break

        start_content = start_pos + len(start_pattern)

        # Find REAL End pattern:
        j = start_content
        in_quotes = False
        while j < len(text):
            char = text[j]

            # Escaped state check
            if char == '\\' and j + 1 < len(text):
                j += 2
                continue

            # Quoted state check
            if char == '"':
                in_quotes = not in_quotes
                # print("in_quotes",in_quotes,text[j:j+20])

            if not in_quotes and text[j:j+3] == '```':
                block_content = text[start_content:j].strip()
                blocks.append(block_content)
                i = j + 3
                # print("**block_content",block_content)
                break
            j += 1
        else:
            break
            
    return blocks

def _find_all_blocks(type: Literal["report", "code", "final"], text: str) -> List[dict]:
    """
    Find and parse code blocks in the text according to the specified type.
    
    Args:
        type: The type of blocks to find ("report", "code", or "final")
        text: The text to search for blocks
        ext_mark: Optional mark for "final" type blocks
        
    Returns:
        List of dictionaries containing the parsed blocks
    """
    blocks: List[dict] = []
    # Select pattern based on purpose
    if type == "report":
        matches = _robust_find_block(text, "report")
        if not matches:
            matches = _robust_find_block(text, "json")
        # print("**matches",matches)
    elif type == "code":
        pattern = r"```(?:problog|[a-z]*)?\n(.*?)```"
        matches = re.findall(pattern, text, re.DOTALL)
        if not matches:
            pattern = r"```(?:json|[a-z]*)?\n(.*?)```"
            matches = re.findall(pattern, text, re.DOTALL)
    else:
        raise ValueError("you must choose from ['report','code','final']")
    
    for match in matches:
        match_str = match.strip()
        
        try:
            # Try to parse the JSON directly
            match_json = json.loads(match_str)
            # When it succeed...
            if type == "final":
                blocks.append(match_json)
            elif type == "code":
                if isinstance(match_json, dict) and "HASH" in match_json and "Code" in match_json:
                    blocks.append({match_json["HASH"]: match_json["Code"]})
                else:
                    raise TypeError("could not parse code, retry with manually construction")
            elif type == "report":
                if isinstance(match_json, dict) and "HASH" in match_json:
                    blocks.append({match_json["HASH"]: match_json})
                else:
                    raise TypeError("could not parse report, retry with manually construction")

        except json.JSONDecodeError:
            # If JSON parsing fails, try manually constructing a dictionary
            try:
                if type == "code":
                    hash_value = re.search(r'"HASH":\s*"([^"]+)"', match_str).group(1)
                    code_value = re.search(r'"Code":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    
                    # Unescape the code
                    code_value = code_value.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({hash_value: code_value})
                
                elif type == "report":
                    hash_value = re.search(r'"HASH":\s*"([^"]+)"', match_str).group(1)
                    error_summary = re.search(r'"ErrorSummary":\s*"((?:[^"\\]|\\.)*)"', match_str).group(1)
                    suggested_fix = re.search(r'"SuggestedFix":\s*"((?:[^"\\]|\\.)*)"', match_str).group(1)

                    need_regen = re.search(r'"NeedRegenerate":\s*(true|false)', match_str).group(1)

                    # Unescape the report
                    error_summary = error_summary.replace('\\"', '"').replace('\\\\', '\\')
                    suggested_fix = suggested_fix.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({hash_value: {
                        "HASH":hash_value,
                        "ErrorSummary": error_summary,
                        "SuggestedFix": suggested_fix,
                        "NeedRegenerate": need_regen
                    }})

                elif type == "final":
                    # {{"Report": "Fill in your analysis here...", "Validity_form": true|false,"Validity_result": true|false}}
                    report_value = re.search(r'"Report":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    validity_form_value = re.search(r'"Validity_form":\s*(true|false)', match_str).group(1)
                    validity_result_value = re.search(r'"Validity_result":\s*(true|false)', match_str).group(1)

                    # Unescape the report
                    report_value = report_value.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({
                        "Report": report_value,
                        "Validity_form": validity_form_value,
                        "Validity_result": validity_result_value,
                    })

            except Exception as e:
                print(f"Parsing failed: {e}")
                print(f"Original content: {repr(match_str)}")
                continue
    
    return blocks

In [11]:
def _find_all_blocks(type: Literal["report", "code", "final"], text: str) -> List[dict]:
    """
    Find and parse code blocks in the text according to the specified type.
    
    Args:
        type: The type of blocks to find ("report", "code", or "final")
        text: The text to search for blocks
        ext_mark: Optional mark for "final" type blocks
        
    Returns:
        List of dictionaries containing the parsed blocks
    """
    blocks: List[dict] = []
    
    # Select pattern based on purpose
    if type == "final" or type == "report":
        pattern = r"```(?:report|[a-z]*)?\n(.*?)```"
    elif type == "code":
        pattern = r"```(?:problog|[a-z]*)?\n(.*?)```"
    else:
        raise ValueError("you must choose from ['report','code','final']")
    
    matches = re.findall(pattern, text, re.DOTALL)
    
    if not matches:
        pattern = r"```(?:json|[a-z]*)?\n(.*?)```"
        matches = re.findall(pattern, text, re.DOTALL)
    
    for match in matches:
        match_str = match.strip()
        
        try:
            # Try to parse the JSON directly
            match_json = json.loads(match_str)
            # When it succeed...
            if type == "final":
                blocks.append(match_json)
            elif type == "code":
                if isinstance(match_json, dict) and "HASH" in match_json and "Code" in match_json:
                    blocks.append({match_json["HASH"]: match_json["Code"]})
                else:
                    raise TypeError("could not parse code, retry with manually construction")
            elif type == "report":
                if isinstance(match_json, dict) and "HASH" in match_json:
                    blocks.append({match_json["HASH"]: match_json})
                else:
                    raise TypeError("could not parse report, retry with manually construction")

        except json.JSONDecodeError:
            # If JSON parsing fails, try manually constructing a dictionary
            try:
                if type == "code":
                    hash_value = re.search(r'"HASH":\s*"([^"]+)"', match_str).group(1)
                    code_value = re.search(r'"Code":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    
                    # Unescape the code
                    code_value = code_value.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({hash_value: code_value})
                
                elif type == "report":
                    hash_value = re.search(r'"HASH":\s*"([^"]+)"', match_str).group(1)
                    error_summary = re.search(r'"ErrorSummary":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    suggested_fix = re.search(r'"SuggestedFix":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    need_regen = re.search(r'"NeedRegenerate":\s*(true|false)', match_str).group(1)

                    # Unescape the report
                    report_value = report_value.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({hash_value: {
                        "HASH":hash_value,
                        "ErrorSummary": error_summary,
                        "SuggestedFix": suggested_fix,
                        "NeedRegenerate": need_regen
                    }})

                elif type == "final":
                    # {{"Report": "Fill in your analysis here...", "Validity_form": true|false,"Validity_result": true|false}}
                    report_value = re.search(r'"Report":\s*"((?:\\.|[^"])*)"', match_str).group(1)
                    validity_form_value = re.search(r'"Validity_form":\s*(true|false)', match_str).group(1)
                    validity_result_value = re.search(r'"Validity_result":\s*(true|false)', match_str).group(1)

                    # Unescape the report
                    report_value = report_value.replace('\\"', '"').replace('\\\\', '\\')
                    blocks.append({
                        "Report": report_value,
                        "Validity_form": validity_form_value,
                        "Validity_result": validity_result_value,
                    })

            except Exception as e:
                print(f"Parsing failed: {e}")
                print(f"Original content: {repr(match_str)}")
                continue
    
    return blocks

转义的可能性: \\+, =\\=, \\=, 

In [2]:
# Fixture: one ```report block with the keys HASH / Report / Need_regenerate /
# Dependencies. The Report text contains backslash-plus sequences (spelled both
# `\\+` and `\+` in this literal; each yields one backslash followed by `+`),
# which is not a valid JSON escape.
model_eval1 = '```report\n{\n  "HASH": "E09B4F54",\n  "Report": "The `infFromContact/2` rule is partially implemented and contains critical issues \\+. It correctly handles the case where `X` is not susceptible (`\\+susceptible(X)`), but it does not account for the case where `X` is susceptible (which should have a higher infection probability of 0.8). \+Additionally, the rule depends on the undefined `inf/1` predicate, causing a runtime error. The rule also fails to incorporate the travel risk component specified in the requirements.",\n  "Need_regenerate": true,\n  "Dependencies": []\n}\n```'
# Fixture: a ```report block holding a JSON array of five report objects (keys
# HASH / ErrorSummary / SuggestedFix / Dependencies / NeedRegenerate). The third
# object's SuggestedFix embeds its own ``` fence.
model_eval2 = 'Here is the evaluation report for each code module in JSON format:\n\n```report\n[\n    {\n        "HASH": "99C92279",\n        "ErrorSummary": "No issues found",\n        "SuggestedFix": "None needed",\n        "Dependencies": [],\n        "NeedRegenerate": false\n    },\n    {\n        "HASH": "B6292BC0",\n        "ErrorSummary": "No issues found",\n        "SuggestedFix": "None needed",\n        "Dependencies": [],\n        "NeedRegenerate": false\n    },\n    {\n        "HASH": "22D05CCC",\n        "ErrorSummary": "The `expand` predicate fails to handle arithmetic operations correctly, causing a CallModeError when evaluating `prove(expand(double(3),6))`.",\n        "SuggestedFix": "Modify the `expand` predicate to explicitly evaluate arithmetic expressions before proving `B`. For example:\\n```\\nprove(expand(A,B)) :-\\n    expand(A, B),\\n    (number(B) -> true ; prove(B)).\\n```",\n        "Dependencies": [],\n        "NeedRegenerate": true\n    },\n    {\n        "HASH": "5AFBB985",\n        "ErrorSummary": "No issues found",\n        "SuggestedFix": "None needed",\n        "Dependencies": [],\n        "NeedRegenerate": false\n    },\n    {\n        "HASH": "62B0CEA4",\n        "ErrorSummary": "No issues found",\n        "SuggestedFix": "None needed",\n        "Dependencies": [],\n        "NeedRegenerate": false\n    }\n]\n```'
# Fixture: two consecutive ```report blocks (keys HASH / Report /
# Need_regenerate / Dependencies). The Report texts contain `=\=` and `\+`,
# which are not valid JSON escapes.
model_eval3 = """
```report
{
  "HASH": "779B4ADF",
  "Report": "The code block contains a critical syntax error due to the use of the `->` operator, which is not supported in ProbLog. This operator is used in the `cumulative_effect` predicate, causing the `goal_reached` predicate to fail. Additionally, the predicate does not fully implement the cumulative positioning behavior as intended. The logical flow is disrupted, and the predicate lacks proper handling of all cases. Recommendations include replacing the `->` operator with supported alternatives (e.g., `(Pos =:= OldPos, true) ; (Pos =\= OldPos, cumulative_effect(Prev, OldPos))`), validating all predicates against ProbLog's supported syntax, and testing incrementally to ensure correct integration.",
  "Need_regenerate": true,
  "Dependencies": []
}
```
```report
{
  "HASH": "E09B4F54",
  "Report": "The `inf/1` rules in this code block are not correctly implementing the requirements. The issues include: 1) Incorrect handling of negation (`\+ susceptible(X)`) in probabilistic contexts, 2) Probabilistic annotations (`0.6::inf(X)` and `0.8::inf(X)`) not being applied dynamically as intended, and 3) The `riskyTravel(X)` rule not being evaluated (as evidenced by test results showing only initial infection probabilities). The block needs regeneration to properly implement contact transmission (with susceptibility conditions) and travel risk probabilities.",
  "Need_regenerate": true,
  "Dependencies": []
}
```

"""
# Fixture: single ```report block with ErrorSummary / SuggestedFix /
# Dependencies / NeedRegenerate; the SuggestedFix embeds a nested ```prolog fence.
model_eval4 = '```report\n{\n  "HASH": "2D3597DC",\n  "ErrorSummary": "The `almost_equal` predicate in the `parse/2` rules causes a `CallModeError` when comparing non-ground arguments, as it attempts to use `</2` with non-ground terms.",\n  "SuggestedFix": "Modify the `almost_equal` predicate to ensure both arguments are ground before comparison. For example:\\n```prolog\\nalmost_equal(X, Y) :- \\n    ground(X), ground(Y), \\n    abs(X - Y) < 0.0001.\\nalmost_equal(X, Y) :- \\n    var(Y), \\n    Y is float(X).\\n```\\nAlso, ensure the `parse/2` rules handle the order of operations correctly if needed.",\n  "Dependencies": [],\n  "NeedRegenerate": true\n}\n```'
# Fixture: same HASH and layout as model_eval4, with different ErrorSummary /
# SuggestedFix wording.
model_eval5 = '```report\n{\n  "HASH": "2D3597DC",\n  "ErrorSummary": "The `almost_equal` predicate in the `parse/2` rules causes a `CallModeError` due to non-ground arguments when evaluating `abs(X - Y) < 0.0001`. The second clause of `almost_equal` does not properly handle cases where `Y` is a variable.",\n  "SuggestedFix": "Modify the `almost_equal` predicate to ensure both arguments are ground before comparison. For example:\\n```prolog\\nalmost_equal(X, Y) :- \\n    ground(X), ground(Y), \\n    abs(X - Y) < 0.0001.\\nalmost_equal(X, Y) :- \\n    var(Y), \\n    Y = float(X).\\n```\\nAlso, ensure the `parse/2` rules instantiate variables before calling `almost_equal`.",\n  "Dependencies": [],\n  "NeedRegenerate": true\n}\n```'
# Fixture: single ```report block whose text contains apostrophes (escaped as
# \' in this literal) and a nested bare ``` fence inside SuggestedFix.
model_eval6 = '```report\n{\n  "HASH": "EA5E8D55",\n  "ErrorSummary": "The code uses the `->` operator incorrectly in Problog, leading to an \'UnknownClause\' error. The conditional logic for coin flips is not compatible with Problog\'s probabilistic reasoning.",\n  "SuggestedFix": "Replace the conditional logic with Problog\'s probabilistic syntax. For example, rewrite the `coins_r/3` predicate as follows:\\n\\n```\\ncoins_r(SC, SC, 0).\\ncoins_r(SC, S, CNT) :-\\n    CNT > 0,\\n    CNT1 is CNT - 1,\\n    coin(C),  % Probabilistic fact\\n    SC1 is SC + 1,\\n    coins_r(SC1, S, CNT1).\\n```\\n\\nAlternatively, use Problog\'s built-in probabilistic rules to model the coin flips.",\n  "Dependencies": [],\n  "NeedRegenerate": true\n}\n```'
# Fixture: a ```report block whose body is tagged plain text
# (<Overall_Analysis>, <Error_Summary>, <Recommendations>) rather than JSON,
# with a nested ```prolog fence inside <Recommendations>.
model_eval7 ="""
```report
<Overall_Analysis> 
The provided ProbLog code aims to simulate a rock-paper-scissors game and determine the winner based on a series of moves. The code includes predicates for defining moves, calculating results, and computing scores. However, the test results indicate a failure due to an "UnknownClause" error, suggesting a missing or incorrectly defined predicate. The overall structure of the code is logical, but the error prevents it from executing as intended.
</Overall_Analysis>

<Error_Summary> 
1. **Test Failure**: The test `query(determine_winner([rock,rock,rock],[paper,paper,scissor],W))` failed with the error: `No clauses found for ''->'/2' at 48:16.`
2. **Root Cause**: The error occurs because the `determine_winner` predicate is not properly defined or terminated. The code snippet for `determine_winner` is incomplete and lacks a proper closing parenthesis or semicolon, leading to a parsing error. Additionally, the `play/3` and `compute_score/2` predicates are correctly defined but not properly integrated into the `determine_winner` predicate.
</Error_Summary>

<Recommendations>
1. **Fix Syntax Errors**: Ensure the `determine_winner` predicate is properly defined and terminated. For example:
   ```prolog
   determine_winner(P1Moves, P2Moves, Winner) :-
       play(P1Moves, P2Moves, Results),
       compute_score(Results, Score),
       (Score > 0 -> Winner = 'Player 1';
        Score < 0 -> Winner = 'Player 2';
        Winner = 'Draw').
   ```
2. **Verify Predicate Definitions**: Double-check that all predicates (`move/1`, `beats/2`, `result/3`, `play/3`, `compute_score/2`) are correctly defined and accessible.
3. **Test Incrementally**: Test each predicate individually to ensure they work as expected before combining them in the `determine_winner` predicate.
4. **Debugging**: Use debugging tools or print statements to trace the execution flow and identify where the error occurs.
</Recommendations>
```

"""



In [4]:
# Print what _robust_find_block extracts from model_eval7, the report fixture
# whose block body is tagged plain text instead of JSON.
print(_robust_find_block(model_eval7))

['<Overall_Analysis> \nThe provided ProbLog code aims to simulate a rock-paper-scissors game and determine the winner based on a series of moves. The code includes predicates for defining moves, calculating results, and computing scores. However, the test results indicate a failure due to an "UnknownClause" error, suggesting a missing or incorrectly defined predicate. The overall structure of the code is logical, but the error prevents it from executing as intended.\n</Overall_Analysis>\n\n<Error_Summary> \n1. **Test Failure**: The test `query(determine_winner([rock,rock,rock],[paper,paper,scissor],W))` failed with the error: `No clauses found for \'\'->\'/2\' at 48:16.`\n2. **Root Cause**: The error occurs because the `determine_winner` predicate is not properly defined or terminated. The code snippet for `determine_winner` is incomplete and lacks a proper closing parenthesis or semicolon, leading to a parsing error. Additionally, the `play/3` and `compute_score/2` predicates are corr

In [54]:
# Fixture: simulated model reply with five ```problog blocks (HASH A-E).
# The "Code" values mix escape spellings: `\+` and `=\=` (kept as-is by Python)
# and `\\=` / `\\+` (one backslash at runtime). Because this literal is not raw,
# each `\n` becomes a real newline inside the JSON string.
model_gnrt1 = """
Here are the extracted and formatted code blocks from the `<generated_code>` that correspond to the `<langda>` blocks in the `<origin_code>`:

```problog
{\n"HASH": "A", "Code": "prove(implies(P,Q)) :-\n    \+ prove(P); prove(Q)."}
```

```problog
{\n"HASH": "B", "Code": "=\="}
```

```problog
{"HASH": "C", "Code": "prove(opposite(P)\\= :-    \\+ prove(P)."}
```

```problog
{"HASH": "D", "Code": "prove(extend(List,Elem,Extended)) :-\n    Extended = [Elem|List]."}
```

```problog
{"HASH": "E", "Code": "expand(double(X), Y) :-\n    Y is X * 2."}
```
"""

In [53]:
# Fixture: simulated model reply with three ```problog blocks (HASH F-H).
# Each `\n` in this non-raw literal becomes a real newline inside the "Code"
# string; block G also contains the `\+` negation operator.
model_gnrt2 = """
Here are the extracted and formatted code blocks from the generated code that correspond to the <langda> blocks in the origin code:

```problog
{"HASH": "F", "Code": "flap_position(Time,Pos) :- \n Time > 0, \n attempted_flap_position(Time,Pos), \n legal_flap_position(Pos)."}
```

```problog
{"HASH": "G", "Code": "overrun_exception(Time) :-\n Time > 0,\n attempted_flap_position(Time,Pos),\n \+ legal_flap_position(Pos)."}
```

```problog
{"HASH": "H", "Code": "goal_reached(Time) :-\n flap_position(Time,Pos),\n goal(Pos)."}
```
"""

In [45]:
# Fixture: simulated model reply with four ```problog blocks (HASH I-L).
# Blocks I-K use `\\=` (one backslash at runtime) and `\+`. Block J is written
# on a single line and has a stray backslash before `A \\= B`.
model_gnrt3 = """
Here are the extracted and formatted code blocks from the `<generated_code>` that correspond to the `<langda>` blocks in the `<origin_code>`:

```problog
{"HASH": "I", "Code": "0.5::open_door(A) ; 0.5::open_door(B) :-\n    select_door(Selected),\n    member(A, [1,2,3]),\n    member(B, [1,2,3]),\n    A \\= Selected,\n    B \\= Selected,\n    A \\= B,\n    \+ prize(A),\n    \+ prize(B)."}
```

```problog
{"HASH": "J", "Code": "0.5::open_door(A) ; 0.5::open_door(B) :-    select_door(Selected),    member(A, [1,2,3]),    member(B, [1,2,3]),    A \\= Selected,    B \\= Selected,\   A \\= B,    \+ prize(A),    \+ prize(B)."}
```

```problog
{"HASH": "K", "Code": "open_door(A) :-\n    select_door(Selected),\n    member(A, [1,2,3]),\n    A \\= Selected,\n    \+ prize(A),\n    member(B, [1,2,3]),\n    B \\= Selected,\n    B \\= A,\n    prize(B)."}
```

```problog
{"HASH": "L", "Code": "win_keep :-\n    select_door(Door),\n    prize(Door)."}
```
"""

In [52]:
# Fixture: two model replies concatenated, each with one ```problog block and
# surrounding prose. Both blocks carry the same HASH (E09B4F54) but different
# code; the second one spells the predicate `susceptiple(X)`.
model_gnrt4 = """
Based on the origin_code and generated_code provided, I'll extract and format the code block that corresponds to the <Langda> section in the origin_code. Here's the completed code snippet for the infection logic:

```problog
{"HASH": "E09B4F54", "Code": "0.6::inf(X) :- contact(X,Y), inf(Y), \+susceptible(X).\n0.8::inf(X) :- contact(X,Y), inf(Y), susceptible(X).\n0.2::inf(X) :- riskyTravel(X), \+inf(X).\ninf(X) :- initialInf(X)."}
```

This code implements:
1. Contact transmission with 0.6 probability for non-susceptible individuals
2. Contact transmission with 0.8 probability for susceptible individuals
3. Additional 0.2 infection probability from risky travel
4. Initial infection probability from initialInf/1 predicate

The hash E09B4F54 matches the one specified in the <Langda> section of the origin_code.


Based on the origin_code and generated_code provided, here's the extracted and formatted code block that corresponds to the <Langda> section:

```problog
{"HASH": "E09B4F54", "Code": "0.6::inf(X) :- contact(X,Y), inf(Y), \+susceptible(X).\n0.8::inf(X) :- contact(X,Y), inf(Y), susceptiple(X).\n0.2::inf(X) :- riskyTravel(X)."}
```

This code implements:
1. Contact transmission with different probabilities based on susceptibility
2. Additional infection risk from travel
3. Matches the requirements specified in the <Langda> section
4. Uses the correct hash E09B4F54 from the origin code
"""

In [2]:
# Fixture: three model replies concatenated (3 + 2 + 3 ```problog blocks).
# HASH 592D75CA appears on two different blocks.
# NOTE(review): this rebinds `model_gnrt4`, which the cell marked In [52] also
# assigns; whichever cell ran last determines the value.
model_gnrt4 = """
Here are the extracted and formatted code blocks from the `<generated_code>` that correspond to the `<langda>` blocks in the `<origin_code>`:

```problog
{"HASH": "C5B4A895", "Code": "0.5::open_door(A) ; 0.5::open_door(B) :-\n    select_door(Selected),\n    member(A, [1,2,3]),\n    member(B, [1,2,3]),\n    A \\= B,\n    A \\= Selected,\n    B \\= Selected,\n    \+ prize(A),\n    \+ prize(B)."}
```

```problog
{"HASH": "338175EC", "Code": "open_door(A) :-\n    select_door(Selected),\n    member(A, [1,2,3]),\n    A \\= Selected,\n    \+ prize(A),\n    member(B, [1,2,3]),\n    B \\= Selected,\n    B \\= A,\n    prize(B)."}
```

```problog
{"HASH": "B73E5085", "Code": "win_keep :-\n    select_door(Door),\n    prize(Door)."}
```

Here are the extracted and formatted code blocks from the <generated_code> that correspond to the <langda> blocks in <origin_code>:

```problog
{"HASH": "592D75CA", "Code": "% Random Door Opening Rules\nopen_door(D) :-\n    select_door(S),\n    prize(P),\n    member(D, [1,2,3]),\n    \+ select_door(D),\n    \+ prize(D),\n    random_select([D1, D2], [D1, D2]),\n    (D = D1 ; D = D2)."}
```

```problog
{"HASH": "592D75CA", "Code": "% Deterministic Door Opening Rules\nopen_door(D) :-\n    select_door(S),\n    prize(P),\n    member(D, [1,2,3]),\n    \+ select_door(D),\n    (prize(D1), \+ select_door(D1), D1 \\= D, \+ prize(D) -> D = D1)."}
```

Here are the extracted and formatted code blocks from the generated code that correspond to the <langda> blocks in the origin code:

```problog
{"HASH": "B79F5C58", "Code": "flap_position(Time,Pos) :-\n Time > 0,\n attempted_flap_position(Time,Pos),\n legal_flap_position(Pos)."}
```

```problog
{"HASH": "E185A34B", "Code": "overrun_exception(Time) :-\n Time > 0,\n attempted_flap_position(Time,Pos),\n \+ legal_flap_position(Pos)."}
```

```problog
{"HASH": "779B4ADF", "Code": "goal_reached(Time) :-\n flap_position(Time,Pos),\n goal(Pos)."}
```
"""
# Fixture: a single ```problog block (HASH BCB17653); the literal ends right
# after the closing fence, with no trailing newline.
model_gnrt5 = """Here are the extracted and formatted code blocks from the generated code that correspond to the <langda> blocks in the origin_code:

```problog
{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-\n    carrying(key, T),\n    at(robot, Pos, T).\nat(key, Pos, s(T)) :-\n    at(key, Pos, T),\n    \+ carrying(key, s(T)),\n    \+ do(goto(_), T)."}
```"""
# Fixture: two replies concatenated; both ```problog blocks use HASH BCB17653
# but carry different code.
model_gnrt6 = """Based on the provided origin_code and generated_code, I'll extract and format the code block that corresponds to the <langda> section in the origin_code.

The <langda> section in origin_code contains a Code_Block with hash BCB17653, and this exact same code block appears in the generated_code (twice, but they're identical).

Here's the formatted JSON output:

```problog
{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-\n    carrying(key, T),\n    at(robot, Pos, T).\nat(key, Pos, s(T)) :-\n    at(key, Pos, T),\n    \+ carrying(key, s(T)).\nat(key, Pos, 0) :-\n    initially_at(key, Pos)."}
```
Here are the extracted and formatted code blocks from the generated code that correspond to the <langda> blocks in the origin_code:

```problog
{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-\n    carrying(key, T),\n    at(robot, Pos, T).\nat(key, Pos, s(T)) :-\n    at(key, Pos, T),\n    \+ carrying(key, s(T)),\n    \+ do(goto(_), T)."}
```
"""


In [4]:
import pprint  # NOTE(review): imported but not used in this cell
# Parse the model_gnrt6 fixture as "code" blocks and print the resulting list.
ret = _find_all_blocks("code",model_gnrt6)
print(ret)

{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-
    carrying(key, T),
    at(robot, Pos, T).
at(key, Pos, s(T)) :-
    at(key, Pos, T),
    \+ carrying(key, s(T)).
at(key, Pos, 0) :-
    initially_at(key, Pos)."}

{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-
    carrying(key, T),
    at(robot, Pos, T).
at(key, Pos, s(T)) :-
    at(key, Pos, T),
    \+ carrying(key, s(T)),
    \+ do(goto(_), T)."}

{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-
    carrying(key, T),
    at(robot, Pos, T).
at(key, Pos, s(T)) :-
    at(key, Pos, T),
    \+ carrying(key, s(T)).
at(key, Pos, 0) :-
    initially_at(key, Pos)."}
{ }
{"HASH": "BCB17653", "Code": "at(key, Pos, T) :-
    carrying(key, T),
    at(robot, Pos, T).
at(key, Pos, s(T)) :-
    at(key, Pos, T),
    \+ carrying(key, s(T)),
    \+ do(goto(_), T)."}
{ }
[{'BCB17653': 'at(key, Pos, T) :-\n    carrying(key, T),\n    at(robot, Pos, T).\nat(key, Pos, s(T)) :-\n    at(key, Pos, T),\n    \\+ carrying(key, s(T)).\nat(key, Pos, 0) :-\n    i

In [38]:
# Display the code string stored under hash BCB17653 in the first parsed block.
ret[0]["BCB17653"]

'at(key, Pos, T) :-\n    carrying(key, T),\n    at(robot, Pos, T).\nat(key, Pos, s(T)) :-\n    at(key, Pos, T),\n    \\+ carrying(key, s(T)),\n    \\+ do(goto(_), T).'