In [None]:
import re
import json
import warnings

def escape_json_format_curly_braces(json_string: str) -> str:
    """
    Identifies the 'JSON_format' field inside a JSON string and escapes curly brackets within its value.
    """
    # Regular expression to find the "JSON_format" key and its string value
    # pattern = r'"JSON_format"\s*:\s*"([^"]*)"'
    # pattern = r'"JSON_format"\s*:\s*"\{[^}]+\}"'
    pattern = r'"JSON_format"\s*:\s*"\{.*?\}"'
    match = re.search(pattern, json_string)
    if match:
        content_inside_quotes = match.group(0)
        escaped_content = content_inside_quotes.replace('"JSON_format":', "").strip()[1:-1].replace("{", "{{").replace("}", "}}").replace('"', "'")
        escaped_content = '"JSON_format": "' + escaped_content + '"' 

        return re.sub(pattern, escaped_content, json_string) 
        # return escaped_content
    else:
        print("No match found.")
        return json_string

def extract_json_content(raw_response: str):
    """
    Extract and parse JSON content from LLM response with robust error handling

    Args:
        raw_response: Raw string response from the LLM

    Returns:
        Parsed JSON dict or None if unrecoverable
    """
    raw_response = raw_response.strip()
    raw_response = re.sub(r"'", '"', raw_response)
    raw_response = escape_json_format_curly_braces(raw_response)

    try:
        # try to extract the first JSON object from the raw response.
        brace_match = re.search(r"^\s*\n*(\{[\s\S]+\})\s*\n*$", raw_response, re.DOTALL)
        if brace_match:
            return json.loads(brace_match.group(1))
    except Exception:
        pass
    try:
        # Fallback: Try to extract JSON from a markdown JSON block.
        code_block_match = re.search(r"```json\s*\n(\{[\s\S]+\})\s*\n```", raw_response, re.DOTALL)
        if code_block_match:
            return json.loads(code_block_match.group(1))
    except Exception:
        pass
    try:
        # Fallback 2: Try to extract JSON from a markdown code block.
        code_block_match = re.search(r"```\s*\n(\{[\s\S]+\})\s*\n```", raw_response, re.DOTALL)
        if code_block_match:
            return code_block_match.group(1)
    except Exception:
        pass
    # Last resort: Find JSON-like structures in text
    json_candidates = re.findall(r'\{.*\}', raw_response, re.DOTALL)
    for candidate in json_candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            warnings.warn(f"JSON decode error in candidate: {e}\nContent: {candidate}")
            continue

    warnings.warn("No valid JSON found in response")
    return None


In [29]:
json_str = """{
    "evaluator_specification": {
        "name": "Label Generation Evaluator",
        "definition": "Evaluates the relevance of generated labels in a structured codebook to the theme of politics.",
        "prompt_template": {
                "Context": "You have been provided with a codebook that includes labels generated from categorizing themes in transcripts. The focus of these labels is meant to be politics.",
                "Task": "Evaluate whether all the labels in the codebook are relevant to the theme of politics.",
                "Possible Scores": ["Low", "Mid", "High"],
                "JSON_format": "{'score': str, 'justification': str}"
        }
    }
}"""
extract_json_content(json_str)
json.loads(json_str)

{'evaluator_specification': {'name': 'Label Generation Evaluator',
  'definition': 'Evaluates the relevance of generated labels in a structured codebook to the theme of politics.',
  'prompt_template': {'Context': 'You have been provided with a codebook that includes labels generated from categorizing themes in transcripts. The focus of these labels is meant to be politics.',
   'Task': 'Evaluate whether all the labels in the codebook are relevant to the theme of politics.',
   'Possible Scores': ['Low', 'Mid', 'High'],
   'JSON_format': "{'score': str, 'justification': str}"}}}

In [125]:
json_str = """{
    "evaluator_specification": {
        "name": "Politics Label Evaluation",
        "definition": 'Evaluates the relevance of generated labels in the context of political themes.',
        "prompt_template": {
            "Context": 'You are given a set of labels generated for categorizing themes from transcripts. Your task is to evaluate whether these labels are centered around political themes.',
            "Task": 'Determine the relevance of each label in relation to political themes and categorize the overall result.',
            "Possible Scores": ['Excellent', 'Good', 'Fair', 'Poor'],
            "JSON_format": '{ 'labels': [ {'label': 'string', 'score': 'string'} ] }'
        }
    }
}
"""
            # "JSON_format": '{ "labels": "string" }'
extract_json_content(json_str)

{'evaluator_specification': {'name': 'Politics Label Evaluation',
  'definition': 'Evaluates the relevance of generated labels in the context of political themes.',
  'prompt_template': {'Context': 'You are given a set of labels generated for categorizing themes from transcripts. Your task is to evaluate whether these labels are centered around political themes.',
   'Task': 'Determine the relevance of each label in relation to political themes and categorize the overall result.',
   'Possible Scores': ['Excellent', 'Good', 'Fair', 'Poor'],
   'JSON_format': "{{ 'labels': [ {{'label': 'string', 'score': 'string'}} ] }}"}}}

In [120]:
text = '''
"prompt_template": {
    "Context": "You have been provided with a codebook that includes labels generated from categorizing themes in transcripts. The focus of these labels is meant to be politics.",
    "Task": "Evaluate whether all the labels in the codebook are relevant to the theme of politics.",
    "Possible Scores": ["Low", "Mid", "High"],
    "JSON_format": "{'score': str, 'justification': str}"
}
'''
text = '''"JSON_format": "{ "labels": "string" }"'''
text = '''"JSON_format": "{ "labels": [ {"label": "string", "score": "string"} ] }"'''
# pattern = r'"JSON_format"\s*:\s*"\{[^}]+\}"'
pattern = r'"JSON_format"\s*:\s*"\{.*?\}"'

match = re.search(pattern, text)

# Print result
if match:
    print("Match found:", match.group(0))
else:
    print("No match found.")
re.sub(pattern, escape_curly_braces, json_string)

Match found: "JSON_format": "{ "labels": [ {"label": "string", "score": "string"} ] }"


NameError: name 'escape_curly_braces' is not defined