In [None]:
import contextlib
from io import StringIO
import re
import subprocess
import json
from tqdm import tqdm

input_path = "/home/nfs02/laizj/experiment/uncertainty_analysis/analysis_unknown/results/qwen72b-generate_template_and_code_gsm8k-train.json"
output_path = "/home/nfs02/laizj/experiment/uncertainty_analysis/analysis_unknown/results/qwen72b-generate_template_and_code_gsm8k-train_parsed.json"

success = 0
template_generation_fault_count = 0 # 无法生成模板
template_generation_mistake_count = 0 # 生成的模板与原始问题不匹配
python_generation_fault_count = 0 # 无法生成 Python 代码
python_run_fault_count = 0 # 无法运行的 Python 代码
python_run_mistake_count = 0 # Python 代码运行结果错误
template_python_not_algined_count = 0 # 模板中的变量与 Python 代码中的变量不对齐

# def runcode(code):
#     """执行 Python 代码并返回输出"""
#     try:
#         result = subprocess.run(['python', '-c', code], capture_output=True, text=True)
#         output = float(result.stdout.strip())
#     except Exception as e:
#         return None
#     return output

def runcode(code):
    """执行 Python 代码并返回输出"""
    output = ""
    try:
        # 捕获标准输出
        with StringIO() as buf, contextlib.redirect_stdout(buf):
            exec(code)
            output = float(buf.getvalue().strip())
    except Exception as e:
        return None
    return output

def extract_content(item):
    
    global success, python_run_fault_count, python_run_mistake_count, python_generation_fault_count, template_generation_fault_count, template_generation_mistake_count, template_python_not_algined_count
    
    generation = item["generated_texts"][0]
    # print(generation)
    # 提取第一次出现的### template后的内容
    template_match = re.search(r'### Template:(.*?)(?=###|$)', generation, re.DOTALL | re.IGNORECASE)
    template_content = template_match.group(1).strip() if template_match else None
    
    # 提取### python代码块中的代码
    python_code_match = re.search(r'### Python Code:\s*```(?:python)?\s*(.*?)\s*```', generation, re.DOTALL | re.IGNORECASE)
    python_code = python_code_match.group(1).strip() if python_code_match else None
    
    if python_code is None:
        python_run_fault_count += 1
    elif template_content is None:
        template_generation_fault_count += 1 
    elif abs(template_content.count(' ') - item["query"].count(' ')) / item["query"].count(' ') > 0.2:
        template_generation_mistake_count += 1
    else:
        python_result = runcode(python_code)
        if python_result is None:
            python_run_fault_count += 1
        elif abs(python_result - item["answer"]) > 1e-2:
            python_run_mistake_count += 1
        else:
            variables = re.findall(r'<([^>]+?)>', template_content)
            for var in variables:
                pattern = r'\b' + re.escape(var) + r'\s*?='
                if re.search(pattern, python_code) is None:
                    template_python_not_algined_count += 1
                    return False
            else:
                success += 1
                item["template"] = template_content
                item["python"] = python_code
            return True
        
    return False

with open(input_path, "r") as f:
    data = json.load(f)
    results = []
    for item in tqdm(data):
        if (extract_content(item)):
            results.append(item)
    with open(output_path, "w") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)
        
    print(f"成功生成：{success}")
    print(f"无法生成模板：{template_generation_fault_count}")
    print(f"生成的模板与原始问题不匹配：{template_generation_mistake_count}")
    print(f"无法生成 Python 代码：{python_generation_fault_count}")
    print(f"无法运行的 Python 代码：{python_run_fault_count}")
    print(f"Python 代码运行结果错误：{python_run_mistake_count}")
    print(f"模板中的变量与 Python 代码中的变量不对齐：{template_python_not_algined_count}")
    total = success + python_run_fault_count + python_run_mistake_count + python_generation_fault_count + template_generation_fault_count + template_generation_mistake_count + template_python_not_algined_count
    print(f"{total=}")


100%|██████████| 7473/7473 [00:02<00:00, 3369.94it/s]


成功生成：7219
无法生成模板：0
生成的模板与原始问题不匹配：33
无法生成 Python 代码：0
无法运行的 Python 代码：39
Python 代码运行结果错误：87
模板中的变量与 Python 代码中的变量不对齐：95
total=7473


In [1]:
import json
import os
import re
import random
import ast
import string
import subprocess
from logging import exception
from tqdm.contrib.concurrent import process_map
from functools import partial
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/home/nfs05/laizj/model/models--Qwen--Qwen2.5-Math-7B-Instruct/snapshots/ef9926d75ab1d54532f6a30dd5e760355eb9aa4d")
input_path = "/home/nfs02/laizj/experiment/uncertainty_analysis/analysis_unknown/results/qwen72b-generate_template_and_code_gsm8k-train_parsed.json"
output_path = "/home/nfs02/laizj/experiment/uncertainty_analysis/analysis_unknown/data/synthetic_data/qwen72b-generate_template_and_code_gsm8k-train_disturbed.json"

def is_positive_integer(value):
    try:
        return float(value).is_integer() and float(value) > 0
    except ValueError:
        return False

def randomize_value(original_value, max_fluct=1.0):
    """
    Returns a value randomly fluctuated within ±(max_fluct * original_value).
    If original_value is int, the result is rounded back to int.
    """

    lower_bound = original_value * (1 - max_fluct)
    if original_value > 0:
        lower_bound = max(0, lower_bound)
    upper_bound = original_value * (1 + max_fluct)
    if original_value < 0:
        upper_bound = min(0, upper_bound)
    if isinstance(original_value, float) and 0 < original_value < 1:
        lower_bound = max(0, lower_bound)
        upper_bound = min(1, upper_bound)

    if random.random() < 0.5:
        for _ in range(100):
            new_value = random.uniform(lower_bound, upper_bound)
            if new_value != original_value:
                break
            
            max_fluct += 0.1
            lower_bound = original_value * (1 - max_fluct)
            if original_value > 0:
                lower_bound = max(0, lower_bound)
            upper_bound = original_value * (1 + max_fluct)
            if original_value < 0:
                upper_bound = min(0, upper_bound)
            if isinstance(original_value, float) and 0 < original_value < 1:
                lower_bound = max(0, lower_bound)
                upper_bound = min(1, upper_bound)
    else:
        new_value = random.uniform(lower_bound, upper_bound)
    if isinstance(original_value, int):
        return int(round(new_value))
    return round(new_value, 2)


def randomize_code(original_code: str, original_query=None):

    # Split the code by lines
    lines = original_code.split('\n')

    # We’ll collect lines until we hit the first consecutive blank line
    # (i.e., an empty line).
    variable_lines = []

    for i, line in enumerate(lines):
        if variable_lines == [] and line.strip() == "":
            continue
        # Detect if the line is empty
        if line.strip() == "":
            # This is the first consecutive newline => stop collecting variable lines
            break
        else:
            variable_lines.append(line)

    # Use a regex to match lines of the form: name = number
    pattern = re.compile(r'^(\s*\w+)\s*=\s*([\d.]+)(.*)$')
    
    for _ in range(100):
        new_variable_lines = []
        replaced_variables = []
        for idx, line in enumerate(variable_lines):
            match = pattern.match(line)
            if match:
                # Extract the variable name, the original numeric value, and any trailing spaces
                prefix = match.group(1)  # e.g. "variables_a"
                original_value_str = match.group(2)  # e.g. "150"
                suffix = match.group(3)  # trailing spaces if any

                # Determine if it’s int or float
                if '.' in original_value_str:
                    original_value = float(original_value_str)
                else:
                    original_value = int(original_value_str)

                # Randomize
                new_val = randomize_value(original_value, max_fluct=0.5)
                replaced_variables.append((prefix, new_val))

                # Convert back to string
                if isinstance(original_value, float):
                    new_value_str = str(new_val)
                else:
                    new_value_str = str(new_val)

                # Rebuild the line
                new_variable_lines.append(prefix + " = " + new_value_str + suffix)
            else:
                new_variable_lines.append(line)

        final_code = '\n'.join(new_variable_lines) + '\n' + '\n'.join(lines[i:])
        try:
            result = subprocess.run(['python', '-c', final_code], capture_output=True, text=True)
            if result.returncode == 0 and is_positive_integer(result.stdout.strip()):
                if original_query:
                    for var, new_value in replaced_variables:
                        original_query = original_query.replace(f"<{var}>", str(new_value))
                    return {
                        "new_query": original_query, 
                        "new_code": final_code, 
                        "new_ans": float(result.stdout.strip())
                        }
                else:
                    return final_code
            else:
                # raise exception("The code is not valid.")
                pass
        except:
            pass
    return None

prompt = """### Instruction:
{}

### Response: 
Let’s think step by step."""

# messages = [
#     {"role": "system", "content": "Below is an instruction that describes a task. Write a response that appropriately completes the request. Output each step in a separate line, and explicitly state the final answer after the final step following the format \"The answer is\""},
#     {"role": "user", "content": prompt}
# ]


def process_item(item, tokenizer):
    r = randomize_code(item["python"], item["query"])
    if r:
        messages = [
            {"role": "system", "content": "Below is an instruction that describes a task. Write a response that appropriately completes the request. Output each step in a separate line, and explicitly state the final answer after the final step following the format \"The answer is\""},
            {"role": "user", "content": r["new_query"]} # type: ignore
        ]

        item["prompt"] = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True)
        # 将r的内容均添加到item中
        item.update(r)
        return item
    return None

with open(input_path, "r") as f:
    data = json.load(f)

# 使用偏函数固定tokenizer参数
process_func = partial(process_item, tokenizer=tokenizer)

# 多进程处理
results = process_map(
    process_func,
    data,
    max_workers=32,
    chunksize=20,
)

# 过滤掉None结果
results = [r for r in results if r is not None]

with open(output_path, "w") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)
    
print(f"生成数量：{len(results)}")

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 7219/7219 [02:09<00:00, 55.73it/s] 


生成数量：6737
