In [26]:
import os
import json
import subprocess
from dotenv import load_dotenv
from openai import OpenAI

# ============================================================
# 1. Load the API key & initialize the LLM client
# ============================================================
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast when the DashScope key is missing. Passing api_key=None would let the
# OpenAI SDK fall back to the OPENAI_API_KEY environment variable, which could send
# an unrelated credential to the DashScope endpoint configured below.
dashscope_api_key = os.getenv("DASHSCOPE_API_KEY")
if not dashscope_api_key:
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; export it or add it to a .env file."
    )

# OpenAI-compatible client pointed at DashScope's compatible-mode endpoint.
client = OpenAI(
    api_key=dashscope_api_key,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# ============================================================
# 2. Instrumentation system prompt (strict structured JSON output)
# ============================================================
# System prompt for the instrumentation request. It tells the model to insert
# fprintf(stderr, ...) "enter" probes inside every control-structure block of the
# C source (with the matching "exit" probe left as a comment) and to reply with a
# JSON object holding "instrumented_code" and "mapping".
#
# The "\\n" sequences are written with a doubled backslash so that the prompt text
# contains a literal backslash-n (a C escape) rather than a real newline.
#
# NOTE(review): rule 4 names the fields filePath/fnName/blockID/startLine/endLine,
# while the JSON example uses file/function/block_id/start_line/end_line — confirm
# which naming the model is expected to follow.
INSTRUMENT_SYSTEM_PROMPT = """
你是一个严格结构化输出的 C 代码插桩工具。  
你的任务是：对用户提供的 C 源代码中的所有控制结构内部（if、else、else if、for、while、do-while）进行 fprintf(stderr, ...) 插桩。

===================【插桩规范】===================

1. block 定义：
   每个控制结构视为一个 block。
   每个函数内每个 block 必须分配一个递增的 blockID（1 开始）。

2. 插桩内容：
   block 内部首行前插入：
       fprintf(stderr, "[src/{file}] enter {fnName} {blockID}\\n");
   block 内部尾行前插入注释：
       // fprintf(stderr, "[src/{file}] exit {fnName} {blockID}\\n");

3. 保留原代码缩进、注释、所有代码。

4. 对每个 block 输出：
   filePath、fnName、blockID、startLine、endLine。

===================【输出格式（严格遵守）】===================

你必须输出一个 JSON 对象：

{
  "instrumented_code": "完整插桩 C 代码字符串",
  "mapping": [
      {
        "file": "count.c",
        "function": "main",
        "block_id": 1,
        "start_line": 27,
        "end_line": 29
      }
  ]
}

要求：
- 顶层必须是 JSON（无 Markdown、无解释、无 ```）
- 字符串中保持可编译格式
================================================

请对用户提供的 C 代码进行插桩，并按以上 JSON 格式输出。
"""

# ============================================================
# 3. Read the source code
# ============================================================
SOURCE_FILE = "count.c"
# Decode explicitly as UTF-8 so the text sent to the model does not depend on the
# platform's locale default encoding.
with open(SOURCE_FILE, "r", encoding="utf-8") as f:
    source_code = f.read()

# ============================================================
# 4. Ask the LLM to instrument the source
# ============================================================
resp = client.chat.completions.create(
    model="qwen3-coder-plus",
    messages=[
        {"role": "system", "content": INSTRUMENT_SYSTEM_PROMPT},
        {"role": "user", "content": source_code},
    ],
    temperature=0
)

# Raw reply text, kept under its original name for inspection.
instrument_result = resp.choices[0].message.content

# The prompt forbids Markdown, but the recorded output of a later cell in this
# notebook shows the same model wrapping JSON in ```json fences. Keep only the
# span from the first "{" to the last "}" so fences or stray prose around the
# object do not break parsing.
instrument_reply = instrument_result or ""
instrument_json_start = instrument_reply.find("{")
instrument_json_end = instrument_reply.rfind("}")
if instrument_json_start == -1 or instrument_json_end < instrument_json_start:
    raise ValueError(
        f"Instrumentation reply contains no JSON object: {instrument_reply[:200]!r}"
    )
instrument_json = json.loads(
    instrument_reply[instrument_json_start:instrument_json_end + 1]
)

# Both keys are required by the prompt; a missing key raises KeyError here.
instrumented_code = instrument_json["instrumented_code"]
mapping = instrument_json["mapping"]

# Write the instrumented source to disk (explicit UTF-8, independent of locale).
INSTR_FILE = "instr_count.c"
with open(INSTR_FILE, "w", encoding="utf-8") as f:
    f.write(instrumented_code)

print("[+] 插桩完成，输出文件:", INSTR_FILE)

[+] 插桩完成，输出文件: instr_count.c


In [27]:
# ============================================================
# 5. Compile the instrumented code
# ============================================================
BIN_PATH = "./instr_count"

# Capture gcc's diagnostics and print them in the cell output, so a compile
# failure on LLM-generated code can be diagnosed from the notebook itself
# instead of only surfacing as a bare CalledProcessError.
compile_proc = subprocess.run(
    ["gcc", "-o", BIN_PATH, INSTR_FILE],
    capture_output=True,
    text=True,
)
if compile_proc.stderr:
    print(compile_proc.stderr)

# Same failure mode as check=True: raises subprocess.CalledProcessError on a
# non-zero exit status.
compile_proc.check_returncode()
print("[+] 编译成功:", BIN_PATH)

[+] 编译成功: ./instr_count


In [28]:
# ============================================================
# 6. Run the binary with the test inputs
# ============================================================
TEST_ARGS = ["1.00", "1.00001"]
RUN_TIMEOUT_SEC = 5          # upper bound on a single run of the target program
MAX_LOG_PREVIEW_LINES = 40   # how many stderr lines to echo into the notebook

run = subprocess.Popen(
    [BIN_PATH] + TEST_ARGS,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)
try:
    stdout, stderr = run.communicate(timeout=RUN_TIMEOUT_SEC)
except subprocess.TimeoutExpired:
    # The target loops on its inputs (the constraint text recorded in this notebook
    # mentions a `cur != end` loop condition), so some inputs may never terminate.
    # Kill the child and keep whatever output it produced so far.
    run.kill()
    stdout, stderr = run.communicate()
    print(f"[!] 程序运行超过 {RUN_TIMEOUT_SEC} 秒，已终止")

# `stderr` keeps the complete log for later cells; only the preview is truncated,
# because the probes emit one line per executed block and flood the cell output.
print("[+] 程序 stderr 输出：")
log_lines = stderr.splitlines()
print("\n".join(log_lines[:MAX_LOG_PREVIEW_LINES]))
if len(log_lines) > MAX_LOG_PREVIEW_LINES:
    print(f"... （共 {len(log_lines)} 行，仅显示前 {MAX_LOG_PREVIEW_LINES} 行）")

[+] 程序 stderr 输出：
[src/count.c] enter main 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[src/count.c] enter count 1
[sr

In [56]:
# ============================================================
# 7. Execution Abstraction (EA) generation prompt (structured JSON)
# ============================================================
# System prompt for the EA request. It describes three inputs (instrumented_code,
# mapping, execution_log) and asks the model to derive the call chain from the
# log, replace non-executed blocks with comments while keeping their conditions,
# and reply with a JSON object containing "call_chain" and "executed_files".
EA_SYSTEM_PROMPT = """
你是执行抽象（Execution Abstraction, EA）生成工具。

输入内容包括：
1. instrumented_code（插桩源码）
2. mapping（block 映射）
3. execution_log（stderr 打印）

任务：
- 根据 execution_log 中的 "enter"/"exit" 生成函数调用链
- 删除未执行 block（用注释替代），但保留其条件
- 输出执行抽象 EA，必须为 JSON：

{
  "call_chain": ["[src/count.c] main", "[src/count.c] count"],
  "executed_files": {
      "count.c": {
         "total_lines": 31,
         "content": "...删除未执行 block 后的代码..."
      }
  }
}

要求：
- JSON 顶层
- 严格无 Markdown、无多余文本
"""

# Payload for the EA request: the instrumented source, the block mapping and the
# stderr log captured from the instrumented run.
ea_input = {
    "instrumented_code": instrumented_code,
    "mapping": mapping,
    "execution_log": stderr
}

resp2 = client.chat.completions.create(
    model="qwen3-coder-plus",
    messages=[
        {"role": "system", "content": EA_SYSTEM_PROMPT},
        {"role": "user", "content": json.dumps(ea_input, ensure_ascii=False)},
    ],
    temperature=0
)

# The model may wrap its JSON in Markdown fences despite the prompt (the recorded
# output of the constraint cell in this notebook shows exactly that), so parse
# only the span from the first "{" to the last "}".
ea_reply = resp2.choices[0].message.content or ""
ea_json_start = ea_reply.find("{")
ea_json_end = ea_reply.rfind("}")
if ea_json_start == -1 or ea_json_end < ea_json_start:
    raise ValueError(f"EA reply contains no JSON object: {ea_reply[:200]!r}")
ea_json = json.loads(ea_reply[ea_json_start:ea_json_end + 1])
print("[+] EA 生成成功")

[+] EA 生成成功


In [57]:
# ============================================================
# 8. Generate natural-language constraints for reaching uncovered branches
# ============================================================
# System prompt for the constraint request. Given the EA, the model is asked to
# find uncovered blocks and describe in natural language how to reach each one,
# replying with a JSON object whose "constraints" list holds
# file / function / block_id / condition entries.
CONSTRAINT_SYSTEM_PROMPT = """
你是分支覆盖约束生成器。

输入：EA（执行抽象）
任务：
- 找出未覆盖的 block
- 用自然语言写出如何到达这些 block 的约束条件
- 输出 JSON：
{
  "constraints": [
     {"file": "...", "function": "...", "block_id": X, "condition": "自然语言约束"}
  ]
}
"""

resp3 = client.chat.completions.create(
    model="qwen3-coder-plus",
    messages=[
        {"role": "system", "content": CONSTRAINT_SYSTEM_PROMPT},
        {"role": "user", "content": json.dumps(ea_json, ensure_ascii=False)},
    ],
    temperature=0,
)

# Echo the raw reply for inspection before parsing.
print(resp3.choices[0].message.content)

# Parse only the span from the first "{" to the last "}". A blanket
# replace("```", "") would also delete backticks that occur inside JSON string
# values, and it cannot cope with a None reply or prose around the object.
constraint_reply = resp3.choices[0].message.content or ""
constraint_json_start = constraint_reply.find("{")
constraint_json_end = constraint_reply.rfind("}")
if constraint_json_start == -1 or constraint_json_end < constraint_json_start:
    raise ValueError(
        f"Constraint reply contains no JSON object: {constraint_reply[:200]!r}"
    )
constraint_json = json.loads(
    constraint_reply[constraint_json_start:constraint_json_end + 1]
)
print("[+] 未覆盖分支约束生成成功")
print(json.dumps(constraint_json, indent=2, ensure_ascii=False))

```json
{
  "constraints": [
    {
      "file": "src/count.c",
      "function": "count",
      "block_id": 2,
      "condition": "需要让for循环正常结束，即start和end参数需要相等，这样cur != end条件一开始就不成立，直接跳过循环体"
    }
  ]
}
```
[+] 未覆盖分支约束生成成功
{
  "constraints": [
    {
      "file": "src/count.c",
      "function": "count",
      "block_id": 2,
      "condition": "需要让for循环正常结束，即start和end参数需要相等，这样cur != end条件一开始就不成立，直接跳过循环体"
    }
  ]
}


In [58]:
# ============================================================
# 9. Generate new test inputs from the constraints
# ============================================================
# System prompt for the input-generation request. Given the natural-language
# constraint JSON, the model is asked to derive a new pair of inputs
# (argv[1], argv[2]) and reply with a JSON object holding a "new_inputs" list.
INPUT_GEN_SYSTEM_PROMPT = """
你是测试输入生成器。

输入：自然语言约束 JSON
任务：
- 推导满足条件的一组新的测试输入 argv[1], argv[2]
- 输出 JSON：
{
  "new_inputs": ["1.23", "2.34"]
}
"""

resp4 = client.chat.completions.create(
    model="qwen3-coder-plus",
    messages=[
        {"role": "system", "content": INPUT_GEN_SYSTEM_PROMPT},
        {"role": "user", "content": json.dumps(constraint_json, ensure_ascii=False)},
    ],
    temperature=0,
)

# Parse only the span from the first "{" to the last "}". This tolerates Markdown
# fences or prose around the object without stripping backticks that might
# legitimately appear inside JSON string values.
input_gen_reply = resp4.choices[0].message.content or ""
input_gen_json_start = input_gen_reply.find("{")
input_gen_json_end = input_gen_reply.rfind("}")
if input_gen_json_start == -1 or input_gen_json_end < input_gen_json_start:
    raise ValueError(
        f"Input-generation reply contains no JSON object: {input_gen_reply[:200]!r}"
    )
new_inputs_json = json.loads(
    input_gen_reply[input_gen_json_start:input_gen_json_end + 1]
)

# The prompt asks for {"new_inputs": [...]}; reject anything else early rather
# than letting a malformed reply reach whatever consumes the inputs next.
if not isinstance(new_inputs_json.get("new_inputs"), list):
    raise ValueError(f"Reply has no 'new_inputs' list: {new_inputs_json!r}")

print("[+] 新测试输入生成成功：")
print(new_inputs_json)

[+] 新测试输入生成成功：
{'new_inputs': ['5.0', '5.0']}
