## Analyze result

In [None]:
import os

from utils import apply_diff, display_side_by_side_diff, extract_code_from_response, load_from_jsonl


def analyze_result(example, mode):
    """
    Print a human-readable comparison of one result against its ground truth.

    The model's code is obtained either by applying the model response as a
    diff to the original code ("find_replace") or by extracting the code from
    the response. It is then compared with the ground truth; on a mismatch a
    side-by-side diff is displayed.

    Args:
        example: One result record. Must contain "ground_truth" and
            "model_response", plus "original_code" when mode is
            "find_replace". "id" and "language" are optional and are shown
            as "unknown" when absent.
        mode: "find_replace" to apply the response as a diff; any other
            value is handled as "fully_rewrite".
    """
    banner = "=" * 60
    example_id = example.get("id", "unknown")
    language = example.get("language", "unknown")
    print(f"\n{banner}")
    print(f"Example ID: {example_id} | Language: {language}")
    print(banner)

    expected = example["ground_truth"]
    response = example["model_response"]

    if mode != "find_replace":
        # fully_rewrite: the response carries the complete rewritten code.
        candidate = extract_code_from_response(response)
    else:
        applied, candidate = apply_diff(example["original_code"], response)
        if not applied:
            # Nothing to compare when the diff could not be applied.
            print("❌ DIFF APPLICATION FAILED")
            return
        print("✅ Diff applied successfully")

    if candidate == expected:
        print("Exact Match: ✅ YES")
        print("Perfect match! No differences to show.")
        return

    print("Exact Match: ❌ NO")
    # Ground truth goes on the left, model output on the right.
    display_side_by_side_diff(expected, candidate)


# Example usage:
if __name__ == "__main__":
    # Load results
    res_file_name = "fully_rewrite_gpt-5_results.jsonl"
    # The evaluation mode is inferred from the results file name.
    mode = "find_replace" if "find_replace" in res_file_name else "fully_rewrite"
    file_path = os.path.join("./results", res_file_name)
    results = load_from_jsonl(file_path)

    def find_random_failed_case():
        """
        Pick a uniformly random failed result.

        Returns:
            The index into `results` of a record whose "label" is falsy, or
            None when there is no such record (including when `results` is
            empty).
        """
        import random

        # Collect the failures up front instead of re-drawing until one is
        # hit: rejection sampling never terminates when nothing has failed.
        failed_ids = [idx for idx, result in enumerate(results) if not result["label"]]
        if not failed_ids:
            return None
        return random.choice(failed_ids)

    random_id = find_random_failed_case()
    if random_id is None:
        print("No failed cases found in results.")
    else:
        analyze_result(results[random_id], mode)


Example ID: 260 | Language: python
Exact Match: ❌ NO

SIDE-BY-SIDE COMPARISON
Ground Truth                                                                                                                | Model Output                                                                                                               
--------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------
from __future__ import annotations                                                                                          | from __future__ import annotations                                                                                         
                                                                                                                            |                                              

# Case study


In [9]:
import json

# Print the "user_prompt" of the first record in the v2 benchmark, framed by
# separator rules. To inspect the v0 prompt instead, open
# "benchmarks/fast_editing_benchmark_v0.jsonl" in the same way.
print("=" * 100)

# Decode explicitly as UTF-8 so the read does not depend on the platform's
# locale default encoding.
with open("benchmarks/fast_editing_benchmark_v2.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines before the first record
        data = json.loads(line)
        print(data["user_prompt"])
        break  # only the first record is needed

print("=" * 100)


You are an expert code editing assistant. Your task is to modify the provided code according to the edit query using search-and-replace blocks.

## Format

Use the following syntax to specify edits:

<SEARCH>
exact original code snippet to find (leave empty for full rewrite)
</SEARCH>
<REPLACE>
replacement code snippet
</REPLACE>

### Rules
1. **Exact Matching**: The code in the `<SEARCH>` block must match the original code exactly, including all whitespace, indentation, and line breaks.
2. **Unique Matches**: Each `<SEARCH>` block must be unique. If similar text appears multiple times, add surrounding context to make it specific.
3. **Multiple Changes**: Use separate `<SEARCH>`/`<REPLACE>` block pairs for multiple changes.
4. **Full Rewrites**: To provide a complete rewrite of the entire code, use an empty `<SEARCH>` block followed by a `<REPLACE>` block containing the full new code:

<SEARCH>
</SEARCH>
<REPLACE>
entire rewritten code here
</REPLACE>

5. **No Extra Content**: Do not i

In [None]:
import json

import os

# Location of the graded results file. The default is a machine-specific
# absolute path; set GRADED_RESULTS_PATH to point at a local copy instead.
graded_results_path = os.environ.get(
    "GRADED_RESULTS_PATH",
    "/mnt/local/yikai/slime/rl_data/results/gpt-41_fast_edit_v0_results_graded.jsonl",
)

# Decode explicitly as UTF-8 (no dependence on the locale default) and skip
# blank lines, which json.loads would otherwise reject.
with open(graded_results_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]


# Print the raw model response of every record whose "format_success" is falsy.
for item in data:
    if not item["format_success"]:
        print(item["model_response"])

In [3]:
# Launch server:
# python3 -m sglang.launch_server --model Qwen/Qwen3-8B --reasoning-parser qwen3

from openai import OpenAI
import json

# Load the v1 benchmark. Decode explicitly as UTF-8 (no dependence on the
# locale default) and skip blank lines, which json.loads would reject.
with open("./benchmarks/fast_editing_benchmark_v1.jsonl", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Use the first benchmark record as the probe example.
item = data[0]
system_prompt = item["system_prompt"]
user_prompt = item["user_prompt"]
ground_truth = item["ground_truth"]
original_code = item["original_code"]
language = item["metadata"]["language"]

port = 30000

# The client talks to a server on localhost through the OpenAI-compatible
# /v1 endpoint; the API key is left empty.
client = OpenAI(
    api_key="",
    base_url=f"http://127.0.0.1:{port}/v1",
)

model = "Qwen/Qwen3-8B"
# The system message is included only when the record provides a non-empty one.
messages = []
if system_prompt:
    messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": user_prompt})

response = client.chat.completions.create(
    model=model,
    messages=messages,
    max_completion_tokens=16384,
    # Server-specific options forwarded in the request body.
    # NOTE(review): presumably these enable thinking and keep the reasoning
    # inline in message.content rather than in a separate field - confirm
    # against the server documentation.
    extra_body={
        "chat_template_kwargs": {"enable_thinking": True},
        "separate_reasoning": False
    }
)

print("Answer:", response.choices[0].message.content)

Answer: <think>


Looking at the original code, there's a section where the LocalInstallerArguments are built. Then, there's a Write-Verbose line that says "Installing $ToolName version $ToolVersion". But the user wants the installer arguments to be properly formatted for output. So maybe the current way of writing the verbose message isn't showing the arguments correctly. 

In the code, after building the LocalInstallerArguments, there's a Write-Verbose "Installing $ToolName version $ToolVersion" followed by executing the installer. The user probably wants the arguments to be displayed in the verbose output. So perhaps the current line isn't showing all the arguments. 

So the first part of the edit is to adjust the verbose command. The SEARCH block should be the exact line where the verbose message is written. The REPLACE should format the arguments. For example, maybe using the arguments in a more structured way, like listing them. But how are the arguments passed? The installerPath