In [1]:
import json

from agent.models.issue import Issue
from agent.models.mcphost import MCPHost
from agent.models.input import QueryFormatterInput

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def get_issues(jsonl_path) -> list[Issue]:
    issues = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            try:
                sample = json.loads(line)
        
                issue_repo = sample.get("repo")
                repo_url = "https://github.com/" + issue_repo
                
                base_commit = sample.get("base_commit")
                issue_description = sample.get("problem_statement")
                if issue_description:
                    issues.append(Issue(title=f"{issue_repo} {base_commit}", description=issue_description, repo_url=repo_url))
            except json.JSONDecodeError:
                continue
    return issues

In [8]:
issues =  get_issues("./eval/swe_bench/data/swe_bench_lite_test.jsonl")

In [9]:
lenght_issues = len(issues)

In [10]:
lenght_issues

300

In [11]:
final_outputs = []

In [6]:
len(final_outputs)

0

In [15]:
final_list = issues[38:]

for idx, issue in enumerate(final_list):
    print(f"Issue ({idx +1}/{len(final_list)}): {issue.title}")
    host = MCPHost()

    data = host.run_workflow("issue_resolution", initial_input=QueryFormatterInput(issue))
    final_outputs.append(data.fixed_code)
    print("Done!\n\n")

Issue (1/262): django/django 895f28f9cbed817c00ab68770433170d83132d90
Done!


Issue (2/262): django/django d51c50d836c5cf8db5566da17963f871be554615
Done!


Issue (3/262): django/django 447980e72ac01da1594dd3373a03ba40b7ee6f80
Done!


Issue (4/262): django/django c86201b6ed4f8256b0a0520c08aa674f623d4127
Done!


Issue (5/262): django/django 8328811f048fed0dd22573224def8c65410c9f2e
Done!


Issue (6/262): django/django 49ae7ce50a874f8a04cd910882fb9571ff3a0d7a
Done!


Issue (7/262): django/django 4652f1f0aa459a7b980441d629648707c32e36bf
Done!


Issue (8/262): django/django 3bc4240d979812bd11365ede04c028ea13fdc8c6
Done!


Issue (9/262): django/django 78ad4b4b0201003792bfdbf1a7781cbc9ee03539
Done!


Issue (10/262): django/django a59de6e89e8dc1f3e71c9a5a5bbceb373ea5247e
Done!


Issue (11/262): django/django 7af8f4127397279d19ef7c7899e93018274e2f9b
Done!


Issue (12/262): django/django 16218c20606d8cd89c5393970c83da04598a3e04
Done!


Issue (13/262): django/django 184a6eebb0ef56d5f1b1315a8e66683

BadRequestError: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'Your credit balance is too low to access the Anthropic API. Please go to Plans & Billing to upgrade or purchase credits.'}}

In [16]:
with open("predictions_agent_sonnet.json", "w") as f:
    json.dump(final_outputs, f, indent=2)

In [17]:
len(final_outputs)

150

In [18]:
final_outputs[0]

'Looking at the issue, the problem occurs when nested CompoundModels are processed. The issue is in the `_cstack` function which handles the \'&\' operator. When a CompoundModel that itself contains multiple models is passed as left or right, the function doesn\'t properly handle the coordinate matrix dimensions.\n\nThe problem is that when we have a nested compound model like `m.Pix2Sky_TAN() & (m.Linear1D(10) & m.Linear1D(5))`, the right side is a CompoundModel with 2 outputs and 2 inputs, but `_cstack` treats it as a simple model and doesn\'t account for the fact that it should preserve the separability structure of the nested compound.\n\nHere\'s the fix:\n\n```python\n# Licensed under a 3-clause BSD style license - see LICENSE.rst\n\n"""\nFunctions to determine if a model is separable, i.e.\nif the model outputs are independent.\n\nIt analyzes ``n_inputs``, ``n_outputs`` and the operators\nin a compound model by stepping through the transforms\nand creating a ``coord_matrix`` of s

In [21]:
def format_predictions_for_swebench(final_outputs):
    """
    Convierte una lista de predicciones al formato requerido por SWE-bench.

    :param final_outputs: lista de dicts con claves como 'instance_id' y 'patch' o similar
    :param model_name: nombre del modelo que se usará como valor de "model"
    :return: lista de predicciones formateadas
    """
    formatted = []
    
    with open("./eval/swe_bench/data/swe_bench_lite_test.jsonl", "r", encoding="utf-8") as f:
        for idx, line in enumerate(f):
            if idx >= 150:
                break
            try:
                sample = json.loads(line)
        
                instance_id = sample.get("instance_id")
                formatted.append({
                    "instance_id": instance_id,
                    "model": "claude-3-5-haiku-20241022",
                    "prediction": final_outputs[idx]  # o 'model_patch' si ya viene así
                })
            except json.JSONDecodeError:
                continue
 
    return formatted

In [22]:
final_list = format_predictions_for_swebench(final_outputs)

In [23]:
len(final_list)

150

In [24]:
with open("swe_bench_predictions_sonnet.jsonl", "w", encoding="utf-8") as f:
    for item in final_list:
        json.dump(item, f)
        f.write("\n")

In [None]:
# import json

# input_path = "swe_bench_predictions.jsonl"
# output_path = "swe_bench_predictions_cleaned.jsonl"

# with open(input_path, "r", encoding="utf-8") as infile, open(output_path, "w", encoding="utf-8") as outfile:
#     for line in infile:
#         line = line.strip()
#         if not line:
#             continue  # Saltar líneas vacías
#         try:
#             json_obj = json.loads(line)
#             json.dump(json_obj, outfile)
#             outfile.write("\n")
#         except json.JSONDecodeError:
#             print("❌ Línea inválida:", line[:100])  # O puedes hacer logging