In [1]:
import json

from datasets import Dataset


def get_openai_datapoint(llm_completions):
    """Flatten the final LLM completion of a trajectory into chat messages.

    Only the last entry of ``llm_completions`` is used: its ``'messages'``
    list is the prompt, and the first choice of its ``'response'`` is appended
    as the closing assistant turn.

    Args:
        llm_completions: Non-empty sequence of completion records. The last
            record must provide ``'messages'`` (each with ``'role'`` and
            ``'content'``) and ``'response'['choices'][0]['message']['content']``.
            A message's ``'content'`` may be a plain string or a list of parts
            that each carry a ``'text'`` key.

    Returns:
        A list of ``{'role': ..., 'content': ...}`` dicts. A message with
        several content parts yields one entry per part, all sharing the
        message's role.

    Raises:
        IndexError: If ``llm_completions`` is empty.
        KeyError: If an expected key (e.g. ``'text'`` on a content part) is
            missing.
    """
    last_msg = llm_completions[-1]
    flattened_messages = []
    for message in last_msg['messages']:
        role = message['role']
        content = message['content']
        if isinstance(content, str):
            # Plain-string content is already flat. Iterating it would walk
            # single characters and fail on content['text'].
            flattened_messages.append({'role': role, 'content': content})
            continue
        for part in content:
            flattened_messages.append({'role': role, 'content': part['text']})
    # The model's reply to the prompt above becomes the final assistant turn.
    flattened_messages.append(
        {
            'role': 'assistant',
            'content': last_msg['response']['choices'][0]['message']['content'],
        }
    )
    return flattened_messages


def to_sft_data(output_root):
    """Build a Hugging Face dataset of conversations labelled with eval results.

    Reads two JSONL files under ``output_root``:

    * ``output.swebench_eval.jsonl`` - one record per instance; the label is
      taken from ``test_result.report.resolved``.
    * ``output.jsonl`` - one record per instance; its ``llm_completions`` are
      flattened with ``get_openai_datapoint``.

    Args:
        output_root: Directory path (string, no trailing slash needed) that
            contains both files.

    Returns:
        A ``Dataset`` with columns ``instance_id``, ``conversations`` and
        ``resolved``, one row per instance in the eval file, in file order.
        ``resolved`` is ``None`` when the eval record has no such field.

    Raises:
        KeyError: If an instance in the eval file has no trajectory in
            ``output.jsonl``.
    """
    output_eval_path = output_root + '/output.swebench_eval.jsonl'
    output_path = output_root + '/output.jsonl'

    eval_result = {}
    # Context managers close the files deterministically. JSON is UTF-8, so the
    # encoding is pinned instead of relying on the locale default.
    with open(output_eval_path, encoding='utf-8') as eval_file:
        for line in eval_file:
            if not line.strip():
                continue  # tolerate blank / trailing lines in the JSONL
            this_eval = json.loads(line)
            # `or {}` also covers keys that are present but null.
            report = (this_eval.get('test_result') or {}).get('report') or {}
            eval_result[this_eval['instance_id']] = report.get('resolved')

    trajectories = {}
    with open(output_path, encoding='utf-8') as output_file:
        for line in output_file:
            if not line.strip():
                continue
            dp = json.loads(line)
            trajectories[dp['instance_id']] = get_openai_datapoint(
                dp['llm_completions']
            )

    hf_data_dict = {'instance_id': [], 'conversations': [], 'resolved': []}
    for instance_id, is_success in eval_result.items():
        hf_data_dict['instance_id'].append(instance_id)
        hf_data_dict['conversations'].append(trajectories[instance_id])
        hf_data_dict['resolved'].append(is_success)
    return Dataset.from_dict(hf_data_dict)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Debug
# import json
# D = []
# with open("/home/jiayidotpan/code/OpenHands/evaluation/evaluation_outputs/outputs/swe-train__swe-train-dev-v1-train.v2.success/CodeActAgent/gpt-4o-mini_maxiter_30_N_v1.9-no-hint-run_1/output.jsonl") as f:
#     for line in f:
#         D.append(json.loads(line))
# idx = 520
# last_msg = D[idx]['llm_completions'][-1]['messages'][-1]
# print(last_msg['role'], '\n', last_msg['content'][0]['text'][:50])
# print(D[idx]['llm_completions'][-1]['response']['choices'][0]['message'])

In [2]:
# Output directory of gpt-4o-mini run 1; adjacent literals concatenate into one path.
output_root = (
    '/home/jiayidotpan/code/OpenHands/evaluation/evaluation_outputs/outputs/'
    'swe-train__swe-train-dev-v1-train.v2.success/CodeActAgent/'
    'gpt-4o-mini_maxiter_30_N_v1.9-no-hint-run_1'
)
hf_dataset = to_sft_data(output_root)

In [None]:
# Upload the converted dataset to the Hub as a private split.
# (Evaluate `hf_dataset[0]` first to eyeball a sample row.)
hf_dataset.push_to_hub(
    'swe-train/sft-data-with-labels',
    split='oct22.swetrainv2.4omini.run1',
    private=True,
)

In [None]:
# NOTE(review): mid-notebook import; consider moving it next to the other
# imports in the first cell so a fresh kernel shows all dependencies up front.
from datasets import load_dataset

# Load the 'swe-train/sft-data-with-labels' repo; no split argument is passed,
# so a split is selected by name when indexing D.
D = load_dataset('swe-train/sft-data-with-labels')

In [None]:
# Inspect a single record: select the split by name, then take the item
# 231 positions from the end.
D['oct22.swetrainv2.4omini.run1'][-231]

In [None]:
# Convert each deepseek-coder run directory (run_1 through run_5).
for run in range(1, 6):
    output_root = (
        '/home/jiayidotpan/code/OpenHands/evaluation/evaluation_outputs/outputs/'
        'swe-train__swe-train-dev-v1-lite-train.v1.success/CodeActAgent/'
        f'deepseek-coder_maxiter_30_N_v1.9-no-hint-run_{run}'
    )
    hf_dataset = to_sft_data(output_root)
    # The upload below is commented out, so only the last run's dataset is
    # left in `hf_dataset` once the loop finishes.
    # hf_dataset.push_to_hub("swe-train/sft-data-with-full-labels", split=f"oct20.swetrainlite.ds25.run{run}", private=True)