In [1]:
import json  
  
def read_jsonl(file_path):  
    """  
    读取 JSONL 文件中的数据并返回一个包含所有记录的字典列表。  
  
    参数:  
    file_path (str): JSONL 文件的路径。  
  
    返回:  
    list: 包含所有记录的字典列表。  
    """  
    data = []  
  
    with open(file_path, 'r', encoding='utf-8') as file:  
        for line in file:  
            # 解析每一行的 JSON 对象并添加到列表中  
            data.append(json.loads(line.strip()))  
  
    return data  

In [2]:
import json  
import os  
import pandas as pd  
  
def calculate(data):  
    good_idx = []  
    same_idx = []  
    loss_idx = []  
    result_label = []  
    for idx, item in enumerate(data[:50]):  
        score_map_generall = {  
            "A>>B": 3,  
            "A>B": 3,  
            "A=B": 2,  
            "B>A": 1,  
            "B>>A": 1,  
            None: 0  
        }  
        score_map_generall2 = {  
            "A>>B": 1,  
            "A>B": 1,  
            "A=B": 2,  
            "B>A": 3,  
            "B>>A": 3,  
            None: 0  
        }  
        score_1 = score_map_generall[item["score_1"]]  
        score_2 = score_map_generall2[item["score_2"]]  
        if score_1 != score_2:  
            same_idx.append(idx)  
            result_label.append("T")  
        elif score_1 == 3:  
            good_idx.append(idx)  
            result_label.append("W")  
        elif score_2 == 1:  
            loss_idx.append(idx)  
            result_label.append("L")  
        else:  
            same_idx.append(idx)  
            result_label.append("T")  
    result = {  
        "good_idx": len(good_idx),  
        "same_idx": len(same_idx),  
        "loss_idx": len(loss_idx),  
        "sum": len(good_idx) + len(same_idx) + len(loss_idx)  
    }  
    return result, result_label  
  
def cal_text(data):  
    model_name = data[0]["model_id_2"]  
    judge_data = read_jsonl(f"data/arena-ta/model_judgment/gpt-4o_text/{model_name}.jsonl")  
    judge_dict = {item['question_id']: [item['games'][0]['score'], item['games'][0]['judgment'], item['games'][1]['score'], item['games'][1]['judgment']] for item in judge_data}  
    res = []  
    result_label = []  
    for idx, item in enumerate(data):  
        score_1 = judge_dict[item["question_id"]][0]  
        judge_1 = judge_dict[item["question_id"]][1]  
        score_2 = judge_dict[item["question_id"]][2]  
        judge_2 = judge_dict[item["question_id"]][3]  
        item["text_score1"] = score_1  
        item["text_judge1"] = judge_1  
        item["text_score2"] = score_2  
        item["text_judge2"] = judge_2  
        good_idx = []  
        same_idx = []  
        loss_idx = []  
        res.append(item)  
    for idx, item in enumerate(res[:50]):  
        score_map_generall = {  
            "A>>B": 3,  
            "A>B": 3,  
            "A=B": 2,  
            "B>A": 1,  
            "B>>A": 1,  
            None: 0  
        }  
        score_map_generall2 = {  
            "A>>B": 1,  
            "A>B": 1,  
            "A=B": 2,  
            "B>A": 3,  
            "B>>A": 3,  
            None: 0  
        }  
        score_1 = score_map_generall[item["text_score1"]]  
        score_2 = score_map_generall2[item["text_score2"]]  
        if score_1 != score_2:  
            same_idx.append(idx)  
            result_label.append("T")  
        elif score_1 == 3:  
            good_idx.append(idx)  
            result_label.append("W")  
        elif score_2 == 1:  
            loss_idx.append(idx)  
            result_label.append("L")  
        else:  
            same_idx.append(idx)  
            result_label.append("T")  
    result = {  
        "good_idx": len(good_idx),  
        "same_idx": len(same_idx),  
        "loss_idx": len(loss_idx),  
        "sum": len(good_idx) + len(same_idx) + len(loss_idx)  
    }  
    return result, result_label  
  
def read_jsonl(file_path):  
    with open(file_path, 'r') as file:  
        return [json.loads(line) for line in file]  
  
def merge_results(model_name, image_res, text_res):  
    merged_result = {  
        "model": model_name,  # 添加模型名称到结果字典的第一项  
        "good_idx_image": image_res["good_idx"],  
        "same_idx_image": image_res["same_idx"],  
        "loss_idx_image": image_res["loss_idx"],  
        "sum_image": image_res["sum"],  
        "good_idx_text": text_res["good_idx"],  
        "same_idx_text": text_res["same_idx"],  
        "loss_idx_text": text_res["loss_idx"],  
        "sum_text": text_res["sum"]  
    }  
    return merged_result  
  
# 获取data/annotation/目录下的所有文件  
annotation_dir = 'data/annotation_2/'  
files = [f for f in os.listdir(annotation_dir) if os.path.isfile(os.path.join(annotation_dir, f))]  
  
merged_results = []  
label_results_text = {}  
label_results_image = {}  
  
# 遍历每个文件并进行calculate统计  
for file_name in files:  
    file_path = os.path.join(annotation_dir, file_name)  
    with open(file_path, 'r') as f:  
        data = json.load(f)  
    print(f"Model: {file_name}")  
    res, result_label_image = calculate(data)  
    res_text, result_label_text = cal_text(data)  
    merged_res = merge_results(file_name, res, res_text)  # 传递模型名称  
    merged_results.append(merged_res)  
    model_name = file_name.split("_")[0]  
    label_results_text[model_name] = result_label_text  
    label_results_image[model_name] = result_label_image  
  
# 指定的模型顺序  
model_order = ["Meta-Llama-3.1-8B-Instruct", "Meta-Llama-3.1-70B-Instruct", "Qwen2-72B-Instruct", "tulu-2-dpo-70b"]  
  
# 按指定顺序排序 merged_results  
ordered_merged_results = sorted(merged_results, key=lambda x: model_order.index(x["model"]) if x["model"] in model_order else float('inf'))  
  
# 创建一个DataFrame并保存为CSV文件  
df = pd.DataFrame(ordered_merged_results)  
  
# 确保模型名称是第一列  
df = df[["model", "good_idx_image", "same_idx_image", "loss_idx_image", "sum_image", "good_idx_text", "same_idx_text", "loss_idx_text", "sum_text"]]  
  
df.to_csv("merged_results.csv", index=False)  
print("Merged results saved to merged_results.csv")  
  
# 保存 label_results_text 为 CSV 文件  
label_text_df = pd.DataFrame.from_dict(label_results_text, orient='index').transpose()  
label_text_df = label_text_df[model_order]  
label_text_df.to_csv("label_results_text.csv", index=False)  
print("Label results text saved to label_results_text.csv")  
  
# 保存 label_results_image 为 CSV 文件  
label_image_df = pd.DataFrame.from_dict(label_results_image, orient='index').transpose()  
label_image_df = label_image_df[model_order]  
label_image_df.to_csv("label_results_image.csv", index=False)  
print("Label results image saved to label_results_image.csv")  


Model: Qwen2-72B-Instruct_sample.json
Model: Meta-Llama-3.1-8B-Instruct_sample.json
Model: Meta-Llama-3.1-70B-Instruct_sample.json
Model: tulu-2-dpo-70b_sample.json
Merged results saved to merged_results.csv
Label results text saved to label_results_text.csv
Label results image saved to label_results_image.csv


In [11]:
import json  
import os  
import pandas as pd  
  
def calculate(data):  
    good_idx = []  
    same_idx = []  
    loss_idx = []  
    result_label = []  
    for idx, item in enumerate(data[:50]):  
        score_map_generall = {  
            "A>>B": 3,  
            "A>B": 3,  
            "A=B": 2,  
            "B>A": 1,  
            "B>>A": 1,  
            None: 0  
        }  
        score_map_generall2 = {  
            "A>>B": 1,  
            "A>B": 1,  
            "A=B": 2,  
            "B>A": 3,  
            "B>>A": 3,  
            None: 0  
        }  
        score_1 = score_map_generall[item["score_1"]]  
        score_2 = score_map_generall2[item["score_2"]]  
        if score_1 != score_2: 
            if score_1 == 3 and score_2 == 2:
                good_idx.append(idx)
                result_label.append("W") 
            elif score_1 == 1 and score_2 == 2:
                loss_idx.append(idx)
                result_label.append("L")
            elif score_1 == 2 and score_2 == 3:
                good_idx.append(idx)
                result_label.append("W")
            elif score_1 == 2 and score_2 == 1:
                loss_idx.append(idx)
                result_label.append("L")
            else:
                same_idx.append(idx)  
                result_label.append("T")  
        elif score_1 == 3:  
            good_idx.append(idx)  
            result_label.append("W")  
        elif score_1 == 1:  
            loss_idx.append(idx)  
            result_label.append("L")  
        else:  
            same_idx.append(idx)  
            result_label.append("T")  
    result = {  
        "good_idx": len(good_idx),  
        "same_idx": len(same_idx),  
        "loss_idx": len(loss_idx),  
        "sum": len(good_idx) + len(same_idx) + len(loss_idx)  
    }  
    return result, result_label  
  
def cal_text(data):  
    model_name = data[0]["model_id_2"]  
    judge_data = read_jsonl(f"data/arena-ta/model_judgment/gpt-4o_text/{model_name}.jsonl")  
    judge_dict = {item['question_id']: [item['games'][0]['score'], item['games'][0]['judgment'], item['games'][1]['score'], item['games'][1]['judgment']] for item in judge_data}  
    res = []  
    result_label = []  
    for idx, item in enumerate(data):  
        score_1 = judge_dict[item["question_id"]][0]  
        judge_1 = judge_dict[item["question_id"]][1]  
        score_2 = judge_dict[item["question_id"]][2]  
        judge_2 = judge_dict[item["question_id"]][3]  
        item["text_score1"] = score_1  
        item["text_judge1"] = judge_1  
        item["text_score2"] = score_2  
        item["text_judge2"] = judge_2  
        good_idx = []  
        same_idx = []  
        loss_idx = []  
        res.append(item)  
    for idx, item in enumerate(res[:50]):  
        score_map_generall = {  
            "A>>B": 3,  
            "A>B": 3,  
            "A=B": 2,  
            "B>A": 1,  
            "B>>A": 1,  
            None: 0  
        }  
        score_map_generall2 = {  
            "A>>B": 1,  
            "A>B": 1,  
            "A=B": 2,  
            "B>A": 3,  
            "B>>A": 3,  
            None: 0  
        }  
        score_1 = score_map_generall[item["text_score1"]]  
        score_2 = score_map_generall2[item["text_score2"]]  
        if score_1 != score_2: 
            if score_1 == 3 and score_2 == 2:
                good_idx.append(idx)
                result_label.append("W") 
            elif score_1 == 1 and score_2 == 2:
                loss_idx.append(idx)
                result_label.append("L")
            elif score_1 == 2 and score_2 == 3:
                good_idx.append(idx)
                result_label.append("W")
            elif score_1 == 2 and score_2 == 1:
                loss_idx.append(idx)
                result_label.append("L")
            else:
                same_idx.append(idx)  
                result_label.append("T")  
        elif score_1 == 3:  
            good_idx.append(idx)  
            result_label.append("W")  
        elif score_1 == 1:  
            loss_idx.append(idx)  
            result_label.append("L")  
        else:  
            same_idx.append(idx)  
            result_label.append("T")  
    result = {  
        "good_idx": len(good_idx),  
        "same_idx": len(same_idx),  
        "loss_idx": len(loss_idx),  
        "sum": len(good_idx) + len(same_idx) + len(loss_idx)  
    }  
    return result, result_label  
  
def read_jsonl(file_path):  
    with open(file_path, 'r') as file:  
        return [json.loads(line) for line in file]  
  
def merge_results(model_name, image_res, text_res):  
    merged_result = {  
        "model": model_name,  # 添加模型名称到结果字典的第一项  
        "good_idx_image": image_res["good_idx"],  
        "same_idx_image": image_res["same_idx"],  
        "loss_idx_image": image_res["loss_idx"],  
        "sum_image": image_res["sum"],  
        "good_idx_text": text_res["good_idx"],  
        "same_idx_text": text_res["same_idx"],  
        "loss_idx_text": text_res["loss_idx"],  
        "sum_text": text_res["sum"]  
    }  
    return merged_result  
  
# 获取data/annotation/目录下的所有文件  
annotation_dir = 'data/annotation_2/'  
files = [f for f in os.listdir(annotation_dir) if os.path.isfile(os.path.join(annotation_dir, f))]  
  
merged_results = []  
label_results_text = {}  
label_results_image = {}  
  
# 遍历每个文件并进行calculate统计  
for file_name in files:  
    file_path = os.path.join(annotation_dir, file_name)  
    with open(file_path, 'r') as f:  
        data = json.load(f)  
    print(f"Model: {file_name}")  
    res, result_label_image = calculate(data)  
    res_text, result_label_text = cal_text(data)  
    merged_res = merge_results(file_name, res, res_text)  # 传递模型名称  
    merged_results.append(merged_res)  
    model_name = file_name.split("_")[0]  
    label_results_text[model_name] = result_label_text  
    label_results_image[model_name] = result_label_image  
  
# 指定的模型顺序  
model_order = ["Meta-Llama-3.1-8B-Instruct", "Meta-Llama-3.1-70B-Instruct", "Qwen2-72B-Instruct", "tulu-2-dpo-70b"]  
  
# 按指定顺序排序 merged_results  
ordered_merged_results = sorted(merged_results, key=lambda x: model_order.index(x["model"]) if x["model"] in model_order else float('inf'))  
  
# 创建一个DataFrame并保存为CSV文件  
df = pd.DataFrame(ordered_merged_results)  
  
# 确保模型名称是第一列  
df = df[["model", "good_idx_image", "same_idx_image", "loss_idx_image", "sum_image", "good_idx_text", "same_idx_text", "loss_idx_text", "sum_text"]]  
  
df.to_csv("merged_results2.csv", index=False)  
print("Merged results saved to merged_results.csv")  
  
# 保存 label_results_text 为 CSV 文件  
label_text_df = pd.DataFrame.from_dict(label_results_text, orient='index').transpose()  
label_text_df = label_text_df[model_order]  
label_text_df.to_csv("label_results_text2.csv", index=False)  
print("Label results text saved to label_results_text.csv")  
  
# 保存 label_results_image 为 CSV 文件  
label_image_df = pd.DataFrame.from_dict(label_results_image, orient='index').transpose()  
label_image_df = label_image_df[model_order]  
label_image_df.to_csv("label_results_image2.csv", index=False)  
print("Label results image saved to label_results_image.csv")  


Model: Qwen2-72B-Instruct_sample.json
Model: Meta-Llama-3.1-8B-Instruct_sample.json
Model: Meta-Llama-3.1-70B-Instruct_sample.json
Model: tulu-2-dpo-70b_sample.json
Merged results saved to merged_results.csv
Label results text saved to label_results_text.csv
Label results image saved to label_results_image.csv
