In [None]:
import json
import os
import pandas as pd

In [None]:
# Root directory under which the `results/customized/...` files are looked up.
# Set the CHARTMIMIC_PROJECT_PATH environment variable to point at your own
# copy; the literal path below is only the fallback.
project_path = os.environ.get("CHARTMIMIC_PROJECT_PATH", "/apdcephfs/sz/ChartMimic")

In [None]:
def get_code_passed_files(modelagent, root=None):
    """List the ``.py`` names matching the PDFs in a model/agent's checker directory.

    The directory scanned is
    ``<root>/results/customized/chartedit_<modelagent>_results/<template>_checker``,
    where ``<template>`` is the last ``_``-separated piece of ``modelagent`` with
    everything from ``Agent`` onward removed, lower-cased
    (``"EditAgent"`` -> ``"edit"``).
    Presumably a PDF exists there only for code that executed successfully --
    confirm against whatever writes that directory.

    Args:
        modelagent: Key of the form ``"<model>_<agent>"``.
        root: Directory to search under; defaults to the notebook-level
            ``project_path``.

    Returns:
        Sorted list of file names, one per ``*.pdf`` entry, with the ``.pdf``
        suffix replaced by ``.py``.

    Raises:
        FileNotFoundError: if the checker directory does not exist
            (raised by ``os.listdir``).
    """
    if root is None:
        root = project_path

    template_type = modelagent.split("_")[-1].split("Agent")[0].lower()
    print(template_type)

    file_dir = os.path.join(
        root,
        "results",
        "customized",
        "chartedit_{}_results".format(modelagent),
        "{}_checker".format(template_type),
    )

    pdf_suffix = ".pdf"
    # Match on the real suffix: a substring test would also accept names such
    # as "x.pdf.log" and would cut the stem at the first ".pdf" it finds.
    # Sorting makes the result independent of the order os.listdir returns.
    return sorted(
        item[: -len(pdf_suffix)] + ".py"
        for item in os.listdir(file_dir)
        if item.endswith(pdf_suffix)
    )

In [None]:
# Longer list of model names, kept commented out for reference:
# models = ["gpt-4-vision-preview", "claude-3-opus-20240229", "gemini-pro-vision", "Phi-3-vision-128k-instruct", "llava-v1.6-vicuna-7b-hf", "deepseek-vl-7b-chat", "llava-v1.6-mistral-7b-hf", "idefics2-8b", "MiniCPM-Llama3-V-2_5", "Qwen-VL-Chat", "llava-v1.6-vicuna-13b-hf", "cogvlm2-llama3-chat-19B", "InternVL-Chat-V1-5", "llava-v1.6-34b-hf"]

# Models and agents included in this run.
models = ["gpt-4-vision-preview"]
agents = ["EditAgent"]

table_type = "all"

# One "<model>_<agent>" key per combination, models varying slowest.
model_agents = [f"{m}_{a}" for m in models for a in agents]

In [None]:
# "code_pass": keep only the files returned by get_code_passed_files for each
# model/agent; "no_filter": keep everything. Any other value is rejected.
filter_type = "code_pass"   # or no_filter
# Fixed number every summed score is divided by.
denominator = 500

if filter_type == "code_pass":
    # model_agent key -> list of file names to keep
    filter_files_dict = dict(
        (name, get_code_passed_files(name)) for name in model_agents
    )
elif filter_type == "no_filter":
    filter_files_dict = None
else:
    raise ValueError("filter_type not supported")

In [None]:
# Build the (initially empty) results table: one row per model/agent
# combination, keyed by "<model>_<agent>", with one column per reported score.
result_df = (
    pd.DataFrame(
        columns=[
            "model_agent",
            "example_count",
            "ExecRate",
            "TextScore",
            "LayoutScore",
            "TypeScore",
            "ColorScore",
            "Average",
            "GPT4VScore",
            "Overall",
        ]
    )
    # fill the key column with every model/agent name ...
    .assign(model_agent=["{}_{}".format(m, a) for m in models for a in agents])
    # ... and use it as the row index
    .set_index("model_agent")
)

In [None]:
# Low-level scores: read each model/agent's code-evaluation results file and
# fill in example_count, ExecRate, the four F1-based scores and their Average.
files = []
for model in models:
    for agent in agents:
        filename = project_path + "/results/customized/chartedit_" + model + "_" + agent + "_results_code4evaluation.json"
        if not os.path.exists(filename):
            raise FileNotFoundError("File not found: {}".format(filename))
        files.append(filename)

# (column in the results file, column in result_df) for every F1-based metric.
f1_columns = [
    ("text_metrics", "TextScore"),
    ("layout_metrics", "LayoutScore"),
    ("chart_type_metrics", "TypeScore"),
    ("color_metrics", "ColorScore"),
]

# A missing file raised above, so `files` lines up one-to-one with
# `model_agents` (both are built in the same models-then-agents order).
for model_agent, file in zip(model_agents, files):
    print("Processing file:", os.path.basename(file))

    data = pd.read_json(file, lines=True)
    # Reduce the paths to base names so they can be compared with the filter
    # list. ("orginial" is the key exactly as spelled in the results files.)
    data["orginial"] = data["orginial"].apply(lambda x: x.split("/")[-1])
    data["generated"] = data["generated"].apply(lambda x: x.split("/")[-1])

    if filter_files_dict is not None:
        filter_files = filter_files_dict[model_agent]
        # Set membership instead of scanning the whole list for every row.
        data = data[data["orginial"].isin(set(filter_files))]
        print("Length of filter files:", len(filter_files))
        exec_rate = len(filter_files) / denominator
    else:
        # No filter list is available in "no_filter" mode, so the execution
        # rate cannot be derived here and is left as missing.
        exec_rate = float("nan")

    print("Length of Data:", len(data))
    print("Denominator:", denominator)

    result_df.loc[model_agent, "example_count"] = len(data)
    result_df.loc[model_agent, "ExecRate"] = exec_rate
    print("Execution Rate:", exec_rate)

    f1s = []
    for metric_column, score_name in f1_columns:
        # Sum over the kept rows but divide by the fixed denominator, so rows
        # that were filtered out contribute zero to the score.
        avg_f1 = data[metric_column].apply(lambda x: x["f1"]).sum() * 100 / denominator
        print(avg_f1)
        result_df.loc[model_agent, score_name] = avg_f1
        f1s.append(avg_f1)

    average_f1 = sum(f1s) / len(f1s)
    print(average_f1)
    result_df.loc[model_agent, "Average"] = average_f1

    print()

In [None]:
# GPT-4V scores: read each model/agent's GPT-4V results file (when present)
# and fill in GPT4VScore.
files = []
file_model_agents = []  # result_df row key for the entry at the same position in `files`
for model in models:
    for agent in agents:
        file = project_path + "/results/customized/chartedit_" + model + "_" + agent + "_results_gpt4v.json"
        if os.path.exists(file):
            files.append(file)
            # Record the key next to the file: looking rows up by position in
            # `model_agents` would shift scores onto the wrong model as soon as
            # one file is missing.
            file_model_agents.append(model + "_" + agent)
        else:
            # Missing files are skipped; say so, so the gap in the table is explained.
            print("File not found, skipping:", file)


for model_agent, file in zip(file_model_agents, files):
    print(os.path.basename(file))

    data = pd.read_json(file, lines=True)
    # Reduce the paths to base names so they can be compared with the filter
    # list. ("orginial" is the key exactly as spelled in the results files.)
    data["orginial"] = data["orginial"].apply(lambda x: x.split("/")[-1])
    data["generated"] = data["generated"].apply(lambda x: x.split("/")[-1])

    if filter_files_dict is not None:
        filter_files = filter_files_dict[model_agent]
        # Set membership instead of scanning the whole list for every row.
        data = data[data["orginial"].isin(set(filter_files))]
    print(len(data))

    # Overwrites any example_count already stored for this row.
    result_df.loc[model_agent, "example_count"] = len(data)

    gpt4v_score = data["gpt4v_score"]
    # Sum over the kept rows, divided by the fixed denominator (filtered-out
    # rows contribute zero).
    avg_gpt4v_score = gpt4v_score.sum() / denominator
    print(model_agent)
    result_df.loc[model_agent, "GPT4VScore"] = avg_gpt4v_score
    print(avg_gpt4v_score)

    print()

In [None]:
# Show the table as filled in so far (Overall is computed in a later cell).
result_df

In [None]:
# calculate the overall score
result_df["Overall"] = result_df[["Average", "GPT4VScore"]].mean(axis=1)

result_df["ExecRate"] = result_df["ExecRate"] * 100

In [None]:
# Show the table again, now with Overall filled in and ExecRate rescaled.
result_df

In [None]:
# Write the table to results.csv in the current working directory, keeping the
# model_agent index as the first column.
result_df.to_csv('results.csv', index=True)