In [None]:
import sys
import os

# Put the parent of the current working directory on the import path
# (presumably so project packages located there can be imported below --
# confirm against the repository layout). Skipped when already present, so
# re-running the cell does not add duplicates.
module_path = os.path.abspath(os.pardir)
already_on_path = any(entry == module_path for entry in sys.path)
if not already_on_path:
    sys.path.append(module_path)

In [None]:
from pathlib import Path
from typing import Any, Dict

from api_integrated_llm.helpers.file_helper import get_dict_from_json


# Gather the macro win rate of every model for each dataset and obfuscation
# setting from the JSON files under <cwd>/data/completion_rate_bar/.
current_path = os.path.abspath(os.path.join("."))

# Dataset name as it appears in the input file names -> label shown in the figure.
dataset_name_dict = {"Slot-filling": "SLOT", "Sequencing": "SEL", "Rest": "REST"}
dataset_names = ["Slot-filling", "Sequencing", "Rest"]
# NOTE(review): "non-ofuscation" looks like a typo, but the string is part of the
# input file name, so it has to match the files on disk -- confirm before renaming.
obfuscation_status = ["obfuscation", "non-ofuscation"]
# container[dataset_name][model_name][obfuscation] -> win rate
container: Dict[str, Dict[str, Dict[str, Any]]] = {}
# Model key as spelled in the result files -> canonical model name.
model_names_dict = {
    "gpt-4o": "gpt-4o",
    "gpt4o": "gpt-4o",
    "llama-3-3": "llama-3.3-70b",
    "llama-3-3-70b": "llama-3.3-70b",
    "mixtral-22b": "mixtral-8x22b-v0.1",
    "mixtral-8x22B": "mixtral-8x22b-v0.1",
}
max_val = 0.0  # largest win rate seen; used later to size the y-axis
model_names = set()  # canonical names of every model found in any file

for dataset_name in dataset_names:
    dataset_results = container.setdefault(dataset_name, {})
    for obfuscation in obfuscation_status:
        result_path = Path(
            current_path,
            "data",
            "completion_rate_bar",
            f"{obfuscation}_{dataset_name}.json",
        )
        raw_results = get_dict_from_json(file_path=result_path)

        # Fail with the file name and offending keys instead of a bare KeyError
        # when a result file uses a model key that has no canonical name yet.
        unknown_keys = [key for key in raw_results if key not in model_names_dict]
        if unknown_keys:
            raise KeyError(
                f"{result_path}: model keys {unknown_keys} are missing from "
                "model_names_dict"
            )

        # Re-key by canonical name first; if two aliases of the same model occur
        # in one file, the later entry wins.
        results_by_model = {
            model_names_dict[key]: result for key, result in raw_results.items()
        }
        model_names.update(results_by_model)

        for model_name, model_result in results_by_model.items():
            win_rate = model_result["total_macro"]["win_rate"]
            dataset_results.setdefault(model_name, {})[obfuscation] = win_rate
            max_val = max(max_val, win_rate)

sorted_model_names = sorted(model_names)
print(sorted_model_names)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one grouped-bar panel per dataset: one group per model and, inside each
# group, one bar per obfuscation setting. The figure is saved as SVG and PNG in
# the current working directory.
# squeeze=False always yields a 2-D Axes array, so indexing by panel also works
# when there is only a single dataset.
fig, axes_grid = plt.subplots(1, len(container), squeeze=False)
ax = axes_grid[0]
fig.set_size_inches(15, 5)
start = ord("a")  # panels are captioned (a), (b), (c), ...
width = 0.25  # the width of the bars
x = np.arange(len(sorted_model_names))  # the label locations

for i, dataset_name in enumerate(container):
    dataset_results = container[dataset_name]
    # A model without a result for this dataset, or for only one of the
    # obfuscation settings, gets a 0.0 bar instead of raising KeyError.
    label_dict = {
        obfuscation: [
            dataset_results.get(model_name, {}).get(obfuscation, 0.0)
            for model_name in sorted_model_names
        ]
        for obfuscation in obfuscation_status
    }

    for multiplier, (attribute, measurement) in enumerate(label_dict.items()):
        measurement = [round(val, 2) for val in measurement]
        offset = width * multiplier
        rects = ax[i].bar(x + offset, measurement, width, label=attribute)
        ax[i].bar_label(rects, padding=3)

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax[i].set_ylabel('Completion rate')
    # y=-0.15 places the caption below the axes.
    ax[i].set_title(f"({chr(i + start)}) {dataset_name_dict[dataset_name]}", y=-0.15)
    # Put each tick at the centre of its group of bars.
    group_centre = width * (len(obfuscation_status) - 1) / 2
    ax[i].set_xticks(x + group_centre, sorted_model_names)
    ax[i].legend(loc='upper right', ncols=1)
    # Shared y-range across panels, with headroom for the bar labels.
    ax[i].set_ylim(0, max_val + 0.1)

plt.tight_layout()
plt.savefig('dataset_model_win_rate.svg', bbox_inches='tight')
plt.savefig('dataset_model_win_rate.png', bbox_inches='tight')
plt.show()