In [10]:
import os
import matplotlib.pyplot as plt
import numpy as np
from colors import COLOR_MAP
import pandas as pd

In [13]:

# Directory that receives the generated figures (relative to the notebook's
# working directory).
PLOT_DIR = "../plots"
# exist_ok=True makes this cell idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(PLOT_DIR, exist_ok=True)

# Base font size for figure text: tick labels use it as is, axis labels add 5.
GLOBAL_FONTSIZE = 40

# Internal model key -> display name. The display name is what appears on the
# x-axis and is also the key used to look up the bar colour in COLOR_MAP.
model_names = dict(
    model1="LLaMa",
    model2="LLaMa-S",
    model3="RWKV",
    model4="RWKV-S",
    model5="GPT-3.5",
    model6="GPT-4",
)

# Score tables, one per (task, difficulty, metric) combination, each keyed by
# the internal model key. Naming: avg_acc_<task>_<difficulty>_<metric>, where
# metric "success" is plotted as "Success Rate" and "reasoning" as
# "Reasoning Accuracy".
# NOTE(review): "rf" / "df" are task identifiers whose full names are not
# spelled out in this notebook - TODO document them.

# --- task "rf" -------------------------------------------------------------
avg_acc_rf_easy_success = dict(
    model1=0.1419624217,
    model2=0.1197123519,
    model3=0.0,
    model4=0.0,
    model5=0.1343657817,
    model6=0.4962330487,
)

avg_acc_rf_easy_reasoning = dict(
    model1=0.1356993737,
    model2=0.1129441624,
    model3=0.0,
    model4=0.0,
    model5=0.1294985251,
    model6=0.485183325,
)

avg_acc_rf_hard_success = dict(
    model1=0.01538461538,
    model2=0.007532956685,
    model3=0.0,
    model4=0.0,
    model5=0.02421171171,
    model6=0.4510894761,
)

avg_acc_rf_hard_reasoning = dict(
    model1=0.01538461538,
    model2=0.005649717514,
    model3=0.0,
    model4=0.0,
    model5=0.02195945946,
    model6=0.4288363468,
)

# --- task "df" -------------------------------------------------------------
avg_acc_df_easy_success = dict(
    model1=0.4395318596,
    model2=0.2623067776,
    model3=0.0,
    model4=0.0,
    model5=0.4150943396,
    model6=0.7457722029,
)

avg_acc_df_easy_reasoning = dict(
    model1=0.3589076723,
    model2=0.2085612366,
    model3=0.0,
    model4=0.0,
    model5=0.268081761,
    model6=0.6532646433,
)

avg_acc_df_hard_success = dict(
    model1=0.178343949,
    model2=0.06896551724,
    model3=0.0,
    model4=0.0,
    model5=0.1486533753,
    model6=0.4026845638,
)

avg_acc_df_hard_reasoning = dict(
    model1=0.1464968153,
    model2=0.03448275862,
    model3=0.0,
    model4=0.0,
    model5=0.04616998951,
    model6=0.2770469799,
)

# Registry of every table to plot. The key doubles as the output file stem,
# and the insertion order is the order in which figures are generated.
all_scores = dict(
    avg_acc_rf_easy_success=avg_acc_rf_easy_success,
    avg_acc_rf_easy_reasoning=avg_acc_rf_easy_reasoning,
    avg_acc_rf_hard_success=avg_acc_rf_hard_success,
    avg_acc_rf_hard_reasoning=avg_acc_rf_hard_reasoning,
    avg_acc_df_easy_success=avg_acc_df_easy_success,
    avg_acc_df_easy_reasoning=avg_acc_df_easy_reasoning,
    avg_acc_df_hard_success=avg_acc_df_hard_success,
    avg_acc_df_hard_reasoning=avg_acc_df_hard_reasoning,
)

# Common y-axis limits so that all figures are directly comparable.
y_range = (0, 1)

# Each entry of all_scores is rendered as its own standalone figure (no
# subplots) and saved as a separate PDF named after its key:
# 1. avg_acc_rf_{easy,hard}_{success,reasoning}
# 2. avg_acc_df_{easy,hard}_{success,reasoning}

# Render one bar chart per score table and write it to
# "<PLOT_DIR>/<table name>.pdf". Figures are closed immediately after saving,
# so nothing is displayed inline.

# Heights of the dashed horizontal reference lines drawn on every chart.
REFERENCE_LEVELS = (0.2, 0.4, 0.6, 0.8)

# Bar labels and colours are identical for every chart, so build them once
# instead of on each loop iteration.
bar_labels = list(model_names.values())
bar_colors = [COLOR_MAP[label] for label in bar_labels]

for name, scores in all_scores.items():
    # Explicit figure/axes handles rather than pyplot's implicit "current
    # figure". No tight_layout() call is needed here: bbox_inches="tight" at
    # save time already crops the output to the drawn content.
    fig, ax = plt.subplots(figsize=(10, 10))

    # Iterate over model_names (not over `scores`) so the bar order always
    # matches bar_labels / bar_colors.
    ax.bar(
        bar_labels,
        [scores[model_key] for model_key in model_names],
        color=bar_colors,
    )

    ax.set_ylim(y_range)
    ax.tick_params(axis="both", labelsize=GLOBAL_FONTSIZE)
    ax.tick_params(axis="x", labelrotation=45)

    # The metric is encoded in the table name's suffix.
    if name.endswith("reasoning"):
        metric_label = "Reasoning Accuracy"
    else:
        metric_label = "Success Rate"
    ax.set_ylabel(metric_label, fontsize=GLOBAL_FONTSIZE + 5)

    for level in REFERENCE_LEVELS:
        ax.axhline(y=level, color="lightcoral", linestyle="--")

    fig.savefig(os.path.join(PLOT_DIR, f"{name}.pdf"), dpi=300, bbox_inches="tight")
    # Close this specific figure so figures do not accumulate in memory.
    plt.close(fig)