In [48]:
import os
import itertools
import glob
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import openpyxl
import matplotlib.ticker
import matplotlib.pyplot as plt


# Seaborn theme shared by every figure: white grid background, text scaled
# by 1.2, and the 'CMU Sans Serif' font family.
sns.set(font_scale=1.2, style='whitegrid', font='CMU Sans Serif')

# Alternative two-colour palette kept for reference (drawio colours):
#   189,215,238  /  255,230,153
# pal = sns.color_palette(['#BDD7EE', '#FFE699'])
sns.set_palette("muted", 9)

# Matplotlib defaults, applied after the seaborn theme so they take precedence.
mpl.rcParams.update({
    # fonttype 42 makes the PDF/PS backends embed TrueType (Type 42) fonts.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    # Default figure size in inches and on-screen resolution.
    'figure.figsize': (4.5, 2),
    'figure.dpi': 100,
})

# Directory name used elsewhere in the notebook — presumably where figures
# are saved; its use is not visible in this section (TODO confirm).
existing_dir = 'en-figures'

In [49]:
# Default workbook location, kept identical to the previously hard-coded path
# so existing calls behave the same. Pass ``excel_path`` to read another file.
_DEFAULT_RESULTS_XLSX = '/Users/minghe/llm4faas/eva/_results/results_en_copy.xlsx'


def _select_rate_row(excel_raw_df, label, sheet_name):
    """Return the first row whose 'Index' cell contains ``label`` literally.

    Raises:
        ValueError: if no row of the sheet carries the label.
    """
    # na=False treats empty / non-text 'Index' cells as "no match";
    # regex=False keeps the label a plain substring rather than a pattern.
    is_label_row = excel_raw_df['Index'].str.contains(label, regex=False, na=False)
    matches = excel_raw_df[is_label_row]
    if matches.empty:
        raise ValueError(
            f"Sheet {sheet_name!r} has no row whose 'Index' contains {label!r}"
        )
    return matches.iloc[0]


def process_sheet(sheet_name, excel_path=_DEFAULT_RESULTS_XLSX, n_models=5):
    """Load one result sheet and return its rates in long (tidy) form.

    The sheet is expected to have a column named 'Index', model identifiers in
    the first data row, and summary rows whose 'Index' cell contains
    'PASS RATE' and 'COMPILABLE RATE'.

    Args:
        sheet_name: Name of the worksheet to read; also stored in the 'Task'
            column of the result.
        excel_path: Workbook to read. Defaults to the original results file.
        n_models: Maximum number of model columns to take, counted over the
            non-empty cells of the first data row. Defaults to 5.

    Returns:
        pandas.DataFrame with columns 'Model', 'Rate', 'Type' and 'Task'.
        It holds one row per model for 'PASS RATE', followed by one row per
        model for 'COMPILABLE RATE'. 'Rate' is the sheet value multiplied by
        100 (presumably fraction -> percent).

    Raises:
        ValueError: if either summary row is missing from the sheet, or a
            rate cell cannot be interpreted as a number.
    """
    excel_raw_df = pd.read_excel(excel_path, sheet_name=sheet_name)

    # Model identifiers live in the first data row; columns that are empty
    # there (e.g. the 'Index' label column) are not model columns.
    name_line = excel_raw_df.head(1).dropna(axis=1, how='any')
    model_columns = name_line.columns[:n_models]
    models = name_line.iloc[0, :n_models].to_numpy()

    # Fetch each summary row by its label rather than by position, so a
    # missing or duplicated label cannot silently shift the rows.
    pass_row = _select_rate_row(excel_raw_df, 'PASS RATE', sheet_name)
    compile_row = _select_rate_row(excel_raw_df, 'COMPILABLE RATE', sheet_name)

    # Restrict to the model columns and scale by 100. Empty cells stay NaN.
    pass_rate = pd.to_numeric(pass_row.loc[model_columns]) * 100
    compilable_rate = pd.to_numeric(compile_row.loc[model_columns]) * 100

    # Display names for known model identifiers; unknown ones pass through.
    model_map = {
        'gpt-4o': 'GPT-4o',
        'gpt-4o-mini': 'GPT-4o-Mini',
        'gemini': 'Gemini',
        'llama3': 'Llama-3.1',
        'copilot': 'Copilot',
    }
    mapped_models = [model_map.get(model, model) for model in models]
    n_found = len(mapped_models)

    return pd.DataFrame({
        'Model': mapped_models * 2,
        'Rate': list(pass_rate) + list(compilable_rate),
        'Type': ['PASS RATE'] * n_found + ['COMPILABLE RATE'] * n_found,
        'Task': sheet_name,
    })

# Worksheets to load, one per task.
sheet_names = ['Fixed Plans', 'Energy Saving', 'Remote Control', 'Auto Adapt']

# One long-form frame per worksheet, in the order listed above.
all_dataframes = list(map(process_sheet, sheet_names))

# Stack the per-task frames into one table and tag every row with the
# language of this result set.
final_df_en = pd.concat(all_dataframes, ignore_index=True).assign(Language='English')

# Show the combined table and save it next to the notebook.
print(final_df_en)
final_df_en.to_csv('final_df_en.csv', index=False)

          Model        Rate             Type            Task Language
0        GPT-4o   77.464789        PASS RATE     Fixed Plans  English
1   GPT-4o-Mini   38.028169        PASS RATE     Fixed Plans  English
2        Gemini    8.450704        PASS RATE     Fixed Plans  English
3     Llama-3.1    0.000000        PASS RATE     Fixed Plans  English
4       Copilot   23.943662        PASS RATE     Fixed Plans  English
5        GPT-4o   83.098592  COMPILABLE RATE     Fixed Plans  English
6   GPT-4o-Mini   70.422535  COMPILABLE RATE     Fixed Plans  English
7        Gemini   45.070423  COMPILABLE RATE     Fixed Plans  English
8     Llama-3.1    0.000000  COMPILABLE RATE     Fixed Plans  English
9       Copilot   39.436620  COMPILABLE RATE     Fixed Plans  English
10       GPT-4o   38.095238        PASS RATE   Energy Saving  English
11  GPT-4o-Mini   38.095238        PASS RATE   Energy Saving  English
12       Gemini    4.761905        PASS RATE   Energy Saving  English
13    Llama-3.1    0