### Imports

In [7]:
import matplotlib.pyplot as plt
import pandas as pd
import ast
import os

### Load Excel File which contains the data being visualized

In [8]:
# Build the relative path to the results workbook and open it with pandas.
# `model_path` is kept as a second name bound to the same path string.
model_path = os.path.join("excel_result_files", "overall_performance_results.xlsx")
file_path = model_path
data = pd.ExcelFile(file_path)

In [9]:
# Read the worksheet named 'Sheet1' from the opened workbook into a DataFrame
chart_data = data.parse(sheet_name='Sheet1')

In [None]:
# Distinct values of the 'model_name' column; shown as the cell output
models = chart_data.loc[:, 'model_name'].unique()
models

### Extract the evaluation metric to visualize

In [11]:
def extract_f1(score_str):
    """Return the value stored under the 'f1' key of a score dictionary.

    Parameters
    ----------
    score_str : str or dict
        Either the string representation of a dictionary (a Python literal
        such as "{'precision': 0.8, 'recall': 0.7, 'f1': 0.75}") or a dict
        that has already been parsed.

    Returns
    -------
    object
        Whatever is stored under the 'f1' key.

    Raises
    ------
    ValueError, SyntaxError
        If a non-dict input cannot be parsed as a Python literal.
    KeyError
        If the dictionary has no 'f1' entry.
    """
    if isinstance(score_str, dict):
        # Already parsed: ast.literal_eval would reject a dict object, so use it as-is.
        score_dict = score_str
    else:
        # literal_eval only evaluates Python literals, never arbitrary expressions.
        score_dict = ast.literal_eval(score_str)
    return score_dict['f1']

# Derive a 'bert_f1_score' column by pulling the f1 value out of every
# 'bert_score' entry. Point this at another metric column of the Excel
# file to visualize a different metric.
chart_data['bert_f1_score'] = chart_data['bert_score'].map(extract_f1)

In [None]:
# Show the extracted f1 values as the cell output
chart_data.loc[:, 'bert_f1_score']

### Bar Plots for the evaluation metric for each model

In [None]:
# Draw one figure per model: a bar for each of that model's rows,
# positioned by dataset name and sized by the extracted BERT-score f1.
for model in models:
    model_data = chart_data.loc[chart_data['model_name'] == model]

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(model_data['dataset_name'], model_data['bert_f1_score'], alpha=0.7, label='bert_score f1')

    # Titles, axis labels and tick styling
    ax.set_title(f'Performance Metrics for {model}', fontsize=14)
    ax.set_xlabel('Dataset Name', fontsize=12)
    ax.set_ylabel('Scores', fontsize=12)
    # Tilt the dataset names and right-align them under their ticks
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.legend()
    fig.tight_layout()
    plt.show()

### Best-performing fine-tuned model for each pre-trained model

In [None]:
# For every model, select the row with the highest BERT f1 score and keep
# only the columns used for plotting and annotation (hyperparameters included).
top_row_labels = chart_data.groupby('model_name')['bert_f1_score'].idxmax()
best_performance = (
    chart_data
    .loc[top_row_labels, ['model_name', 'dataset_name', 'bert_f1_score', 'hyperparameters']]
    .reset_index(drop=True)
)

# One bar per model, height = that model's best f1 score
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(best_performance['model_name'], best_performance['bert_f1_score'], alpha=0.7, color='blue')

# Write each winner's hyperparameters vertically just above its bar.
# The index was reset above, so the row position doubles as the x coordinate.
annotations = zip(best_performance['bert_f1_score'], best_performance['hyperparameters'])
for bar_pos, (f1_value, hyperparams) in enumerate(annotations):
    ax.text(bar_pos, f1_value + 0.01, str(hyperparams),
            ha='center', fontsize=8, rotation=90, wrap=True)

# Titles, axis labels and tick styling
ax.set_title('Best-Performing Training Dataset for Each Model', fontsize=14)
ax.set_xlabel('Model Name', fontsize=12)
ax.set_ylabel('BERT f1 Score', fontsize=12)
ax.set_xticks(range(len(best_performance)))
ax.set_xticklabels(best_performance['model_name'], rotation=45, ha='right')
fig.tight_layout()

# Render the figure
plt.show()