In [70]:
import json
import os
from datetime import datetime

import pandas as pd

from supertrainer import SUPERTRAINER_PUBLIC_ROOT

# Root directory that holds one sub-folder per evaluation run.
# NOTE(review): SUPERTRAINER_PUBLIC_ROOT is passed to os.getenv, i.e. it is used as the
# *name* of an environment variable -- confirm that is what the supertrainer constant holds.
dataset_dir = os.getenv(SUPERTRAINER_PUBLIC_ROOT)
if not dataset_dir:
    # os.getenv returns None for an unset variable. os.listdir(None) would then list the
    # current working directory and os.path.join(None, ...) would fail with an opaque
    # TypeError, so stop here with an explicit message instead.
    raise RuntimeError(
        f"Environment variable {SUPERTRAINER_PUBLIC_ROOT!r} is not set; "
        "cannot locate the evaluation output directory."
    )

# Accumulators: one dict per metrics.json, one dict per entry of every results.json
metrics_list = []
results_list = []

# Only runs stamped strictly after this moment are merged
date_filter = datetime(2024, 10, 28, 10, 14, 31)

# os.listdir returns entries in arbitrary order; sorting makes the traversal (and therefore
# the relative order of runs that share a timestamp) reproducible.
for folder_name in sorted(os.listdir(dataset_dir)):
    folder_path = os.path.join(dataset_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue

    # Folder names are parsed as <dataset>-<split>-<YYYYmmdd_HHMMSS>-<model>, where the
    # model part may itself contain '-' and is therefore re-joined.
    try:
        dataset_name, split_name, date_str, *model_parts = folder_name.split('-')
        folder_date = datetime.strptime(date_str, "%Y%m%d_%H%M%S")
    except ValueError:
        # Too few '-'-separated parts, or a timestamp that does not match the format
        continue
    model_name = '-'.join(model_parts)

    # Apply date filter
    if folder_date <= date_filter:
        continue

    # Run-level fields attached to every record loaded from this folder
    run_info = {
        "folder_name": folder_name,
        "date": folder_date,
        "dataset_name": dataset_name,
        "split_name": split_name,
        "model_name": model_name,
    }

    # Load metrics.json if available (a single JSON object per run)
    metrics_path = os.path.join(folder_path, "metrics.json")
    if os.path.exists(metrics_path):
        with open(metrics_path, "r", encoding="utf-8") as f:
            metrics = json.load(f)
        metrics.update(run_info)
        metrics_list.append(metrics)

    # Load results.json if available (iterated as a sequence of per-example dicts)
    results_path = os.path.join(folder_path, "results.json")
    if os.path.exists(results_path):
        with open(results_path, "r", encoding="utf-8") as f:
            results = json.load(f)
        for result in results:
            result.update(run_info)
            results_list.append(result)

# Sort metrics and results by date (most recent first)
metrics_list = sorted(metrics_list, key=lambda x: x["date"], reverse=True)
results_list = sorted(results_list, key=lambda x: x["date"], reverse=True)

# Create DataFrames for better visualization and manipulation
metrics_df = pd.DataFrame(metrics_list)
results_df = pd.DataFrame(results_list)

# Save merged metrics and results to JSON files next to the run folders.
# default=str serializes the datetime objects stored under "date".
merged_metrics_path = os.path.join(dataset_dir, "merged_metrics.json")
merged_results_path = os.path.join(dataset_dir, "merged_results.json")

with open(merged_metrics_path, "w", encoding="utf-8") as f:
    json.dump(metrics_list, f, default=str, indent=4)

with open(merged_results_path, "w", encoding="utf-8") as f:
    json.dump(results_list, f, default=str, indent=4)

# Optionally, display the merged DataFrames
print("Merged Metrics DataFrame:")
print(metrics_df.head())
print("\nMerged Results DataFrame:")
print(results_df.head())

Merged Metrics DataFrame:
   accuracy  precision    recall  f1_score  \
0     0.320   0.295038  0.297607  0.273850   
1     0.320   0.295002  0.296524  0.274947   
2     0.345   0.289295  0.277037  0.269059   
3     0.285   0.306417  0.303875  0.261300   
4     0.360   0.318717  0.311111  0.295991   

                                         folder_name                date  \
0  fake_news_detection_dataset_cross_lingual_form... 2024-10-28 21:49:04   
1  fake_news_detection_dataset_cross_lingual_form... 2024-10-28 21:43:41   
2  fake_news_detection_dataset_cross_lingual_form... 2024-10-28 21:38:56   
3  fake_news_detection_dataset_cross_lingual_form... 2024-10-28 21:34:19   
4  fake_news_detection_dataset_cross_lingual_form... 2024-10-28 21:28:34   

                                        dataset_name  \
0  fake_news_detection_dataset_cross_lingual_form...   
1  fake_news_detection_dataset_cross_lingual_form...   
2  fake_news_detection_dataset_cross_lingual_form...   
3  fake_news_det

In [57]:
# Show the last row of metrics_df. The frame is built from a list sorted newest-first,
# so this is the oldest run that passed the date filter.
metrics_df.iloc[-1]

accuracy                                                      0.4
precision                                                 0.54373
recall                                                   0.588433
f1_score                                                 0.425829
folder_name     fake_news_detection_dataset_cross_lingual_form...
date                                          2024-10-28 10:14:32
dataset_name    fake_news_detection_dataset_cross_lingual_form...
split_name                             train_claim_en_evidence_en
model_name                             claude-3-5-sonnet-20240620
Name: 82, dtype: object

In [71]:
# List the distinct model names present in the merged metrics.
metrics_df["model_name"].unique()

array(['gemma-2-9b-bnb-4bit', 'gpt-4o-mini',
       'bert-base-multilingual-uncased', 'indobert-base-uncased',
       'xlm-roberta-base', 'bert-base-arabic', 'Llama-3.2-3B-Instruct',
       'mistral-7b-instruct-v0.3-bnb-4bit', 'Qwen2.5-7B-bnb-4bit',
       'claude-3-5-sonnet-20240620'], dtype=object)

In [72]:
# Model name -> person who ran the evaluation, one dict per model family.
bert_based_models = {
    "xlm-roberta-base": "Bilal",
    "bert-base-arabic": "Bilal",
    "bert-base-multilingual-uncased": "Erland",
    "indobert-base-uncased": "Erland",
}
open_llms = {
    "Llama-3.2-3B-Instruct": "Bilal",
    "mistral-7b-instruct-v0.3-bnb-4bit": "Bilal",
    "gemma-2-9b-bnb-4bit": "Erland",
    "Qwen2.5-7B-bnb-4bit": "Erland",
}
closed_llms = {
    "gpt-4o-mini": "Bilal",
    "claude-3-5-sonnet-20240620": "Erland",
}


def _evaluator_for(model):
    """Look the model up in each family dict in turn and return the first person found."""
    for family in (bert_based_models, open_llms):
        person = family.get(model)
        if person:
            return person
    # Last family: its lookup result is returned as-is (None when the model is unlisted).
    return closed_llms.get(model)


# Tag every metrics row with its evaluator, then split the frame per person.
metrics_df["person"] = metrics_df["model_name"].apply(_evaluator_for)

bilal_df = metrics_df.loc[metrics_df["person"].eq("Bilal")]
erland_df = metrics_df.loc[metrics_df["person"].eq("Erland")]

# Full model identifier -> short label.
# NOTE(review): "gpt-4o-mini" is shortened to "GPT-4o" -- confirm that label is intended.
model_map_to_short = {
    # BERT-based models
    "xlm-roberta-base": "XLM-R",
    "bert-base-arabic": "ArabicBERT",
    "bert-base-multilingual-uncased": "mBERT",
    "indobert-base-uncased": "IndoBERT",
    # Open LLMs
    "Llama-3.2-3B-Instruct": "Llama",
    "mistral-7b-instruct-v0.3-bnb-4bit": "Mistral",
    "gemma-2-9b-bnb-4bit": "Gemma",
    "Qwen2.5-7B-bnb-4bit": "Qwen",
    # Closed LLMs
    "gpt-4o-mini": "GPT-4o",
    "claude-3-5-sonnet-20240620": "Claude",
}

In [69]:
# List the distinct model names that ended up in Erland's subset.
erland_df["model_name"].unique()

array(['bert-base-multilingual-uncased', 'indobert-base-uncased',
       'Qwen2.5-7B-bnb-4bit', 'claude-3-5-sonnet-20240620'], dtype=object)

In [73]:
def generate_individual_latex_tables(df, person_name):
    """Render one person's metrics as three LaTeX tables, one per model family.

    Tables are emitted in the order BERT-based models, open LLMs, closed LLMs.
    Family membership comes from the module-level ``bert_based_models``,
    ``open_llms`` and ``closed_llms`` dicts, and display names from
    ``model_map_to_short``. A table is written for every family, even when
    ``df`` holds no rows for it.

    Args:
        df: DataFrame providing the columns ``model_name``, ``split_name``,
            ``accuracy``, ``precision``, ``recall`` and ``f1_score``.
            ``split_name`` is split on ``_``; token 2 is read as the claim
            language and token 4 as the evidence language.
        person_name: Inserted verbatim into each caption and lower-cased
            into each label.

    Returns:
        str: The LaTeX source of the three tables, concatenated.
    """

    def to_display_lang(token):
        # "en" is kept, "idn" becomes "id", every other token is shown as "ar".
        return {"en": "en", "idn": "id"}.get(token, "ar")

    families = [
        ("BERT-Based Models", bert_based_models, "bert"),
        ("Open LLMs", open_llms, "open-llms"),
        ("Closed LLMs", closed_llms, "closed-llms")
    ]
    score_columns = ("accuracy", "precision", "recall", "f1_score")

    pieces = []
    for family_title, family_models, label_prefix in families:
        # Table preamble up to and including the header rule
        pieces += [
            "\\begin{table}[h]\n",
            f"    \\caption{{Cross-Lingual Evaluation Results of {person_name}'s Evaluation ({family_title})}}\n",
            f"    \\label{{tab:{label_prefix}-{person_name.lower()}}}\n",
            "    \\small\n",
            "    \\begin{tabularx}{\\columnwidth}{l l l X X X X}\n",
            "    \\toprule\n",
            "    \\textbf{Model} & \\textbf{Claim} & \\textbf{Evidence} & \\textbf{Acc} & \\textbf{Prec} & \\textbf{Rec} & \\textbf{F1} \\\\ \n",
            "    \\midrule\n",
        ]

        # One body row per metrics record whose model belongs to this family
        in_family = df["model_name"].isin(family_models.keys())
        for _, record in df[in_family].iterrows():
            tokens = record["split_name"].split('_')
            claim = to_display_lang(tokens[2])
            evidence = to_display_lang(tokens[4])
            short_name = model_map_to_short[record['model_name']]
            scores = " & ".join(f"{record[column]:.2f}" for column in score_columns)
            pieces.append(f"    {short_name} & {claim} & {evidence} & {scores} \\\\ \n")

        # Table footer
        pieces += [
            "    \\bottomrule\n",
            "    \\end{tabularx}\n",
            "\\end{table}\n\n",
        ]

    return "".join(pieces)

# Build the LaTeX source for each person's tables
latex_bilal_tables = generate_individual_latex_tables(bilal_df, "Bilal")
latex_erland_tables = generate_individual_latex_tables(erland_df, "Erland")

# Emit each heading followed by its tables; sep="\n" reproduces the line break that
# separate print calls would place between the heading and the tables
print("\nLaTeX Tables for Bilal's Evaluation:\n", latex_bilal_tables, sep="\n")
print("\nLaTeX Tables for Erland's Evaluation:\n", latex_erland_tables, sep="\n")



LaTeX Tables for Bilal's Evaluation:

\begin{table}[h]
    \caption{Cross-Lingual Evaluation Results of Bilal's Evaluation (BERT-Based Models)}
    \label{tab:bert-bilal}
    \small
    \begin{tabularx}{\columnwidth}{l l l X X X X}
    \toprule
    \textbf{Model} & \textbf{Claim} & \textbf{Evidence} & \textbf{Acc} & \textbf{Prec} & \textbf{Rec} & \textbf{F1} \\ 
    \midrule
    XLM-R & ar & ar & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & ar & id & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & ar & en & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & id & ar & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & id & id & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & id & en & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & en & ar & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & en & id & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    XLM-R & en & en & 0.12 & 0.04 & 0.33 & 0.07 \\ 
    ArabicBERT & ar & ar & 0.23 & 0.07 & 0.33 & 0.12 \\ 
    ArabicBERT & ar & id & 0.23 & 0.30 & 0.36 & 0.17 \\ 
    ArabicBERT & ar & en & 0.23 & 0.07 