In [4]:
import os
import sys
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.style.use("ggplot")

data_path = pathlib.Path("/devcode/GATE-private/wandb/results/29052023.csv")

In [7]:
df = pd.read_csv(data_path)

# Print the first few rows of the DataFrame to see what's inside
print(df.head())


                                              Name                   Created   
0                                "fs-clip-debug-0"  2023-05-29T09:20:33.000Z  \
1       "athena-p365-wide-resnet50-2-tv-in1k-1337"  2023-05-29T08:31:08.000Z   
2  "athena-clvr-efficientnetv2-rw-s-ra2-in1k-2306"  2023-05-29T08:19:54.000Z   
3              "athena-p365-talip-base16-wit-2306"  2023-05-29T08:19:53.000Z   
4           "athena-clvr-clip-vit-base16-224-2306"  2023-05-29T08:16:56.000Z   

   Runtime                  End Time Notes                   Updated  Tags   
0       48  2023-05-29T09:21:21.000Z     -  2023-05-29T09:21:21.000Z   NaN  \
1     3961  2023-05-29T09:37:09.000Z     -  2023-05-29T09:37:09.000Z   NaN   
2     4622  2023-05-29T09:36:56.000Z     -  2023-05-29T09:36:56.000Z   NaN   
3     4519  2023-05-29T09:35:12.000Z     -  2023-05-29T09:35:12.000Z   NaN   
4     4801  2023-05-29T09:36:57.000Z     -  2023-05-29T09:36:57.000Z   NaN   

   dataloader.num_workers                         

In [8]:

# Print the summary statistics of the DataFrame
print(df.describe())

             Runtime  Tags  dataloader.num_workers  num_workers         seed   
count     693.000000   0.0              692.000000   692.000000   692.000000  \
mean     8330.764791   NaN               45.825052    45.986622  1201.492775   
std     15950.994087   NaN               23.699588    23.596831   952.160597   
min        24.000000   NaN                2.000000    16.000000    42.000000   
25%      2222.000000   NaN               16.000000    16.000000    42.000000   
50%      3947.000000   NaN               64.000000    64.000000  1337.000000   
75%      6465.000000   NaN               64.000000    64.000000  2306.000000   
max    214427.000000   NaN               96.000000    96.000000  2306.000000   

       eval_num_samples_per_episode  gradient_clipping  scheduler.cooldown   
count                           2.0                2.0               389.0  \
mean                           96.0                1.0                 0.0   
std                             0.0          

In [66]:
import pandas as pd
from rich.table import Table
from rich.console import Console

def load_and_process_data(data_path: str):
    # Load the data
    df = pd.read_csv(data_path)

    # Extract the seed number and remove it from the "Name"
    df['Seed'] = df['Name'].apply(lambda x: int(x.split('-')[-1].replace('"', '')) if x.split('-')[-1].replace('"', '').isdigit() else None)
    df['Name'] = df['Name'].apply(lambda x: '-'.join(x.split('-')[:-1]).replace('"', ''))

    # Filter the DataFrame
    df = df[df['Name'].str.startswith('athena')]
    df = df.dropna()
    
    df['Name'] = df['Name'].apply(lambda x: x.replace("athena-", ""))

    # Extract the task name
    df['Task'] = df['Name'].apply(lambda x: x.split('-')[0])

    # Convert columns to numeric type
    numeric_columns = ["testing/accuracy_top_1-epoch-mean", 
                       "testing/accuracy_top_5-epoch-mean"]

    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Group by 'Name' and 'Task' and calculate mean and standard deviation
    grouped = df.groupby(['Task', 'Name'])[numeric_columns].agg(['mean', 'std', 'count'])

    # Sort by 'Task'
    grouped = grouped.sort_values(by='Task')

    return grouped


def display_table(df):
    # Initialize a console with a larger width
    console = Console(width=120)

    # Initialize a table
    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Task", overflow="fold", width=10)

    model_names = sorted(set([name[1] for name in df.index]))
    for model_name in model_names:
        table.add_column(model_name, overflow="fold", width=20)

    # Add rows to the table
    tasks = sorted(set([name[0] for name in df.index]))
    for task in tasks:
        row_data = [task]
        for model_name in model_names:
            if (task, model_name) in df.index:
                row = df.loc[(task, model_name)]
                acc1_mean = f"{row['testing/accuracy_top_1-epoch-mean', 'mean']:.4g}"
                acc5_mean = f"{row['testing/accuracy_top_5-epoch-mean', 'mean']:.4g}"
                acc_values = f"{acc1_mean}/{acc5_mean}"
            else:
                acc_values = "N/A"
            row_data.append(acc_values)
        table.add_row(*row_data)

    # Display the table
    console.print(table)


df = load_and_process_data(data_path)
display_table(df)

In [65]:
def generate_latex_table(df):
    latex_table = "\\begin{table}\n"
    latex_table += "\t\\centering\n"
    latex_table += "\t\\scalebox{0.85}{\n"
    latex_table += "\t\\begin{tabular}{lcc}\n"
    latex_table += "\t\t\\toprule\n"
    latex_table += "\t\tName & Accuracy@1 & Accuracy@5\\\\\n"
    latex_table += "\t\t\\midrule\n"

    for name, row in df.iterrows():
        acc1_mean = f"{row['testing/accuracy_top_1-epoch-mean', 'mean']:.4g}"
        acc1_std = f"{row['testing/accuracy_top_1-epoch-mean', 'std']:.4g}"
        acc5_mean = f"{row['testing/accuracy_top_5-epoch-mean', 'mean']:.4g}"
        acc5_std = f"{row['testing/accuracy_top_5-epoch-mean', 'std']:.4g}"
        latex_table += f"\t\t{name.replace('athena-', '')} & {acc1_mean} ± {acc1_std} & {acc5_mean} ± {acc5_std}\\\\\n"

    latex_table += "\t\t\\bottomrule\n"
    latex_table += "\t\\end{tabular}\n"
    latex_table += "\t}\n"
    latex_table += "\\end{table}\n"

    return latex_table

latex_table = generate_latex_table(df)
print(latex_table)

AttributeError: 'tuple' object has no attribute 'replace'

In [45]:
def display_table(df):
    # Initialize a console with a larger width
    console = Console(width=120)

    # Initialize a table
    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Name", overflow="fold", width=50)
    table.add_column("Accuracy@1", overflow="fold", width=40)
    table.add_column("Accuracy@5", overflow="fold", width=40)

    # Add rows to the table
    for (task, name), row in df.iterrows():
        acc1_mean = f"{row['testing/accuracy_top_1-epoch-mean', 'mean']:.4g}"
        acc1_std = f"{row['testing/accuracy_top_1-epoch-mean', 'std']:.4g}"
        acc5_mean = f"{row['testing/accuracy_top_5-epoch-mean', 'mean']:.4g}"
        acc5_std = f"{row['testing/accuracy_top_5-epoch-mean', 'std']:.4g}"
        table.add_row(name, f"{acc1_mean} ± {acc1_std}", f"{acc5_mean} ± {acc5_std}")

    # Display the table
    console.print(table)


df = load_and_process_data(data_path)
display_table(df)