In [4]:
import os
import sys
import pathlib
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from rich import print

# Switch matplotlib to its built-in "ggplot" style sheet.
plt.style.use("ggplot")

# Disabled: hard-coded local path to a results CSV. Nothing visible in this
# notebook reads `data_path`.
# data_path = pathlib.Path("/devcode/GATE-private/notebooks/gate-results.csv")

In [5]:
from datetime import datetime


def convert_to_datetime(date_string):
    """Parse a second-resolution ``YYYY-MM-DDTHH:MM:SS`` string.

    Args:
        date_string: Timestamp text in exactly the format above (no
            fractional seconds, no timezone suffix).

    Returns:
        The corresponding naive ``datetime`` object.

    Raises:
        ValueError: If ``date_string`` does not match the expected format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S"
    return datetime.strptime(date_string, timestamp_format)

In [6]:
import pandas as pd
import wandb

api = wandb.Api()
project = api.projects("machinelearningbrewery")

# List the projects visible under the entity as a quick sanity check.
for item in project:
    print(item.name)

runs = api.runs("machinelearningbrewery/gate-0-9-1")
exp_name_to_time_dict = {}  # exp_name -> heartbeat timestamp of the run kept for it
metric_keys = ["testing/ensemble_3/"]

# exp_name -> (summary metrics, config) of the most recent run seen so far.
# Previously the timestamp dict was never filled, so the "newest run wins"
# check could not fire and every duplicate run was appended. Collecting into a
# dict first guarantees exactly one entry (the newest) per experiment name.
latest_run_by_name = {}
for run in tqdm(runs):
    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config = {k: v for k, v in run.config.items() if not k.startswith("_")}

    # Only runs that declare an experiment name containing "beta" are kept.
    exp_name = config.get("exp_name")
    if not isinstance(exp_name, str) or "beta" not in exp_name:
        continue

    timestamp = convert_to_datetime(run.heartbeatAt)
    previous_timestamp = exp_name_to_time_dict.get(exp_name)
    if previous_timestamp is not None and timestamp <= previous_timestamp:
        # An equally recent or newer run with this name is already recorded.
        continue

    exp_name_to_time_dict[exp_name] = timestamp
    # .summary contains the output keys/values for metrics like accuracy.
    #  We call ._json_dict to omit large files
    latest_run_by_name[exp_name] = (run.summary._json_dict, config)

# Flatten into the parallel lists that the following cells consume.
all_keys = set()
summary_list, config_list, name_list = [], [], []
for exp_name, (metric_dict, config) in latest_run_by_name.items():
    summary_list.append(metric_dict)
    config_list.append(config)
    name_list.append(exp_name)

    # Remember every testing metric that is a macro or mean aggregate.
    all_keys.update(
        key
        for key in metric_dict
        if "testing" in key and ("macro" in key or "mean" in key)
    )

runs_df = pd.DataFrame(
    {"summary": summary_list, "config": config_list, "name": name_list}
)
# Optional: persist the raw table for offline inspection.
# runs_df.to_csv("project.csv")

UsageError: api_key not configured (no-tty). call wandb.login(key=[your_api_key])

In [None]:
# Pass-through selection step: every (summary, config, name) triple is kept.
# To restrict to a subset, filter `kept_runs` here, e.g. keep only triples
# whose name satisfies `name.endswith("-7")`.
kept_runs = list(zip(summary_list, config_list, name_list))

new_summary_list = [kept_summary for kept_summary, _, _ in kept_runs]
new_config_list = [kept_config for _, kept_config, _ in kept_runs]
new_name_list = [kept_name for _, _, kept_name in kept_runs]

# Rebind the working lists to the (currently unfiltered) copies.
summary_list, config_list, name_list = (
    new_summary_list,
    new_config_list,
    new_name_list,
)

In [None]:
# Metric keys containing any of these substrings are left out of the summary.
excluded_fragments = (
    "shape",
    "colour",
    "logits",
    "count",
    "material",
    "yes_no",
    "size",
    "similarities",
)

selected_keys = {
    key
    for key in sorted(all_keys)
    if not any(fragment in key for fragment in excluded_fragments)
}
print(selected_keys)

In [None]:
from collections import defaultdict

# experiment name -> {normalised metric key -> value}
exp_dict = defaultdict(dict)

for name, config, metric_dict in zip(name_list, config_list, summary_list):
    # Pick out the selected metrics this run reports, rewriting "-/" to "/"
    # in the key before storing it.
    reported = {
        key.replace("-/", "/"): metric_dict[key]
        for key in selected_keys
        if key in metric_dict
    }
    # Runs that report none of the selected metrics get no entry at all.
    if reported:
        exp_dict[name].update(reported)

In [None]:
import collections
import csv
from typing import Dict, List


def aggregate_experiments(
    experiments: Dict[str, Dict[str, float]]
) -> Dict[str, Dict[str, List[float]]]:
    """Collect each experiment's metric values into per-metric lists.

    Args:
        experiments: Mapping of experiment name to ``{metric: value}``.

    Returns:
        A nested ``defaultdict`` mapping experiment name to
        ``{metric: [values...]}``. Experiments with no metrics get no entry.
    """
    grouped = collections.defaultdict(lambda: collections.defaultdict(list))

    for run_name, run_metrics in experiments.items():
        # Each run is keyed by its full name; stripping a trailing "-<suffix>"
        # with `.rsplit("-", 1)[0]` here would pool runs that differ only in
        # that suffix.
        group_key = run_name
        for metric_name in run_metrics:
            grouped[group_key][metric_name].append(run_metrics[metric_name])

    return grouped


def create_csv(
    output_filename: str,
    aggregated_experiments: Dict[str, Dict[str, List[float]]],
) -> None:
    """Write one CSV row per experiment holding the mean of every metric.

    Experiment names are split on the first two hyphens; the first part is
    written as "Experiment-series" and the second as "Dataset-name". Names
    with fewer parts no longer raise: the missing parts are written as empty
    strings.

    Args:
        output_filename: Path of the CSV file to create or overwrite.
        aggregated_experiments: Mapping of experiment name to
            ``{metric: [values...]}``.

    Notes:
        * A metric whose value list contains the string "NaN" is left blank
          for that experiment.
        * A metric with no values is also left blank.
        * "count" is the largest number of values behind any written metric.
    """
    unique_metrics = set()
    for metrics in aggregated_experiments.values():
        unique_metrics.update(metrics.keys())

    rows = []
    for experiment_name, metrics in aggregated_experiments.items():
        experiment_parts = experiment_name.split("-", 2)
        # Pad so that names without enough "-" separated parts still unpack.
        experiment_series, dataset_name = (experiment_parts + ["", ""])[:2]

        row = {
            "Experiment-series": experiment_series,
            "Dataset-name": dataset_name,
            "Experiment-name": experiment_name,
            "count": 0,
        }
        for metric in unique_metrics:
            values = metrics.get(metric, [])
            count = len(values)
            if "NaN" in values:
                # Cannot average a list holding the "NaN" placeholder string.
                continue
            row[metric] = sum(values) / count if count > 0 else None
            row["count"] = max(row["count"], count)

        rows.append(row)

    fieldnames = [
        "Experiment-series",
        "Dataset-name",
        "Experiment-name",
        "count",
    ] + sorted(unique_metrics)

    with open(output_filename, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


# Group the collected metrics per experiment, then write the summary CSV.
aggregated_experiments = aggregate_experiments(experiments=exp_dict)

create_csv(
    output_filename="experiments_summary.csv",
    aggregated_experiments=aggregated_experiments,
)

In [None]:
import collections
import numpy as np
from typing import Dict, List


def prettify_metric(metric: str) -> str:
    """Shorten a raw metric name using a fixed table of abbreviations.

    The longest matching table key is replaced (all of its occurrences) and
    the result is returned immediately, so at most one table entry is applied.
    Trying the longest keys first matters: "accuracy_top_5" is a substring of
    "text_to_image_accuracy_top_5", and trying the short key first would make
    the longer, more specific entries unreachable.

    Args:
        metric: Raw metric name, possibly with a path prefix or suffix.

    Returns:
        The metric name with the matching fragment abbreviated, or ``metric``
        unchanged when no table key occurs in it.
    """
    pretty_dict = {
        "accuracy_top_5": "acc@5",
        "accuracy_top_1": "acc@1",
        "text_to_image_accuracy": "txt2img acc",
        "text_to_image_accuracy_top_5": "txt2img acc@5",
        # top_1 counterparts of the top_5 entries, so that these names are not
        # captured by the shorter "*_accuracy" keys once longest-first
        # matching is used.
        "text_to_image_accuracy_top_1": "txt2img acc@1",
        "image_to_text_accuracy": "img2txt acc",
        "image_to_text_accuracy_top_5": "img2txt acc@5",
        "image_to_text_accuracy_top_1": "img2txt acc@1",
        "auc-macro": "auc-macro",
        "bs-macro": "bs-macro",
        "aps-macro": "aps-macro",
    }
    # Most specific (longest) key first; sorted() is stable, so keys of equal
    # length keep their table order.
    for key in sorted(pretty_dict, key=len, reverse=True):
        if key in metric:
            return metric.replace(key, pretty_dict[key])
    return metric


def generate_latex_tables(
    aggregated_experiments: Dict[str, Dict[str, List[float]]]
) -> Dict[str, Dict[str, List[tuple]]]:
    """Regroup aggregated metrics by dataset and metric for table rendering.

    Experiment names are split on the first two hyphens as
    ``<series>-<dataset>-<rest>``. Names with fewer parts no longer raise on
    unpacking: a missing dataset part becomes "unknown", and a missing
    remainder falls back to the full experiment name as the row label.

    Args:
        aggregated_experiments: Mapping of experiment name to
            ``{metric: [values...]}``.

    Returns:
        A nested ``defaultdict`` mapping dataset name to
        ``{pretty metric: [(row label, mean, std), ...]}``. ``mean`` and
        ``std`` are ``None`` when the metric has no values. Metrics whose
        value list contains the string "NaN" are skipped.
    """
    dataset_tables = collections.defaultdict(
        lambda: collections.defaultdict(list)
    )

    for experiment_name, metrics in aggregated_experiments.items():
        experiment_parts = experiment_name.split("-", 2)
        dataset_name = (
            experiment_parts[1] if len(experiment_parts) > 1 else "unknown"
        )
        new_experiment_name = (
            experiment_parts[2] if len(experiment_parts) > 2 else experiment_name
        )

        for metric, values in metrics.items():
            # Abbreviate, then strip the shared prefix/suffix and underscores
            # to get the column title.
            pretty_metric = (
                prettify_metric(metric)
                .replace("testing/ensemble_3/", "")
                .replace("-epoch-mean", "")
                .replace("_", " ")
            )
            count = len(values)
            if "NaN" in values:
                # Cannot average a list holding the "NaN" placeholder string.
                continue
            mean_value = sum(values) / count if count > 0 else None
            # np.std uses its default (population) standard deviation.
            std_dev = np.std(values) if count > 0 else None
            dataset_tables[dataset_name][pretty_metric].append(
                (new_experiment_name, mean_value, std_dev)
            )

    return dataset_tables


def _escape_latex(text) -> str:
    """Escape the characters LaTeX treats specially in running text."""
    replacements = {
        "\\": "\\textbackslash{}",
        "&": "\\&",
        "%": "\\%",
        "$": "\\$",
        "#": "\\#",
        "_": "\\_",
        "{": "\\{",
        "}": "\\}",
        "~": "\\textasciitilde{}",
        "^": "\\textasciicircum{}",
    }
    # Character-by-character so an inserted backslash is never escaped again.
    return "".join(replacements.get(char, char) for char in str(text))


def write_latex_tables_to_file(
    dataset_tables: Dict[str, Dict[str, List[tuple]]], output_filename: str
) -> None:
    """Write one LaTeX ``table``/``tabular`` per dataset to a file.

    Each table has an "Experiment" label column followed by one column per
    metric. A cell shows ``mean $\\pm$ std`` with two decimals, or "NA" when
    the experiment has no value for that metric.

    Args:
        dataset_tables: Mapping of dataset name to
            ``{metric: [(experiment label, mean, std), ...]}``. Entries whose
            mean is ``None`` are ignored.
        output_filename: Path of the ``.tex`` file to create or overwrite.

    Notes:
        * The column spec declares exactly one ``c`` per metric. It used to
          declare twice as many as each row supplies.
        * Dataset names, metric names and experiment labels are LaTeX-escaped
          so characters such as ``_`` do not break compilation.
        * The separator is emitted as ``$\\pm$`` rather than a raw "±", and
          the file is written as UTF-8, so the result does not depend on the
          platform's default encoding.
    """
    with open(output_filename, "w", encoding="utf-8") as output_file:
        for dataset_name, metrics_map in dataset_tables.items():
            metric_names = list(metrics_map.keys())

            output_file.write(f"% {dataset_name} dataset table\n")
            output_file.write("\\begin{table}[htbp]\n")
            output_file.write(
                f"\\caption{{Results for the {_escape_latex(dataset_name)} dataset}}\n"
            )

            column_spec = "|".join("c" * len(metric_names))
            output_file.write(
                f"\\begin{{tabular}}{{|l|{column_spec}|}}\\hline\n"
            )
            header = " & ".join(_escape_latex(name) for name in metric_names)
            output_file.write(f"Experiment & {header}\\\\\n")
            output_file.write("\\hline\n")

            # experiment label -> metric -> [(mean, std), ...]
            experiments = collections.defaultdict(
                lambda: collections.defaultdict(list)
            )
            for metric, experiment_values in metrics_map.items():
                for new_experiment_name, value, std_dev in experiment_values:
                    if value is not None:
                        experiments[new_experiment_name][metric].append(
                            (value, std_dev)
                        )

            for new_experiment_name in sorted(experiments.keys()):
                metric_values = experiments[new_experiment_name]
                row_data = []
                for metric in metric_names:
                    # .get() avoids creating empty entries in the defaultdict.
                    entries = metric_values.get(metric)
                    if not entries:
                        row_data.append("NA")
                        continue
                    mean_value = sum(value for value, _ in entries) / len(entries)
                    mean_std = np.mean([std_dev for _, std_dev in entries])
                    row_data.append(f"{mean_value:.2f} $\\pm$ {mean_std:.2f}")

                row_data_str = " & ".join(row_data)
                output_file.write(
                    f"{_escape_latex(new_experiment_name)} & {row_data_str}\\\\\n"
                )
                output_file.write("\\hline\n")

            output_file.write("\\end{tabular}\n")
            output_file.write("\\end{table}\n\n")


# Regroup the aggregated metrics per dataset, then render them as LaTeX.
dataset_tables = generate_latex_tables(
    aggregated_experiments=aggregated_experiments
)
write_latex_tables_to_file(
    dataset_tables=dataset_tables,
    output_filename="latex_tables.tex",
)