# Comparing matching runs with and without pretraining


In [None]:
import sys
import pandas as pd
import wandb

import matplotlib.pyplot as plt

import os

# Append the directory two levels above the current working directory to the
# module search path (presumably the project root that contains the `matching`
# package imported below - TODO confirm).
sys.path.append("/".join(os.getcwd().split("/")[:-2]))

# Client for the Weights & Biases public API; used by the cells that list runs.
api = wandb.Api()

# Imported after the sys.path change above, so the order matters here.
from matching.config import Settings

# Configuration

Set the `slurmId`s of the runs you want to compare in `compare_ids` below.


In [None]:
# slurmIds of the runs to compare; the run tables built further down are
# filtered to exactly these IDs.
compare_ids = [1064635, 1064640, 1064113]
# Project settings; read below for the wandb entity and project names.
settings = Settings()

# Loading run data


In [None]:
def calc_f1(precision, recall):
    """Return the F1 score of *precision* and *recall*, rounded to 3 decimals.

    F1 is the harmonic mean ``2 * P * R / (P + R)``. The arguments must support
    element-wise arithmetic and the quotient must provide a ``.round`` method
    (e.g. pandas Series); the cells below pass per-run averages.
    """
    numerator = 2 * (precision * recall)
    denominator = precision + recall
    score = numerator / denominator
    return score.round(3)


# Hex colours for the regular-training and pretraining series.
colour_pretrain = "#5D97BF"
colour_train = "#F79647"

# Use one font size for general text and for axes titles in every figure.
plt.rcParams.update(
    {
        "font.size": 20,
        "axes.titlesize": 20,
    }
)

## Models not using pretraining

Loads the finished runs that do not use pretraining, keeps those listed in `compare_ids`, and then calculates their average performance per `slurmId`.


In [None]:
# Build `runs_df`: one row per finished run of the regular (non-pretraining)
# project, restricted to the slurmIds listed in `compare_ids`.
runs = api.runs(f"{settings.wandb_entity}/{settings.wandb_project}")

name_list = []
precision, recall, f1, best_threshold = [], [], [], []
seeds = []
dropout = []
epochs = []
optimizer = []

# A run is only usable if all of these test metrics were logged.
required_metrics = ("test_precision", "test_recall", "test_f1")

for run in runs:
    if run.state != "finished":
        continue
    # Seed and run id are parsed out of the run name below; skip runs whose
    # name lacks either marker instead of failing with an IndexError.
    if "seed=" not in run.name or "slurmId=" not in run.name:
        continue
    summary = run.summary._json_dict
    if any(metric not in summary for metric in required_metrics):
        continue

    config = run.config

    name_list.append(run.name)
    precision.append(summary["test_precision"])
    recall.append(summary["test_recall"])
    f1.append(summary["test_f1"])
    # Fall back to 0.5 without writing the default back into the summary dict.
    best_threshold.append(summary.get("best_threshold", 0.5))
    # The seed is the text between "seed=" and the next underscore.
    seeds.append(run.name.split("seed=")[1].split("_")[0])
    dropout.append(config.get("drop", 0.0))
    # The epoch count may be stored under "epochs" or "n_epochs"; default 10.
    epochs.append(config.get("epochs", config.get("n_epochs", 10)))
    # `== True` (not plain truthiness) keeps the original selection rule.
    uses_adamw = config.get("adamw") == True  # noqa: E712
    optimizer.append("adamw" if uses_adamw else "adam")


runs_df = pd.DataFrame(
    {
        "name": name_list,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "best_threshold": best_threshold,
        "seed": seeds,
        "epochs": epochs,
        "optimizer": optimizer,
        "dropout": dropout,
    }
)

lm_type = [
    "roberta",
    "distilbert",
]

# Language model: the first entry of `lm_type` contained in the run name.
runs_df["lm"] = runs_df["name"].apply(
    lambda x: next((lm for lm in lm_type if lm in x), "unknown")
)

# Everything after "slurmId=" in the run name, as an integer. The cast happens
# before sorting so that the ids are ordered numerically, not as strings.
runs_df["run_id"] = (
    runs_df["name"].apply(lambda x: x.split("slurmId=")[1]).astype(int)
)

# Model name: the text before the first underscore of the run name.
runs_df["model"] = runs_df["name"].apply(lambda x: x.split("_")[0])

runs_df.sort_values("run_id", ascending=False, inplace=True)

runs_df = runs_df[runs_df["run_id"].isin(compare_ids)]

In [None]:
# Summarise the regular runs per run_id: mean precision/recall over all rows
# sharing a run_id, and the F1 score derived from those (rounded) means.
runs_by_id = runs_df.groupby("run_id")

avg_precision = runs_by_id["precision"].mean().round(3)
avg_recall = runs_by_id["recall"].mean().round(3)

f1 = pd.DataFrame(
    index=runs_df["run_id"].unique(),
)

# Same formula as before, now via the shared helper.
f1["f1"] = calc_f1(avg_precision, avg_recall)

f1["avg_precision"] = avg_precision
f1["avg_recall"] = avg_recall

# Descriptive columns are taken from the first row of each group.
f1["model"] = runs_by_id["model"].first()
f1["lm"] = runs_by_id["lm"].first()

f1["run_id"] = f1.index
f1["epochs"] = runs_by_id["epochs"].first()
f1

## Models using pretraining

Loads the finished runs that use pretraining, keeps those listed in `compare_ids`, and then calculates their average performance per `slurmId`.


In [None]:
# Build `pretrain_runs_df`: one row per finished run of the pretraining
# project, restricted to the slurmIds listed in `compare_ids`.
runs = api.runs(f"{settings.wandb_entity}/{settings.wandb_pretrain_project}")

name_list = []
precision, recall, f1, best_threshold = [], [], [], []
batch_sizes, comment = [], []
linear = []
epochs = []
seeds = []
pretrain_epochs = []
dropout = []

# A run is only usable if all of these test metrics were logged.
required_metrics = ("test_precision", "test_recall", "test_f1")

for run in runs:
    if run.state != "finished":
        continue
    summary = run.summary._json_dict
    if any(metric not in summary for metric in required_metrics):
        continue
    # Seed and run id are parsed out of the run name below; skip runs whose
    # name lacks either marker instead of failing with an IndexError.
    if "slurmId=" not in run.name or "seed=" not in run.name:
        continue

    config = run.config

    name_list.append(run.name)
    precision.append(summary["test_precision"])
    recall.append(summary["test_recall"])
    f1.append(summary["test_f1"])
    # Fall back to 0.5 without writing the default back into the summary dict.
    best_threshold.append(summary.get("best_threshold", 0.5))

    # Hyper-parameters, with a default for runs that did not log them.
    batch_sizes.append(config.get("batch_size", 32))
    comment.append(config.get("comment"))
    linear.append(config.get("linear", False))
    epochs.append(config.get("epochs", 10))
    pretrain_epochs.append(config.get("pretrain_epochs", 10))
    # NOTE(review): this project is read with the key "dropout", while the
    # regular-project cell reads "drop" - confirm both keys are correct.
    dropout.append(config.get("dropout", 0.0))

    # The seed is the text between "seed=" and the next underscore.
    seeds.append(run.name.split("seed=")[1].split("_")[0])


pretrain_runs_df = pd.DataFrame(
    {
        "name": name_list,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "best_threshold": best_threshold,
        "batch_size": batch_sizes,
        "comment": comment,
        "linear": linear,
        "seed": seeds,
        "epochs": epochs,
        "pretrain_epochs": pretrain_epochs,
        "dropout": dropout,
    }
)

lm_type = [
    "roberta",
    "distilbert",
]

# Language model: the first entry of `lm_type` contained in the run name.
pretrain_runs_df["lm"] = pretrain_runs_df["name"].apply(
    lambda x: next((lm for lm in lm_type if lm in x), "unknown")
)

# Everything after "slurmId=" in the run name, as an integer (cast once).
pretrain_runs_df["run_id"] = (
    pretrain_runs_df["name"].apply(lambda x: x.split("slurmId=")[1]).astype(int)
)

# Model name: the text before the first underscore of the run name.
pretrain_runs_df["model"] = pretrain_runs_df["name"].apply(lambda x: x.split("_")[0])

pretrain_runs_df.sort_values("run_id", ascending=False, inplace=True)

pretrain_runs_df = pretrain_runs_df[pretrain_runs_df["run_id"].isin(compare_ids)]
pretrain_runs_df.head()

In [None]:
# Summarise the pretraining runs per run_id: mean precision/recall over all
# rows sharing a run_id, and the F1 score derived from those (rounded) means.
pretrain_by_id = pretrain_runs_df.groupby("run_id")

avg_precision = pretrain_by_id["precision"].mean().round(3)
avg_recall = pretrain_by_id["recall"].mean().round(3)

# Start from an index-only frame so the row order follows `pretrain_runs_df`;
# the assigned Series are aligned to that index.
pretrain_f1 = pd.DataFrame(index=pretrain_runs_df["run_id"].unique()).assign(
    f1=calc_f1(avg_precision, avg_recall),
    avg_precision=avg_precision,
    avg_recall=avg_recall,
    # Descriptive columns are taken from the first row of each group.
    model=pretrain_by_id["model"].first(),
    lm=pretrain_by_id["lm"].first(),
)
pretrain_f1["run_id"] = pretrain_f1.index

pretrain_f1

# Comparison


In [None]:
# Pair every regular run with the pretraining run that has the same run_id,
# seed and epoch count. Columns present in both tables keep their plain name
# for the regular run and get the "_pretrain" suffix for the pretraining run.
joined = pd.merge(
    runs_df,
    pretrain_runs_df,
    on=["run_id", "seed", "epochs"],
    suffixes=("", "_pretrain"),
)

# Display order: metrics side by side first, then the hyper-parameters.
column_order = [
    "lm",
    "f1",
    "f1_pretrain",
    "precision",
    "precision_pretrain",
    "recall",
    "recall_pretrain",
    "best_threshold",
    "best_threshold_pretrain",
    "comment",
    "linear",
    "pretrain_epochs",
    "lm_pretrain",
    "run_id",
    "seed",
    "batch_size",
    "dropout",
    "epochs",
]

# Keep only the runs selected for comparison, with the columns reordered.
is_selected = joined["run_id"].isin(compare_ids)
relevant = joined.loc[is_selected, column_order]


relevant

In [None]:
# Aggregate the paired runs per run_id and compare the F1 score with and
# without pretraining.
grouped = relevant.groupby("run_id")

# Metrics are averaged over the group; hyper-parameters are taken from the
# first row of each group.
averaged_columns = [
    "precision",
    "recall",
    "precision_pretrain",
    "recall_pretrain",
    "f1",
    "best_threshold",
]
first_value_columns = [
    "batch_size",
    "comment",
    "linear",
    "pretrain_epochs",
    "epochs",
    "lm",
    "dropout",
]
aggregations = {
    **dict.fromkeys(averaged_columns, "mean"),
    **dict.fromkeys(first_value_columns, "first"),
}

grouped_data = grouped.agg(aggregations).reset_index()


avg_precision = grouped_data["precision"]
avg_recall = grouped_data["recall"]

avg_precision_pretrain = grouped_data["precision_pretrain"]
avg_recall_pretrain = grouped_data["recall_pretrain"]

# F1 is recomputed from the averaged precision/recall of each variant.
f1 = calc_f1(avg_precision, avg_recall)
f1_pretrain = calc_f1(avg_precision_pretrain, avg_recall_pretrain)

joint_f1 = (
    grouped_data[["precision", "recall", "precision_pretrain", "recall_pretrain"]]
    .assign(
        f1=f1,
        f1_pretrain=f1_pretrain,
        # Gain of the pretraining variant, relative (in percent) and absolute.
        percent_improvement=((f1_pretrain - f1) / f1) * 100,
        absolute_improvement=f1_pretrain - f1,
        dropout=grouped_data["dropout"],
        epochs=grouped_data["epochs"],
        pretrain_epochs=grouped_data["pretrain_epochs"],
        batch_size=grouped_data["batch_size"],
        lm=grouped_data["lm"],
        run_id=grouped_data["run_id"],
    )
    .set_index("run_id")
)


joint_f1

# Loss curves

Extracts data on the loss curves of a particular run.


In [None]:
import pandas as pd
import wandb

api = wandb.Api()

# Run to inspect: both values are matched as substrings of the wandb run name.
slurmId = "1064640"
seed = "0"


def _collect_loss_curves(project_path, *, finished_only, with_pretrain_loss):
    """Return one loss record per run in *project_path* matching slurmId/seed.

    Args:
        project_path: wandb project path in the form "entity/project".
        finished_only: skip runs whose state is not "finished".
        with_pretrain_loss: also store the "pretrain_loss" history column
            (an empty list if the run did not log it).

    Returns:
        A list of dicts with the keys "run_id", "train_loss", "type" and,
        if requested, "pretrain_loss". Runs without a "train_loss" history
        column are skipped.
    """
    records = []
    for run in api.runs(project_path):
        if finished_only and run.state != "finished":
            continue
        if f"slurmId={slurmId}" not in run.name or f"seed={seed}" not in run.name:
            continue

        # NOTE(review): run.history() is called with its defaults; check the
        # wandb documentation on history sampling if every step is needed.
        history = run.history()
        if "train_loss" not in history.columns:
            continue

        record = {
            # The wandb run id is used for both projects so the index of
            # `losses` is uniform.
            "run_id": run.id,
            "train_loss": history["train_loss"].dropna().tolist(),
            "type": "Pretrain" if "Matching-Pretrain" in run.project else "Regular",
        }
        if with_pretrain_loss:
            # Guard the column lookup: a run may have logged no pretrain loss.
            record["pretrain_loss"] = (
                history["pretrain_loss"].dropna().tolist()
                if "pretrain_loss" in history.columns
                else []
            )
        records.append(record)
    return records


# NOTE(review): the project paths are hard-coded here, whereas the cells above
# read entity and project from `settings` - confirm they refer to the same
# projects. Runs of the pretraining project are not filtered by state (that
# check was disabled in the original cell); runs of the regular project are.
train_losses = _collect_loss_curves(
    "bp2023fn1-kunstgraph/Matching-Pretrain",
    finished_only=False,
    with_pretrain_loss=True,
)
train_losses += _collect_loss_curves(
    "bp2023fn1-kunstgraph/Matching",
    finished_only=True,
    with_pretrain_loss=False,
)

# Fixing the columns keeps set_index() working even when no run matched.
losses = pd.DataFrame(
    train_losses, columns=["run_id", "train_loss", "pretrain_loss", "type"]
)
losses.set_index("run_id", inplace=True)
losses

## Similarity Learning loss curve


In [None]:
import os

import matplotlib.pyplot as plt

# Plot the training loss of every run collected in `losses`, labelled by
# whether the run comes from the pretraining project.
plt.figure(figsize=(6.5, 5))

for _, row in losses.iterrows():
    label = "P-Ditto-D" if row["type"] == "Pretrain" else "Ditto-D"
    plt.plot(row["train_loss"], label=label)


plt.ylabel("Loss")
plt.xlabel("Step")

plt.legend(loc="upper right")
plt.ylim(0, 1)

plt.tight_layout()

# Create the output directory if needed so savefig() does not fail on a
# fresh checkout.
output_path = "../../data/output/plots/loss_curve.pdf"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path)
plt.show()

## Pretrain loss curve


In [None]:
import os

import matplotlib.pyplot as plt

# Plot the pretraining loss of every pretraining run collected in `losses`.
plt.figure(figsize=(6.5, 5))

for _, row in losses.iterrows():
    if row["type"] == "Pretrain":
        plt.plot(row["pretrain_loss"], label="P-Ditto-D")

plt.ylabel("Loss")
plt.xlabel("Step")

plt.legend()
plt.tight_layout()

# Create the output directory if needed so savefig() does not fail on a
# fresh checkout.
output_path = "../../data/output/plots/pretrain_loss_curve.pdf"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path)
plt.show()