In [91]:
from result_utils import *
import pandas as pd 
import wandb
import matplotlib.pyplot as plt
import os
import yaml
import zipfile
import json

In [92]:
# Weights & Biases workspace (entity) and project holding the runs to export.
entity = "praca-inzynierska"
project = "final-experiments"

# Query the runs stored under "<entity>/<project>" through the public API client.
api = wandb.Api()
runs = api.runs("/".join((entity, project)))

In [93]:
def zip_folder(folder_path, zip_name):
    """Compress every file below ``folder_path`` into the zip archive ``zip_name``.

    Entries are stored under their path relative to ``folder_path``. Only files
    are written, so empty directories do not appear in the archive.

    Args:
        folder_path: Directory whose files are archived (walked recursively).
        zip_name: Path of the archive to create; an existing file is overwritten.
    """
    # Resolve the archive location up front: when the archive lives inside
    # ``folder_path`` the walk would otherwise find the half-written zip file
    # and pack it into itself.
    archive_path = os.path.realpath(zip_name)

    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes os.walk descend in a stable order, so the
            # entry order does not depend on the file system.
            dirs.sort()
            for file in sorted(files):
                full_path = os.path.join(root, file)
                if os.path.realpath(full_path) == archive_path:
                    continue
                arcname = os.path.relpath(full_path, folder_path)
                zipf.write(full_path, arcname)

In [None]:
# Metrics whose maximum over the whole history is written to the text summary.
metrics_max_in_step = ["train/gpu_mem_peak_mb"]
# Metrics left out of the exported CSV.
metrics_ignore = ["train/gpu_mem_peak_mb","train/update_skipped", "train/is_update_step","train/accum_step",
                  "train/lr","train/loss","train/grad_norm"]


def _read_summary_runtime(run):
    """Return the ``_runtime`` value of the run summary, or ``"N/A"`` if unavailable."""
    try:
        summary = run.summary._json_dict
        # The payload may already be a mapping; json.loads() on a dict raises
        # TypeError, which used to be swallowed and always produced "N/A".
        # NOTE(review): `_json_dict` is a private wandb attribute whose type
        # appears to differ between releases - confirm against the pinned version.
        if isinstance(summary, (str, bytes, bytearray)):
            summary = json.loads(summary)
        runtime = summary.get("_runtime")
        if isinstance(runtime, str):
            runtime = float(runtime)
    except Exception:
        # Best effort: a missing or malformed summary must not abort the export.
        return "N/A"
    return "N/A" if runtime is None else runtime


def _column_peak(history, column):
    """Return the largest non-NaN value of ``column``, or ``"N/A"`` if there is none."""
    if column not in history.columns:
        return "N/A"
    peak = history[column].max(skipna=True)
    # An all-NaN column yields NaN; report it like a missing column.
    return "N/A" if pd.isna(peak) else peak


def save_run_history_wide_csv_and_max_txt(run, base_dir):
    """Export one run into ``<base_dir>/<run.name>_<run.id>/``.

    Up to three files are produced in that directory:

    * ``history_all_metrics.csv.gz`` - the run history restricted to the
      bookkeeping columns ``_step``/``_runtime`` plus every metric column that
      neither starts with ``_`` nor is listed in ``metrics_ignore``; rows in
      which all of those metrics are missing are dropped.
    * ``gpu_mem_and_runtime.txt`` - the peak of the metric configured in
      ``metrics_max_in_step`` and the ``_runtime`` value of the run summary.
    * ``config.yaml`` - downloaded from the run's files, if possible.

    Args:
        run: Run object providing ``name``, ``id``, ``history()``, ``summary``
            and ``file()``.
        base_dir: Directory below which the per-run directory is created.
    """
    run_dir = os.path.join(base_dir, f"{run.name}_{run.id}")
    os.makedirs(run_dir, exist_ok=True)

    history = run.history(samples=100000)
    metric_columns = [c for c in history.columns if not c.startswith("_") and c not in metrics_ignore]

    if metric_columns:
        # Keep only the bookkeeping columns that are actually present so a
        # history without `_runtime` no longer fails with a KeyError.
        index_columns = [c for c in ("_step", "_runtime") if c in history.columns]
        wide_df = history[index_columns + metric_columns].dropna(subset=metric_columns, how="all")

        csv_path = os.path.join(run_dir, "history_all_metrics.csv.gz")
        wide_df.to_csv(csv_path, index=False, na_rep='', compression='gzip')
        print(f"Saved wide CSV (gzip) for run {run.name} ({run.id})")
    else:
        print(f"No metrics to save for run {run.name} ({run.id})")

    if metrics_max_in_step:
        # The summary lookup does not depend on the metric, so do it once.
        runtime = _read_summary_runtime(run)
        txt_path = os.path.join(run_dir, "gpu_mem_and_runtime.txt")
        # File name and label are fixed, so with several configured metrics the
        # last one wins (unchanged from the previous behaviour).
        for m in metrics_max_in_step:
            max_val = _column_peak(history, m)
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(f"peak_gpu_mem_mb: {max_val}\n")
                f.write(f"runtime_s: {runtime}\n")

    try:
        config_file = run.file("config.yaml")
        config_file.download(root=run_dir, replace=True)
    except Exception as exc:
        # Best effort, but surface the cause: this also triggers on network or
        # permission errors, not only when the file is absent.
        print(f"config.yaml not found for run {run.name} ({run.id}): {exc}")


In [95]:
# Export every run of the project into its own sub-directory of ./experiments.
for run in runs:
    save_run_history_wide_csv_and_max_txt(run, base_dir='./experiments')

Saved wide CSV (gzip) for run E1_pretraining_wikipedia_bertsmall_mha (hd228t3k)
Saved wide CSV (gzip) for run E1_pretraining_imdb_bertsmall_mha (ghydwtq8)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f0_d0.1_cls (gnty8r83)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f0_d0.1_mean (dz8ut8xo)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f0_d0.2_cls (xzv6zdrh)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f0_d0.2_mean (izheqn5h)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f1_d0.1_cls (y6w5l42k)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f1_d0.1_mean (dtucpxrs)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f1_d0.2_cls (gkglmyzr)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f1_d0.2_mean (haph0z0a)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f2_d0.1_cls (dqnki4qe)
Saved wide CSV (gzip) for run E1_finetuning_imdb_bertsmall_mha_f2_d

KeyboardInterrupt: 

In [None]:
# Bundle the exported run directories into a single archive next to the notebook.
zip_folder(folder_path='./experiments', zip_name='experiments.zip')

In [None]:
# Read one exported history back in to inspect what was written.
file_path = "./experiments/E1_pretraining_wikipedia_bertsmall_mha_hd228t3k/history_all_metrics.csv.gz"

# The export is gzip-compressed; state it explicitly instead of relying on
# inference from the ".gz" suffix.
df = pd.read_csv(file_path, compression="gzip")

# Scratch snippet kept for reference (non-null test accuracy values):
# df['test/accuracy'].loc[~df['test/accuracy'].isna()]

In [None]:
# Preview the first five rows of the re-loaded history.
df.head(5)

Unnamed: 0,_step,_runtime,train/epoch,train/avg_epoch_loss
0,9375,528.77337,1.0,4.034514
1,18750,995.020051,2.0,2.876916
2,28125,1460.989766,3.0,2.64707
3,37500,1927.875223,4.0,2.518578
4,46875,2394.698494,5.0,2.423966
