In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so source edits are picked up without a restart.
%load_ext autoreload
%autoreload 2

import sys

# Put the parent directory on the import path.
# NOTE(review): presumably this makes the project package importable when the
# notebook is run from a subfolder — confirm the intended working directory.
sys.path.append("../")

In [2]:
import os

# Root directory that holds the hydra multirun output folders to analyse.
# The default is the original machine-specific location; set the
# MULTI_CONER_RUNS_PATH environment variable to point the notebook at a
# different copy of the experiments without editing this cell.
RUNS_PATH = os.environ.get(
    "MULTI_CONER_RUNS_PATH",
    "/media/christoph/HDD/experiments/multi_coner_remote/multiruns/",
)

In [3]:
import json
import wandb

from pathlib import Path
from omegaconf import OmegaConf
from itertools import chain

In [93]:
def _load_json(file: Path):
    """Read ``file`` as UTF-8 encoded JSON and return the decoded object."""
    # JSON is UTF-8 by specification; do not depend on the platform's locale.
    with file.open(encoding="utf-8") as f:
        return json.load(f)


def runs_as_dicts(path: str):
    """Collect the stored artifacts of every run found below ``path``.

    A directory is treated as a run when it contains a ``config_tree.txt``
    file. For each run the hydra config (``.hydra/config.yaml``), the wandb
    metadata and summary JSON files, and the two validation result JSON files
    in ``checkpoints`` are loaded.

    Runs whose ``wandb`` directory does not contain exactly one ``run-*``
    subdirectory are reported on stdout and skipped.

    Args:
        path: Root directory that is searched recursively for runs.

    Returns:
        A list with one dict per run, shaped as
        ``{"wandb": {"id", "metadata", "summary"}, "hydra": ...,
        "validation": {"original", "augmented"}}``.

    Raises:
        AssertionError: If a run lacks its hydra config, or (once a single
            wandb run directory was found) its wandb metadata/summary or one
            of its validation result files.
    """
    path = Path(path)

    run_directories = [p.parent for p in path.glob("**/config_tree.txt")]

    run_dicts = []
    for directory in run_directories:
        hydra_config_file = directory / ".hydra" / "config.yaml"
        assert hydra_config_file.exists(), f"Missing hydra config: {hydra_config_file}"

        wandb_run_directories = list((directory / "wandb").glob("run-*"))

        # Exactly one wandb run is required; tell the two failure modes apart
        # so the log does not claim "none found" when there are several.
        if not wandb_run_directories:
            print("No wandb run directory found in ", directory)
            continue
        if len(wandb_run_directories) > 1:
            print(
                f"Expected exactly one wandb run directory but found {len(wandb_run_directories)} in ",
                directory,
            )
            continue

        wandb_run_directory = wandb_run_directories[0]

        # The run id is the part of the directory name after the last dash.
        wandb_run_id = wandb_run_directory.name.split("-")[-1]

        wandb_files_directory = wandb_run_directory / "files"
        wandb_metadata_file = wandb_files_directory / "wandb-metadata.json"
        wandb_summary_file = wandb_files_directory / "wandb-summary.json"

        validation_file = directory / "checkpoints" / "validation_result.json"
        augmented_validation_file = directory / "checkpoints" / "augm_validation_result.json"

        # Verify all inputs up front so nothing is loaded for an incomplete run.
        for required_file in (
            wandb_metadata_file,
            wandb_summary_file,
            validation_file,
            augmented_validation_file,
        ):
            assert required_file.exists(), f"Missing run artifact: {required_file}"

        hydra_config = OmegaConf.load(hydra_config_file)

        run_dicts.append(
            {
                "wandb": {
                    "id": wandb_run_id,
                    "metadata": _load_json(wandb_metadata_file),
                    "summary": _load_json(wandb_summary_file),
                },
                # resolve=False keeps interpolations as they are written in the config.
                "hydra": OmegaConf.to_container(hydra_config, resolve=False),
                "validation": {
                    "original": _load_json(validation_file),
                    "augmented": _load_json(augmented_validation_file),
                },
            }
        )

    return run_dicts

In [94]:
# Scan RUNS_PATH and load every run; runs that do not have exactly one wandb
# run directory are printed by runs_as_dicts and left out of the result.
run_dicts = runs_as_dicts(RUNS_PATH)

No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/29
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/10
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/11
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/12
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/13
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/14
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/15
No wandb run directory found in  /media/christoph/HDD/experiments/multi_coner_remote/multiruns/2022-02-10_20-35-49/24
No wandb run directory found in  /media/christoph/HDD/ex

In [95]:
import pandas as pd

In [123]:
def run_dicts_to_dataframe(run_dicts):
    """Flatten run dicts into a DataFrame with one row per run.

    Each row holds, in this order: the wandb run ``id``; the "macro avg"
    entries of the original and augmented validation results (prefixed with
    ``val/orig/`` and ``val/augm/``); every wandb summary entry whose name does
    not start with an underscore; and all options of the hydra ``datamodule``,
    ``taskmodule`` and ``model`` sections. A section's ``_target_`` option is
    stored as ``<section>_target``; all other options keep their plain name,
    so a later entry with the same name overwrites an earlier one.

    Args:
        run_dicts: Iterable of dicts with "wandb", "validation" and "hydra" keys.

    Returns:
        pandas.DataFrame built from the flattened rows.
    """

    def flatten(run):
        row = {"id": run["wandb"]["id"]}

        results = run["validation"]
        for key, value in results["original"]["macro avg"].items():
            row[f"val/orig/{key}"] = value
        for key, value in results["augmented"]["macro avg"].items():
            row[f"val/augm/{key}"] = value

        for metric, val in run["wandb"]["summary"].items():
            # Underscore-prefixed summary entries are skipped.
            if metric.startswith("_"):
                continue
            row[metric] = val

        config = run["hydra"]

        # Insert batch_size first so its column precedes the other hydra options.
        row["batch_size"] = config["datamodule"]["batch_size"]

        for config_key in ("datamodule", "taskmodule", "model"):
            for key, val in config[config_key].items():
                column = f"{config_key}_target" if key == "_target_" else key
                row[column] = val

        return row

    return pd.DataFrame([flatten(run) for run in run_dicts])

In [124]:
# Flatten the collected runs into one table (one row per run) for comparison.
df = run_dicts_to_dataframe(run_dicts)

In [140]:
# Best 50 runs by "val/f1", shown next to the stored validation scores
# (original and augmented) and the hyperparameters that were varied.
(
    df.sort_values(by="val/f1", ascending=False)
    .head(50)[
        [
            "id",
            "val/f1",
            "val/orig/f1-score",
            "val/augm/f1-score",
            "train/f1",
            "batch_size",
            "learning_rate",
            "task_learning_rate",
            "wiki_to_vec_file",
            "gazetteer_add_output_features",
            "gazetteer_add_input_tokens",
            "gazetteer_path",
            "augment_input",
            "augment_input_prob",
            "use_mlp",
            "mlp_hidden_dim",
            "mlp_num_layers",
            "model_name_or_path",
            "use_span_length_embedding",
            "use_language_model",
        ]
    ]
)

Unnamed: 0,id,val/f1,val/orig/f1-score,val/augm/f1-score,train/f1,batch_size,learning_rate,task_learning_rate,wiki_to_vec_file,gazetteer_add_output_features,gazetteer_add_input_tokens,gazetteer_path,augment_input,augment_input_prob,use_mlp,mlp_hidden_dim,mlp_num_layers,model_name_or_path,use_span_length_embedding,use_language_model
77,189dh04u,0.910423,0.903801,0.892559,0.945358,16,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,True,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.3,True,1024,2,google/electra-large-discriminator,,
8,tkoj95qs,0.909843,0.90048,0.884286,0.892003,16,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,True,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator,,
123,30h3uizb,0.899585,0.888922,0.833542,0.988594,32,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,,False,0.2,True,1024,2,google/electra-large-discriminator,True,True
121,10cd4j62,0.896945,0.890265,0.833772,0.985201,32,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,,False,0.2,True,1024,2,google/electra-large-discriminator,True,True
26,34bizri5,0.891947,0.885088,0.829004,0.974973,32,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator,,
19,16mclf9r,0.891033,0.886239,0.840657,0.924516,32,5e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator,,
20,1yfizu0u,0.890631,0.881313,0.827188,0.97166,32,1e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator,,
35,2iwqh41v,0.889072,0.879458,0.842795,0.98181,32,5e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator,,
14,3ktl34lz,0.888166,0.87465,0.817828,0.963562,32,1e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator,,
27,1nwj2ove,0.886831,0.876612,0.835322,0.855188,32,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator,,


In [137]:
# List every available column, e.g. to choose which ones to show in the tables.
df.columns

Index(['id', 'val/orig/f1-score', 'val/orig/precision', 'val/orig/recall',
       'val/orig/support', 'val/augm/f1-score', 'val/augm/precision',
       'val/augm/recall', 'val/augm/support', 'train/loss_step', 'epoch',
       'trainer/global_step', 'val/loss', 'val/f1', 'train/loss_epoch',
       'train/f1', 'batch_size', 'datamodule_target', 'data_dir', 'name',
       'num_workers', 'pin_memory', 'taskmodule_target',
       'tokenizer_name_or_path', 'entity_annotation', 'padding', 'truncation',
       'max_length', 'pad_to_multiple_of', 'label_pad_token_id', 'label_to_id',
       'max_span_length', 'wiki_to_vec_file', 'gazetteer_path',
       'gazetteer_add_input_tokens', 'gazetteer_add_output_features',
       'model_target', 'model_name_or_path', 'learning_rate',
       'task_learning_rate', 'warmup_proportion', 'ignore_index',
       'span_length_embedding_dim', 'freeze_model', 'layer_mean',
       'augment_input', 'augment_input_prob', 'use_mlp', 'mlp_hidden_dim',
       'mlp_num_

In [98]:
# Best 50 runs by "val/f1" with the training score and the main hyperparameters.
(
    df.sort_values(by="val/f1", ascending=False)
    .head(50)[
        [
            "id",
            "val/f1",
            "train/f1",
            "batch_size",
            "learning_rate",
            "task_learning_rate",
            "wiki_to_vec_file",
            "gazetteer_add_output_features",
            "gazetteer_add_input_tokens",
            "gazetteer_path",
            "augment_input",
            "augment_input_prob",
            "use_mlp",
            "mlp_hidden_dim",
            "mlp_num_layers",
            "model_name_or_path",
        ]
    ]
)

Unnamed: 0,id,val/f1,train/f1,batch_size,learning_rate,task_learning_rate,wiki_to_vec_file,gazetteer_add_output_features,gazetteer_add_input_tokens,gazetteer_path,augment_input,augment_input_prob,use_mlp,mlp_hidden_dim,mlp_num_layers,model_name_or_path
77,189dh04u,0.910423,0.945358,16,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,True,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.3,True,1024,2,google/electra-large-discriminator
8,tkoj95qs,0.909843,0.892003,16,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,True,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator
123,30h3uizb,0.899585,0.988594,32,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,,False,0.2,True,1024,2,google/electra-large-discriminator
121,10cd4j62,0.896945,0.985201,32,1e-05,5e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,,False,0.2,True,1024,2,google/electra-large-discriminator
26,34bizri5,0.891947,0.974973,32,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator
19,16mclf9r,0.891033,0.924516,32,5e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator
20,1yfizu0u,0.890631,0.97166,32,1e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator
35,2iwqh41v,0.889072,0.98181,32,5e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator
14,3ktl34lz,0.888166,0.963562,32,1e-05,1e-05,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,0.2,True,1024,2,google/electra-large-discriminator
27,1nwj2ove,0.886831,0.855188,32,1e-05,0.0001,/vol/home-vol2/ml/altchris/projects/multi-cone...,False,False,/vol/home-vol2/ml/altchris/projects/multi-cone...,True,0.2,True,1024,2,google/electra-large-discriminator


In [78]:
# List every available column of the runs table.
# NOTE(review): the saved output of this cell has no "id" or "val/orig/..." /
# "val/augm/..." columns, so it presumably predates the current
# run_dicts_to_dataframe — re-run the notebook top to bottom to refresh it.
df.columns

Index(['train/loss_step', 'epoch', 'trainer/global_step', 'val/loss', 'val/f1',
       'train/loss_epoch', 'train/f1', 'batch_size', 'datamodule_target',
       'data_dir', 'name', 'num_workers', 'pin_memory', 'taskmodule_target',
       'tokenizer_name_or_path', 'entity_annotation', 'padding', 'truncation',
       'max_length', 'pad_to_multiple_of', 'label_pad_token_id', 'label_to_id',
       'max_span_length', 'wiki_to_vec_file', 'gazetteer_path',
       'gazetteer_add_input_tokens', 'gazetteer_add_output_features',
       'model_target', 'model_name_or_path', 'learning_rate',
       'task_learning_rate', 'warmup_proportion', 'ignore_index',
       'span_length_embedding_dim', 'freeze_model', 'layer_mean',
       'augment_input', 'augment_input_prob', 'use_mlp', 'mlp_hidden_dim',
       'mlp_num_layers'],
      dtype='object')