In [1]:
import pandas as pd

from analysis_utils import get_run_data, process_run_data, aggregate_metrics, METRIC_NAMES

In [2]:
# Fetch every run of the project and keep one representative history row per run,
# annotated with the hyper-parameters needed for filtering and grouping later on.
data, config = get_run_data("eli-carrami/Cprt-Paper-Final")
out = []
best_basis = "biochem/val_localization_f1"  # metric used to pick the "best" row of a run
use_last = False  # when True, take the last row recorded for epoch 0 instead of the best row
for d, c in zip(data, config):
    model_cfg = c['model']['value']
    datamodule_cfg = c['datamodule']['value']
    trainer_cfg = c['trainer']['value']

    llm = model_cfg['language_model']
    if 'gpt' in llm:
        # Runs whose language model name contains 'gpt' always use their final row.
        h = d.iloc[-1].copy()
    elif use_last:
        d = d[d.epoch == 0]
        h = d.iloc[-1].copy()
    else:
        # idxmax() returns an index *label*, so it has to be paired with .loc;
        # .iloc would only pick the right row when the index is exactly 0..n-1.
        h = d.loc[d[best_basis].idxmax()].copy()

    # Layer 12 is reported as -1 (presumably "last layer" -- TODO confirm).
    layer = model_cfg['protein_layer_to_use']
    h['protein_layer_to_use'] = -1 if layer == 12 else layer
    h['esm'] = model_cfg['protein_model']
    h['llm'] = llm
    h['strategy'] = model_cfg['multimodal_strategy']
    h['fields'] = datamodule_cfg['data_field_names']
    h['holder'] = datamodule_cfg['sequence_placeholder']
    h['latent'] = model_cfg['perceiver_latent_size']
    # Optional settings fall back to defaults when absent from the run config.
    h['schedulers'] = model_cfg.get('schedulers', None)
    h['grad'] = trainer_cfg.get('accumulate_grad_batches', 1)
    h['clip'] = trainer_cfg.get('gradient_clip_val', 0)
    h['seed'] = c['seed']['value']
    h['subsample'] = datamodule_cfg['subsample_data']
    out.append(h)


serene-shadow-9 fresh-vortex-8 vivid-wood-7 magic-shape-6 avid-dust-5 helpful-aardvark-4 zany-spaceship-68 chocolate-field-67 kind-pond-66 vocal-leaf-65 hearty-darkness-18 dainty-hill-17 valiant-voice-16 logical-smoke-14 zero-shot vivid-dust-49 mild-star-13 cool-water-12 confused-dream-11 solar-pyramid-10 treasured-pyramid-5 still-sky-3 pious-puddle-2 worldly-grass-1 decent-dawn-48 ethereal-plant-47 charmed-eon-45 playful-sun-44 decent-water-43 polished-smoke-42 fancy-leaf-39 rural-forest-38 glorious-sea-45 celestial-spaceship-43 atomic-oath-39 decent-star-38 proud-sea-37 solar-voice-33 sage-tree-27 

In [44]:
# Stack the per-run rows into a frame and derive the summary columns.
df = pd.DataFrame(out).reset_index(drop=True)
binary_loc_cols = [name for name in df.columns if "_in_" in name]
df["avg_binary_loc_f1"] = df[binary_loc_cols].mean(axis=1)
df["metrics/val_perplexity"] = df["metrics/val_perplexity"].astype(float)

# Keep only the runs that match this configuration; filters are applied in order.
reference_config = {
    "strategy": "soft-prompt",
    "subsample": 1.0,
    "clip": 0,
    "grad": 1,
    "latent": 100,
    "fields": "qa",
}
for column, wanted in reference_config.items():
    df = df[df[column] == wanted]

# (A filter on `holder != " "` is currently disabled.)
# Keep runs whose scheduler setting stringifies to "None", i.e. no scheduler configured.
no_scheduler = df["schedulers"].apply(lambda value: str(value) == "None")
df = df[no_scheduler]
df

Unnamed: 0,biochem/val_is_real_f1,biochem/val_is_enzyme_hard_f1,biochem/val_kingdom_f1,biochem/val_localization_f1,biochem/val_cofactor,biochem/val_is_fake_f1,biochem/val_mw_error,metrics/val_perplexity,metrics/val_rouge1_fmeasure,metrics/val_rouge1_precision,...,strategy,fields,holder,latent,schedulers,grad,clip,seed,subsample,avg_binary_loc_f1
0,0.988915,0.878214,0.802693,0.763233,0.495652,0.933977,0.039181,2.12798,0.768996,0.770956,...,soft-prompt,qa,,100,,1,0,7,1.0,0.271608
30,0.987051,0.876346,0.820807,0.746808,0.536797,0.321644,0.037661,2.075031,0.771896,0.773551,...,soft-prompt,qa,,100,,1,0,42,1.0,0.415482


In [45]:
# Display order for the grouping variable; rows with other values are discarded.
model_order = ['q', 'qa', 'qa_real', 's']
var = 'fields'
ordering = (var, model_order)
order_col, order_levels = ordering

# Take an explicit copy so the column assignment below writes to a frame we own
# instead of a filtered slice (avoids pandas' chained-assignment warning).
df = df[df[order_col].isin(order_levels)].copy()
# An ordered categorical makes sorting (and later grouping) follow `model_order`
# rather than alphabetical order.
df[order_col] = pd.Categorical(df[order_col], categories=order_levels, ordered=True)
df = df.sort_values([order_col, 'seed'])
df

Unnamed: 0,biochem/val_is_real_f1,biochem/val_is_enzyme_hard_f1,biochem/val_kingdom_f1,biochem/val_localization_f1,biochem/val_cofactor,biochem/val_is_fake_f1,biochem/val_mw_error,metrics/val_perplexity,metrics/val_rouge1_fmeasure,metrics/val_rouge1_precision,...,strategy,fields,holder,latent,schedulers,grad,clip,seed,subsample,avg_binary_loc_f1
0,0.988915,0.878214,0.802693,0.763233,0.495652,0.933977,0.039181,2.12798,0.768996,0.770956,...,soft-prompt,qa,,100,,1,0,7,1.0,0.271608
30,0.987051,0.876346,0.820807,0.746808,0.536797,0.321644,0.037661,2.075031,0.771896,0.773551,...,soft-prompt,qa,,100,,1,0,42,1.0,0.415482


In [46]:
# Report every metric except the ROUGE ones; maps raw column name -> display name.
metrics_names = {k: v for k, v in METRIC_NAMES.items() if 'rouge' not in k}

# The aggregated frame is indexed below by (metric, 'mean') / (metric, 'std') columns.
agg_df = aggregate_metrics(df, group_by=var)
# Explicit copy: the loop adds and removes columns, which should not happen on a
# column-subset of the aggregated frame (avoids pandas' chained-assignment warning).
agg_df = agg_df[list(metrics_names)].copy()

for col, name in metrics_names.items():
    # Collapse each metric's mean/std pair into a single "mean (std)" text column.
    mean_txt = agg_df[(col, 'mean')].round(2).astype(str)
    std_txt = agg_df[(col, 'std')].round(2).astype(str)
    agg_df[name] = mean_txt + " (" + std_txt + ")"
    agg_df = agg_df.drop(columns=[(col, 'mean'), (col, 'std')])

# Copy the formatted table to the system clipboard for pasting elsewhere.
agg_df.to_clipboard()
agg_df

  return df.groupby(group_by).agg(['mean', 'std'])


Unnamed: 0_level_0,is_real F1,is_enzyme F1,kingdom F1,localization F1,cofactor Recall,binary localization\naverage F1,is_fake F1,MW MALE,perplexity,in_membrane F1,in_nucleus F1,in_mitochondria F1
fields,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
q,nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan)
qa,0.99 (0.0),0.88 (0.0),0.81 (0.01),0.76 (0.01),0.52 (0.03),0.34 (0.1),0.63 (0.43),0.04 (0.0),2.1 (0.04),0.23 (0.02),0.52 (0.2),0.28 (0.12)
qa_real,nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan)
s,nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan),nan (nan)
