In [None]:
# Dataset name used to filter both the sweeps and the naive baseline runs.
dataset = "zhao"
# Metric identifier; it is not referenced by the cells visible here.
metric = "auroc_scores"
# Decoder names of the naive baselines looked up further below.
naive_models = [
    "InDegreeTargetSorter",
    "OutDegreeSourceSorter",
    "CorrelationDecoder",
    "HarmonicDegreeSorter",
]

import wandb
# Client for the wandb public API; every query below goes through it.
api = wandb.Api()
# Handle on the 'GRN_inference' project of the 'scialdonelab' entity.
p = api.project('GRN_inference', entity='scialdonelab')
# Sweeps of that project; iterated below to pick one run per model/sampling combination.
sweeps = p.sweeps()

import sys
# Put the parent directory (relative to the working directory) on the module search path.
# NOTE(review): presumably needed to import project-local modules in later cells - confirm.
sys.path.append('..')

In [None]:
# Display label -> (message-passing layer, decoder, encoder) combination to report.
# A sweep is kept only if its configuration equals one of these dicts exactly.
models_selected = {
    "GATv2Conv + MLPDecoder": dict(mplayer='GATv2Conv', decoder='MLPDecoder', encoder='GNNEncoder'),
    "GCNConv + MLPDecoder": dict(mplayer='GCNConv', decoder='MLPDecoder', encoder='GNNEncoder'),
    "GCNConv + InnerProductDecoder": dict(mplayer='GCNConv', decoder='InnerProductDecoder', encoder='GNNEncoder'),
    "GCNConv only": dict(mplayer='GCNConv', decoder='MLPDecoder', encoder='GNNGraphOnlyEncoder'),
}

def get_top_runs(sweep, n, max_runs=250, metric="val_average_precision_score"):
    """Return the ``n`` best finished runs among the earliest runs of a sweep.

    Selection happens in three steps:

    1. keep only runs whose ``state`` is ``"finished"``;
    2. order them by ``summary["_timestamp"]`` ascending (a missing timestamp
       counts as 0) and keep the first ``max_runs``;
    3. rank those by ``summary[metric]`` descending (a missing metric counts
       as 0) and keep the first ``n``.

    Args:
        sweep: Object exposing an iterable ``runs`` attribute; each run must
            provide ``state`` and a ``summary`` with a dict-like ``get``.
        n: Number of top runs to return.
        max_runs: How many of the earliest finished runs are eligible.
            Defaults to 250, the value that used to be hard-coded.
            NOTE(review): presumably caps every sweep at the same run budget
            so sweeps of different length stay comparable - confirm.
        metric: Summary key used for ranking; higher is better.

    Returns:
        A list with at most ``n`` runs, best first. The list is empty when the
        sweep has no finished run.
    """
    finished = [run for run in sweep.runs if run.state == "finished"]
    earliest = sorted(finished, key=lambda run: run.summary.get("_timestamp", 0))[:max_runs]
    # sorted() is stable, so runs with equal scores keep their chronological order.
    ranked = sorted(earliest, key=lambda run: run.summary.get(metric, 0), reverse=True)
    return ranked[:n]

# For every selected model / negative-sampling combination, keep the best run of
# the sweep that has the most runs. `sweep_top` holds two entries per combination:
#   "<decoder>_<mplayer>_<encoder>_<sampling>"         -> the selected run
#   "<decoder>_<mplayer>_<encoder>_<sampling>_n_runs"  -> run count of its sweep
sweep_top = {}
for sweep in sweeps:
    # Re-fetch the sweep by id before reading its config and runs.
    # NOTE(review): presumably the listed sweep objects are not fully populated - confirm.
    sweep = api.sweep(f"scialdonelab/GRN_inference/{sweep.id}")
    params = sweep.config["parameters"]

    if params["dataset"]['value'] != dataset:
        continue
    neg_sampling = params["negative_sampling"]['value']
    if neg_sampling not in ["random", "structured_tail", "degree_aware"]:
        continue

    sweep_encoder = params["encoder"]['value']
    if sweep_encoder == 'GAE_Encoder':
        # Treated as the same encoder as 'GNNEncoder' when matching against models_selected.
        sweep_encoder = 'GNNEncoder'
    sweep_model = {
        'mplayer': params["mplayer"]['value'],
        'decoder': params["decoder"]['value'],
        'encoder': sweep_encoder
    }
    if sweep_model not in models_selected.values():
        continue

    id_string = f"{sweep_model['decoder']}_{sweep_model['mplayer']}_{sweep_model['encoder']}_{neg_sampling}"
    top_run = get_top_runs(sweep, n=1)
    if not top_run:
        # The sweep has no finished run yet, so there is nothing to select;
        # indexing top_run[0] here would raise IndexError and abort the loop.
        continue

    cur_n_runs = len(sweep.runs)
    # On duplicates, the sweep with more runs (all states counted) wins.
    if id_string not in sweep_top or cur_n_runs > sweep_top.get(f"{id_string}_n_runs", 0):
        sweep_top[id_string] = top_run[0]
        sweep_top[f"{id_string}_n_runs"] = cur_n_runs


In [3]:
# Report the selected run of every model/sampling combination, in key order.
# Keys ending in '_n_runs' hold run counts rather than runs and are skipped.
for key in sorted(sweep_top):
    if key.endswith('_n_runs'):
        continue
    run = sweep_top[key]
    print(f"Model: {key}, Run ID: {run.id}")

# Same selection, flattened to a list of run ids in the same order.
run_ids = [
    sweep_top[name].id
    for name in sorted(sweep_top)
    if not name.endswith('_n_runs')
]
print("IDs", run_ids)

Model: InnerProductDecoder_GCNConv_GNNEncoder_degree_aware, Run ID: df54ey1e
Model: InnerProductDecoder_GCNConv_GNNEncoder_random, Run ID: njl162p1
Model: InnerProductDecoder_GCNConv_GNNEncoder_structured_tail, Run ID: 00mdnkwn
Model: MLPDecoder_GATv2Conv_GNNEncoder_degree_aware, Run ID: 53spb369
Model: MLPDecoder_GATv2Conv_GNNEncoder_random, Run ID: zgy49z99
Model: MLPDecoder_GATv2Conv_GNNEncoder_structured_tail, Run ID: gzxy5164
Model: MLPDecoder_GCNConv_GNNEncoder_degree_aware, Run ID: 4rvc133w
Model: MLPDecoder_GCNConv_GNNEncoder_random, Run ID: w64aad0a
Model: MLPDecoder_GCNConv_GNNEncoder_structured_tail, Run ID: 91111gr1
Model: MLPDecoder_GCNConv_GNNGraphOnlyEncoder_degree_aware, Run ID: ypm5rzod
Model: MLPDecoder_GCNConv_GNNGraphOnlyEncoder_random, Run ID: s4khp2rv
Model: MLPDecoder_GCNConv_GNNGraphOnlyEncoder_structured_tail, Run ID: 38qxu1ve
IDs ['df54ey1e', 'njl162p1', '00mdnkwn', '53spb369', 'zgy49z99', 'gzxy5164', '4rvc133w', 'w64aad0a', '91111gr1', 'ypm5rzod', 's4khp2rv',

In [None]:
def get_naive_run(decoder, sampling):
    """Return the first ungrouped NaiveModel run matching the given settings.

    Runs of the project are requested with ``config.model == "NaiveModel"`` and
    ``group`` set to None, ordered by ``"+created_at"``. The first run whose
    config has the requested ``decoder`` and ``negative_sampling`` and whose
    ``dataset`` equals the module-level ``dataset`` is returned.
    NOTE(review): "+created_at" is presumably ascending creation time, which
    would make this the oldest matching run - confirm.

    Args:
        decoder: Value expected under the run config key ``"decoder"``.
        sampling: Value expected under the run config key ``"negative_sampling"``.

    Returns:
        The first matching run.

    Raises:
        IndexError: If no run matches. The exception type is unchanged from the
            previous implementation; it now carries a descriptive message.
    """
    runs = api.runs(
        "scialdonelab/GRN_inference",
        filters={"config.model": "NaiveModel", "group": None},
        order="+created_at"
    )
    # Lazy scan: stop at the first hit instead of walking every run to build a full list.
    matching = (
        run for run in runs
        if run.config.get("decoder") == decoder
        and run.config.get("negative_sampling") == sampling
        and run.config.get("dataset") == dataset
    )
    run = next(matching, None)
    if run is None:
        raise IndexError(
            f"No NaiveModel run found for decoder={decoder!r}, "
            f"negative_sampling={sampling!r}, dataset={dataset!r}"
        )
    return run

# Look up one baseline run for every naive decoder / negative-sampling pair,
# stored under the key "<decoder>_<sampling>".
sampling_strategies = ("random", "structured_tail", "degree_aware")
naive_runs = {}
for decoder in naive_models:
    for sampling in sampling_strategies:
        run = get_naive_run(decoder, sampling)
        naive_runs.update({f"{decoder}_{sampling}": run})

In [5]:
# Pair every naive run with a "<decoder>_<negative_sampling>" label rebuilt from
# the run's own config (empty string when a key is missing).
run_idstrings = [
    (f"{run.config.get('decoder', '')}_{run.config.get('negative_sampling', '')}", run)
    for run in naive_runs.values()
]

# Print the runs ordered alphabetically by label.
for id_string, run in sorted(run_idstrings, key=lambda pair: pair[0]):
    print(f"Run ID: {run.id}, Model: {id_string}")

# Ids in the insertion order of naive_runs, not the sorted order printed above.
naive_run_ids = []
for run in naive_runs.values():
    naive_run_ids.append(run.id)
print("Naive run ids:", naive_run_ids)


Run ID: yn3onvws, Model: CorrelationDecoder_degree_aware
Run ID: 9653b013, Model: CorrelationDecoder_random
Run ID: hnuaaieq, Model: CorrelationDecoder_structured_tail
Run ID: n5r24sba, Model: HarmonicDegreeSorter_degree_aware
Run ID: cf0g5o3z, Model: HarmonicDegreeSorter_random
Run ID: 4p6z3ofx, Model: HarmonicDegreeSorter_structured_tail
Run ID: ulqvoni1, Model: InDegreeTargetSorter_degree_aware
Run ID: ivk0ydr6, Model: InDegreeTargetSorter_random
Run ID: 9uqudp73, Model: InDegreeTargetSorter_structured_tail
Run ID: 3kiybyyz, Model: OutDegreeSourceSorter_degree_aware
Run ID: kyuvtba8, Model: OutDegreeSourceSorter_random
Run ID: irpza8dz, Model: OutDegreeSourceSorter_structured_tail
Naive run ids: ['ivk0ydr6', '9uqudp73', 'ulqvoni1', 'kyuvtba8', 'irpza8dz', '3kiybyyz', '9653b013', 'hnuaaieq', 'yn3onvws', 'cf0g5o3z', '4p6z3ofx', 'n5r24sba']
