In [1]:
%load_ext autoreload
%autoreload 2

In [44]:
from typing import List, Dict, Tuple

import pandas as pd
import wandb
import numpy as np

from shared.constants import WANDB_PROJECT, WANDB_ENTITY, BENCHMARKS_RESULTS, REPORTS_PATH

In [22]:
# Output locations: `<REPORTS_PATH>/results` holds notebook-level files and
# `<REPORTS_PATH>/results/dataset` holds the per-dataset result tables.
baseline_results_dir = REPORTS_PATH / 'results'
baseline_dataset_results_dir = baseline_results_dir / 'dataset'
# Creating the nested directory also creates its parents.
baseline_dataset_results_dir.mkdir(parents=True, exist_ok=True)

# W&B run loading and cleaning

In [3]:
def load_runs():
    """Load every run of the W&B project into a DataFrame, backed by a CSV cache.

    If ``runs.csv`` already exists in ``baseline_results_dir`` it is read and
    returned without contacting W&B. Otherwise the runs are fetched through the
    W&B API, written to ``runs.csv`` and then read back from it.

    Returns:
        A DataFrame with the columns ``summary``, ``config`` and ``name``.
        Because the frame always comes from the CSV file, ``summary`` and
        ``config`` hold the *string* form of the logged dictionaries on both
        the cached and the freshly downloaded path.
    """
    cache_path = baseline_results_dir.joinpath('runs.csv')
    if cache_path.exists():
        return pd.read_csv(str(cache_path))

    api = wandb.Api()
    runs = api.runs(f'{WANDB_ENTITY}/{WANDB_PROJECT}')

    summary_list, config_list, name_list = [], [], []
    for run in runs:
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items()
             if not k.startswith('_')})

        # .name is the human-readable name of the run.
        name_list.append(run.name)

    runs_df = pd.DataFrame({
        "summary": summary_list,
        "config": config_list,
        "name": name_list
    })

    runs_df.to_csv(str(cache_path), index=False)

    # Return the cached representation rather than the in-memory frame: the
    # in-memory frame holds real dicts while the cached one holds their string
    # form, and downstream parsing expects the same type on the first run as
    # on every later (cached) run.
    return pd.read_csv(str(cache_path))

In [4]:
# Load the runs (served from the `runs.csv` cache when it exists) and preview
# the first rows.
runs_df = load_runs()
runs_df.head()

Unnamed: 0,summary,config,name
0,"{'_runtime': 11, 'conductance': 0.873266899737...","{'beta': 0.713924306041298, 'alpha': 0.6991942...",2022-02-02_10-31-13-ESPRA-star-wars:split_7
1,"{'_runtime': 11, 'conductance': 0.904640563463...","{'beta': 0.7465033494641004, 'alpha': 0.779883...",2022-02-02_10-30-26-ESPRA-star-wars:split_7
2,{'snapshots/link_modularity_table': {'artifact...,"{'beta': 0.5768659119056833, 'alpha': 0.895906...",2022-02-02_10-29-39-ESPRA-star-wars:split_7
3,"{'internal_edge_density': 0.3005464829954626, ...","{'beta': 0.6621678846739937, 'alpha': 0.696420...",2022-02-02_10-28-50-ESPRA-star-wars:split_7
4,"{'modularity': 0.03598548327922235, 'snapshots...","{'beta': 0.5153812368860328, 'alpha': 0.626899...",2022-02-02_10-27-58-ESPRA-star-wars:split_7


## Clean runs and group them per dataset version

In [5]:
def _parse_logged_dict(value):
    """Turn a stringified dict cell back into a dict.

    Values that are not strings (e.g. dicts that were never serialised) are
    returned unchanged.
    """
    if not isinstance(value, str):
        return value
    # NOTE(security): `eval` executes the cell content as Python. Builtins are
    # disabled and only `nan` / `inf` are exposed, which is enough for the repr
    # of plain dict/list/str/number data, but `runs.csv` must still be treated
    # as a trusted file.
    return eval(value, {'__builtins__': {}}, {'nan': float('nan'), 'inf': float('inf')})


# Expand the `summary` and `config` dictionaries into one column per top-level
# key, prefixed with `summary.` / `config.`.
summary_records = runs_df['summary'].apply(_parse_logged_dict).tolist()
config_records = runs_df['config'].apply(_parse_logged_dict).tolist()
runs_df = pd.concat([
    runs_df.drop(columns=['summary', 'config']),
    pd.json_normalize(summary_records, max_level=0).add_prefix('summary.'),
    pd.json_normalize(config_records, max_level=0).add_prefix('config.')
], axis=1)

# Keep only runs without a logged error. The column is absent when no run
# logged one, in which case there is nothing to filter.
if 'summary.error' in runs_df.columns:
    runs_df = runs_df[runs_df['summary.error'].isna()]
# Keep only runs that have a runtime recorded in their summary.
runs_df = runs_df[runs_df['summary._runtime'].notna()]

# One frame per (dataset, version); columns that are entirely empty within a
# group are dropped. `dropna` returns a new frame, so the groups are
# independent of `runs_df`.
runs_dfs = {
    key: group.dropna(axis=1, how='all')
    for key, group in runs_df.groupby(['config.dataset', 'config.version'])
}

In [6]:
# Preview the first five rows of the first (dataset, version) group.
next(iter(runs_dfs.values())).head(5)

Unnamed: 0,name,summary._runtime,summary.conductance,summary.snapshots/z_modularity_table,summary.snapshots/community_count_table,summary._step,summary._wandb,summary.z_modularity,summary.community_count,summary.link_modularity,...,config.baseline,config.death,config.max_step,config.min_length,config.persist_threshold,config.matching_threshold,config.level,config.epsilon,config.weighted,config.reuse_partition
104,2022-02-01_18-55-26-GreeneDCD-louvain-DBLP-HCN...,1921.0,0.181434,"{'size': 184, '_type': 'table-file', 'ncols': ...","{'nrows': 5, 'sha256': '570ade0c7300404aa3f3fd...",0.0,{'runtime': 1926},2.817597,54.6,0.128548,...,GreeneDCD-louvain,3.0,-1.0,2.0,1.0,0.200061,-1.0,0.001,False,False
105,2022-02-01_18-44-51-GreeneDCD-louvain-DBLP-HCN...,602.0,0.179942,"{'size': 185, '_type': 'table-file', 'ncols': ...","{'size': 109, '_type': 'table-file', 'ncols': ...",0.0,{'runtime': 608},2.865793,51.2,0.128153,...,GreeneDCD-louvain,4.0,-1.0,2.0,1.0,0.205309,-1.0,0.001,False,False
108,2022-02-01_18-33-45-GreeneDCD-louvain-DBLP-HCN...,628.0,0.17948,{'path': 'media/table/snapshots/z_modularity_t...,"{'ncols': 2, 'nrows': 5, 'sha256': '8d264556b1...",0.0,{'runtime': 631},2.847568,50.2,0.128279,...,GreeneDCD-louvain,3.0,-1.0,2.0,1.0,0.097753,-1.0,0.001,False,False
109,2022-02-01_18-23-14-GreeneDCD-louvain-DBLP-HCN...,597.0,0.180375,{'artifact_path': 'wandb-client-artifact://xlk...,{'sha256': '54c3c4933d7119fccdea160140782154bd...,0.0,{'runtime': 602},2.853356,51.4,0.128228,...,GreeneDCD-louvain,4.0,-1.0,2.0,1.0,0.168891,-1.0,0.001,False,False
111,2022-02-01_18-05-40-GreeneDCD-louvain-DBLP-HCN...,1017.0,0.179118,{'artifact_path': 'wandb-client-artifact://xwe...,{'_latest_artifact_path': 'wandb-client-artifa...,0.0,{'runtime': 1020},2.838976,49.6,0.128354,...,GreeneDCD-louvain,3.0,-1.0,2.0,1.0,0.186049,-1.0,0.001,False,False


In [36]:
from benchmarks.evaluation import ALL_METRICS


def maximizing_metric(df: pd.DataFrame) -> Tuple[str, bool]:
    """Select the primary metric available in a run table.

    ``ALL_METRICS`` is walked in order and the first metric whose
    ``summary.<metric_name>`` column exists in ``df`` is chosen.

    Args:
        df: Run table with flattened ``summary.*`` columns.

    Returns:
        ``(metric_name, maximize)`` where ``maximize`` is True when the
        metric's ``metric_order()`` equals ``'maximize'``.

    Raises:
        ValueError: If ``df`` has no column for any metric in ``ALL_METRICS``.
    """
    for metric_cls in ALL_METRICS:
        name = metric_cls.metric_name()
        if f'summary.{name}' in df.columns:
            return name, metric_cls.metric_order() == 'maximize'

    # Fail loudly instead of returning None, which callers would try to unpack.
    raise ValueError('None of the known metrics has a summary column in the given runs')

def aggregate_metrics_by_baseline(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise the runs of one dataset version as one row per baseline.

    The primary metric is chosen by ``maximizing_metric``. For every
    ``config.baseline`` the run with the best primary-metric value is kept,
    and the baselines are ranked against each other on every metric from
    ``ALL_METRICS`` that has a ``summary.<name>`` column.

    Args:
        df: Runs of a single dataset version with flattened ``summary.*`` and
            ``config.*`` columns. The frame is not modified.

    Returns:
        A DataFrame indexed by ``baseline`` containing, for the best run of
        each baseline: the metric values (without the ``summary.`` prefix),
        one ``<metric>_rank`` column per metric, ``name`` and ``runtime``
        (when present in ``df``), ``total`` (number of metrics on which the
        baseline ranks exactly 1) and ``run_count`` (non-null
        ``config.version`` entries of that baseline).
    """
    # Work on a private copy so the NaN replacement below never leaks into the
    # caller's frame.
    df = df.copy()

    metric, desc = maximizing_metric(df)
    metrics = [metric_cls for metric_cls in ALL_METRICS if f'summary.{metric_cls.metric_name()}' in df.columns]
    metric_col = f'summary.{metric}'

    print(f'Aggregating baselines by {metric}')
    if df[metric_col].isna().any():
        print('Found NaN values in metric')
        # Replace missing values by the worst possible value of the primary
        # metric. `np.inf` is used because the `NINF` / `PINF` aliases no longer
        # exist in NumPy 2.
        df[metric_col] = df[metric_col].fillna(-np.inf if desc else np.inf)

    grouped = df\
        .sort_values(metric_col, ascending=not desc)\
        .groupby(['config.baseline'])
    statistics = grouped.agg(
        run_count=('config.version', 'count'),
    )

    # Rows are sorted best-first, so the first row of each group is the best run.
    best_df = grouped.head(1).set_index('config.baseline')
    wanted = {
        *[f'summary.{metric_cls.metric_name()}' for metric_cls in metrics],
        'name', 'summary._runtime'
    }
    best_df = best_df[[col for col in best_df.columns if col in wanted]].copy()

    rank_columns = []
    for metric_cls in metrics:
        name = metric_cls.metric_name()
        maximize = metric_cls.metric_order() == 'maximize'
        best_df[f'{name}_rank'] = best_df[f'summary.{name}'].rank(ascending=not maximize)
        rank_columns.append(f'{name}_rank')

    best_df = best_df.rename(columns={'summary._runtime': 'runtime'})
    best_df = best_df.rename(columns=lambda col: col.replace('summary.', ''))
    best_df = best_df[sorted(map(str, best_df.columns))].copy()
    # Number of metrics on which this baseline is the sole best (rank == 1).
    best_df['total'] = sum(best_df[col] == 1 for col in rank_columns)

    result = best_df.join(statistics, how='inner')
    result.index.name = 'baseline'
    return result


In [38]:
# Aggregate every (dataset, version) group, write the table to
# `<dataset>_<version>.csv` and keep it for the cross-dataset comparison.
dataset_results_dfs = {}

for key, df in runs_dfs.items():
    dataset, version = key
    print(f'Processing {dataset} {version}')
    df = aggregate_metrics_by_baseline(df)
    csv_path = baseline_dataset_results_dir.joinpath(f'{dataset}_{version}.csv')
    df.to_csv(str(csv_path), index=True)
    dataset_results_dfs[key] = df

Processing DBLP-HCN split_5
Aggregating baselines by nf1
Processing DBLP-HCN static
Aggregating baselines by nf1
Processing DBLP-V1 split_5_small
Aggregating baselines by modularity
Processing email-Eu-core static
Aggregating baselines by nmi
Processing enron-mail-20150507 split_10
Aggregating baselines by modularity
Processing enron-mail-20150507 split_10_small
Aggregating baselines by modularity
Processing enron-mail-20150507 split_20
Aggregating baselines by modularity
Processing enron-mail-20150507 split_5
Aggregating baselines by modularity
Processing house-of-representatives-congress-116 split_10
Aggregating baselines by modularity
Processing house-of-representatives-congress-116 split_10_small
Aggregating baselines by modularity
Processing house-of-representatives-congress-116 split_5
Aggregating baselines by modularity
Processing house-of-representatives-congress-116 static_small
Aggregating baselines by modularity
Processing imdb-5000-movie-dataset split_5
Aggregating baseline

In [69]:
# Split the per-dataset tables by version: 'static' / 'static_small' versions
# go to the static group, every other version to the dynamic group.
static_dataset_results_dfs = {
    key: df
    for key, df in dataset_results_dfs.items()
    if key[1] in ('static', 'static_small')
}

dynamic_dataset_results_dfs = {
    key: df
    for key, df in dataset_results_dfs.items()
    if key[1] not in ('static', 'static_small')
}

In [70]:
from collections import defaultdict


def aggregate_metrics_by_type(dfs: Dict[Tuple[str, str], pd.DataFrame]):
    """Rank baselines against each other on every dataset version.

    Args:
        dfs: Mapping of ``(dataset, version)`` to a frame indexed by baseline
            that has a ``total`` column.

    Returns:
        A DataFrame indexed by ``baseline`` with one ``<dataset>:<version>``
        column per entry of ``dfs``. Each holds the baseline's rank by
        ``total`` on that dataset version (1 = highest ``total``, NaN where
        the baseline has no row). A final ``total`` column counts the dataset
        versions on which the baseline's rank is exactly 1.
    """
    baselines = defaultdict(dict)
    for (dataset, version), df in dfs.items():
        # `Series.items()` replaces `iteritems()`, which pandas 2.0 removed.
        for baseline, rank in df['total'].rank(ascending=False).items():
            baselines[baseline][f'{dataset}:{version}'] = rank

    # Outer keys become columns, so transpose to get one row per baseline.
    result_df = pd.DataFrame(baselines).T
    result_df.index.name = 'baseline'
    # Drop baselines that have no rank on any dataset version.
    result_df = result_df[~result_df.isna().all(axis=1)]
    result_df['total'] = (result_df == 1).sum(axis=1)

    return result_df

In [71]:
# Rank the baselines across the static dataset versions and export the table.
static_results_df = aggregate_metrics_by_type(static_dataset_results_dfs)
static_results_df.to_csv(str(baseline_results_dir / 'static_results.csv'), index=True)
static_results_df

Unnamed: 0_level_0,DBLP-HCN:static,email-Eu-core:static,house-of-representatives-congress-116:static_small,imdb-5000-movie-dataset:static,misc-football:static,social-distancing-student:static,star-wars:static,ucidata-zachary:static,total
baseline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ComE,1.0,4.0,2.0,2.0,,,,,1
Louvain,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7
GEMSEC,,3.0,4.0,,3.0,,3.0,2.5,0
ANGEL,,2.0,3.0,,2.0,,2.0,2.5,0


In [72]:
# Rank the baselines across the dynamic dataset versions and export the table.
dynamic_results_df = aggregate_metrics_by_type(dynamic_dataset_results_dfs)
dynamic_results_df.to_csv(str(baseline_results_dir / 'dynamic_results.csv'), index=True)
dynamic_results_df

Unnamed: 0_level_0,DBLP-HCN:split_5,enron-mail-20150507:split_10,enron-mail-20150507:split_10_small,enron-mail-20150507:split_20,enron-mail-20150507:split_5,house-of-representatives-congress-116:split_10,house-of-representatives-congress-116:split_10_small,house-of-representatives-congress-116:split_5,imdb-5000-movie-dataset:split_5,social-distancing-student:split_5,star-wars:split_5,star-wars:split_7,sx-mathoverflow:split_10,sx-mathoverflow:split_6,sx-superuser:split_10,sx-superuser:split_6,DBLP-V1:split_5_small,total
baseline,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GreeneDCD-louvain,1.0,2.0,3.0,2.5,1.0,3.5,2.0,5.0,1.0,1.0,3.5,5.5,1.0,1.5,1.0,1.0,,7
Louvain-Dynamic,2.5,1.0,2.0,1.0,2.0,1.0,1.0,1.0,3.0,3.0,3.5,2.0,2.5,1.5,3.0,4.5,1.0,6
Dynamo,2.5,3.0,1.0,2.5,3.0,3.5,5.0,,2.0,2.0,6.0,5.5,,,,2.5,2.0,1
GreeneDCD-MOSES,,,,,,,5.0,2.0,,,3.5,3.5,4.0,4.0,2.0,4.5,3.0,0
ESPRA,,,,,,3.5,5.0,3.5,,,3.5,3.5,,,,,,0
ARCHANGEL,,,,,,3.5,3.0,3.5,,,1.0,1.0,2.5,3.0,,2.5,,2
