In [1]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Repository root. Set the GNN4DO_ROOT environment variable to point at a
# different checkout; otherwise the original hard-coded location is used.
root_folder = Path(os.environ.get('GNN4DO_ROOT', '/home/bcjexu/maxcut-80/bespoke-gnn4do/'))
# Put the repository first on sys.path (presumably so the project's `utils`
# package can be imported from it).
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

In [2]:
# Load every dataset and recover, per dataset, the indices of the held-out
# subset: the third part of a roughly 80/10/10 random split.

import torch

datasets = load_datasets()
indices = {}
for name, dataset in datasets.items():
    # Re-seed before each split so every dataset is split starting from seed 0.
    torch.manual_seed(0)
    total = len(dataset)
    print(f"{name} dataset size: {total}")
    train_size = int(0.8 * total)
    val_size = (total - train_size) // 2
    test_size = total - train_size - val_size
    split_lengths = [train_size, val_size, test_size]
    # Only the last subset is kept; its `.indices` are what later cells pass
    # to load_baseline_outputs.
    held_out = torch.utils.data.random_split(dataset, split_lengths)[2]
    indices[name] = held_out.indices

# Column order for the result tables: the loaded datasets followed by three
# extra names that have no entry in `datasets`.
dataset_names = [*datasets.keys(), 'RANDOM', 'RANDOM 500', 'ForcedRB']

loading PROTEINS
loading ENZYMES
loading COLLAB
loading IMDB-BINARY
loading MUTAG
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188


In [3]:
# Load max cut results from disk: trained-model outputs, then five baselines.

print("loading model losses")
maxcut_models = load_train_outputs(root_folder / 'training_runs', '230823_test')

# One entry per baseline, in load order:
# (progress message, second argument, third argument of load_baseline_outputs).
maxcut_baseline_specs = [
    ("loading sdp lift losses", '230824_sdp', 'sdp'),
    ("loading sdp proj losses", '230824_sdp', 'sdp|random_hyperplane'),
    ("loading gurobi 1s", '230823_gurobi_1s', 'gurobi'),
    ("loading gurobi 5s", '230823_gurobi_5s', 'gurobi'),
    ("loading gurobi 20s", '230916_gurobi_max_cut_20s', 'gurobi'),
]

maxcut_baseline_results = []
for message, run_name, result_key in maxcut_baseline_specs:
    print(message)
    maxcut_baseline_results.append(
        load_baseline_outputs(root_folder / 'baseline_runs', run_name, result_key, indices)
    )

(
    maxcut_sdp_lift,
    maxcut_sdp_proj,
    maxcut_grb_1s,
    maxcut_grb_5s,
    maxcut_grb_20s,
) = maxcut_baseline_results

loading model losses
loading sdp lift losses
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp: 97.83850940704346
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp: 896.5637869873046
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB sdp: 2627.7631793823243
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS sdp: 103.8618523819106
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp: 28.101939753482217
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp: 82.51778809229533
loading sdp proj losses
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp|random_hyperplane: 97.495
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp|random_hyperplane: 872.0625
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB sdp|random_hyperplane: 2624.261
load_baseline_outputs: PR

In [4]:
# Build the max cut score table: one row per model/baseline, one column per dataset.
maxcut_scores = pd.DataFrame(columns=dataset_names)

# Trained models: store the maximum validation score and print the last training loss.
for (model, dataset), (train_losses, valid_scores) in maxcut_models.items():
    maxcut_scores.at[model, dataset] = np.max(valid_scores)
    print(f"{model} {dataset} loss: {train_losses[-1]}")

# Baseline rows are written in this order, which fixes the row order of the table.
maxcut_baseline_rows = {
    'SDP lift': maxcut_sdp_lift,
    'SDP proj': maxcut_sdp_proj,
    'gurobi 1s': maxcut_grb_1s,
    'gurobi 5s': maxcut_grb_5s,
    'gurobi 20s': maxcut_grb_20s,
}
for row_label, per_dataset_scores in maxcut_baseline_rows.items():
    for dataset, score in per_dataset_scores.items():
        maxcut_scores.at[row_label, dataset] = score

# Reference row: mean number of edge_index columns per graph, only for
# datasets present in `datasets`.
# NOTE(review): if edge_index lists each undirected edge in both directions,
# this is twice the undirected edge count — confirm.
for dataset in dataset_names:
    if dataset not in datasets:
        continue
    edge_counts = [example.edge_index.shape[1] for example in datasets[dataset]]
    maxcut_scores.at['edge count', dataset] = float(sum(edge_counts)) / len(edge_counts)

#maxcut_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

In [5]:
# Load vertex cover results from disk: trained-model outputs and all baselines.

def _load_vc_baseline(run_name, result_key):
    """Call load_baseline_outputs for one run under baseline_runs, using the global `indices`."""
    return load_baseline_outputs(root_folder / 'baseline_runs', run_name, result_key, indices)

vc_training_dir = root_folder / 'training_runs'

print("loading model losses")
vc_models = load_train_outputs(vc_training_dir, '230913_VC') # 230901_VC for without PE
print("loading more model losses")
vc_models_2 = load_train_outputs(vc_training_dir, '230914_VC')
# Merge the second batch into the first; entries with the same key are overwritten.
vc_models.update(vc_models_2)
print("loading model losses, forcedRB")
vc_models_forcedrb = load_train_outputs(vc_training_dir, '230910_VC_forcedrb')

# Baselines that go into the per-dataset columns.
print("loading sdp lift losses")
vc_sdp_lift = _load_vc_baseline('230902_VC_sdp', 'sdp')
print("loading sdp proj losses")
vc_sdp_proj = _load_vc_baseline('230902_VC_sdp', 'sdp|random_hyperplane')
print("loading gurobi 1s")
vc_grb_1s = _load_vc_baseline('230902_gurobi_1s', 'gurobi')
print("loading gurobi 5s")
vc_grb_5s = _load_vc_baseline('230902_gurobi_5s', 'gurobi')
print("loading gurobi 20s")
vc_grb_20s = _load_vc_baseline('230916_gurobi_vertex_cover_20s', 'gurobi')

# Baselines that go into the 'RANDOM 500' column.
print("loading gurobi RANDOM 500")
vc_grb_bigrandom_1 = _load_vc_baseline('230909_gurobi_1s', 'gurobi')
vc_grb_bigrandom_5 = _load_vc_baseline('230909_gurobi_5s', 'gurobi')
print("loading sdp RANDOM 500")
vc_sdp_bigrandom_lift = _load_vc_baseline('230910_VC_sdp_500_RANDOM', 'sdp')
vc_sdp_bigrandom_proj = _load_vc_baseline('230910_VC_sdp_500_RANDOM', 'sdp|random_hyperplane')

# Baselines that go into the 'ForcedRB' column.
print("loading gurobi ForcedRB")
vc_grb_forcedrb_1 = _load_vc_baseline('230913_forcedrb_gurobi_1s', 'gurobi')
vc_grb_forcedrb_5 = _load_vc_baseline('230913_forcedrb_gurobi_5s', 'gurobi')
print("loading SDP ForcedRB")
vc_sdp_forcedrb_lift = _load_vc_baseline('230914_VC_sdp_ForcedRB', 'sdp')
vc_sdp_forcedrb_proj = _load_vc_baseline('230914_VC_sdp_ForcedRB', 'sdp|random_hyperplane')

loading model losses
loading more model losses
loading model losses, forcedRB
loading sdp lift losses
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp: -19.91958220601082
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp: -73.35152729797363
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp: -7.7372643822117855
load_baseline_outputs: COLLAB length: 104
load_baseline_outputs: COLLAB sdp: -56.45677478496845
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp: -16.756892976760863
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS sdp: -24.925131029316358
loading sdp proj losses
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp|random_hyperplane: -20.0
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp|random_hyperplane: -81.3
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp|rando

In [6]:
# Build the vertex cover score table: one row per model/baseline, one column per dataset.
vc_scores = pd.DataFrame(columns=dataset_names)

# Trained models: store the maximum validation score per (model, dataset).
for (model, dataset), (train_losses, valid_scores) in vc_models.items():
    vc_scores.at[model, dataset] = np.max(valid_scores)

# ForcedRB-trained models are stored the same way; the dataset name is printed as it is processed.
for (model, dataset), (train_losses, valid_scores) in vc_models_forcedrb.items():
    print(dataset)
    vc_scores.at[model, dataset] = np.max(valid_scores)

# (row label, loaded results, fixed column or None), processed in this order.
# With a fixed column, every score in the results is written to that one cell
# regardless of its dataset key, so the last score iterated is the one kept.
vc_baseline_writes = [
    ('SDP lift', vc_sdp_lift, None),
    ('SDP proj', vc_sdp_proj, None),
    ('gurobi 1s', vc_grb_1s, None),
    ('gurobi 5s', vc_grb_5s, None),
    ('gurobi 20s', vc_grb_20s, None),
    ('gurobi 1s', vc_grb_bigrandom_1, 'RANDOM 500'),
    ('gurobi 5s', vc_grb_bigrandom_5, 'RANDOM 500'),
    ('SDP lift', vc_sdp_bigrandom_lift, 'RANDOM 500'),
    ('SDP proj', vc_sdp_bigrandom_proj, 'RANDOM 500'),
    ('gurobi 1s', vc_grb_forcedrb_1, 'ForcedRB'),
    ('gurobi 5s', vc_grb_forcedrb_5, 'ForcedRB'),
    ('SDP lift', vc_sdp_forcedrb_lift, 'ForcedRB'),
    ('SDP proj', vc_sdp_forcedrb_proj, 'ForcedRB'),
]
for row_label, per_dataset_scores, fixed_column in vc_baseline_writes:
    for dataset, score in per_dataset_scores.items():
        target_column = dataset if fixed_column is None else fixed_column
        vc_scores.at[row_label, target_column] = score

# Reference row: negated mean node count per graph, only for datasets present in `datasets`.
for dataset in dataset_names:
    if dataset not in datasets:
        continue
    node_counts = [example.num_nodes for example in datasets[dataset]]
    vc_scores.at['vertex count', dataset] = -float(sum(node_counts)) / len(node_counts)

#vc_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

In [7]:
# Load max clique results from disk: trained-model outputs, then three Gurobi runs.

print("loading model losses")
clique_models = load_train_outputs(root_folder / 'training_runs', '230920_clique')

# (progress message, second argument of load_baseline_outputs), in load order.
clique_baseline_specs = [
    ("loading gurobi 1s", '230920_gurobi_max_clique_1s'),
    ("loading gurobi 5s", '230920_gurobi_max_clique_5s'),
    ("loading gurobi 20s", '230920_gurobi_max_clique_20s'),
]

clique_baseline_results = []
for message, run_name in clique_baseline_specs:
    print(message)
    clique_baseline_results.append(
        load_baseline_outputs(root_folder / 'baseline_runs', run_name, 'gurobi', indices)
    )

clique_grb_1s, clique_grb_5s, clique_grb_20s = clique_baseline_results

loading model losses
loading gurobi 1s
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB gurobi: 41.368
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM gurobi: 4.304
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY gurobi: 10.01
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG gurobi: 2.0
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES gurobi: 3.716666666666667
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS gurobi: 3.8035714285714284
load_baseline_outputs: ForcedRB length: 1000
load_baseline_outputs: ForcedRB gurobi: 19.429
loading gurobi 5s
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY gurobi: 10.01
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG gurobi: 2.0
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS gurobi: 3.8125
load_baseline_outputs: ForcedR

In [8]:
# Build the max clique score table: one row per model/baseline, one column per dataset.
clique_scores = pd.DataFrame(columns=dataset_names)

# Trained models: store the maximum validation score per (model, dataset).
for (model, dataset), (train_losses, valid_scores) in clique_models.items():
    clique_scores.at[model, dataset] = np.max(valid_scores)

# Gurobi baselines, written in this order so the row order of the table is fixed.
clique_baseline_rows = {
    'gurobi 1s': clique_grb_1s,
    'gurobi 5s': clique_grb_5s,
    'gurobi 20s': clique_grb_20s,
}
for row_label, per_dataset_scores in clique_baseline_rows.items():
    for dataset, score in per_dataset_scores.items():
        clique_scores.at[row_label, dataset] = score

In [9]:
# Display the max cut score table (rows: models/baselines, columns: datasets).
maxcut_scores

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
SDP lift,103.861852,82.517788,2627.763179,97.838509,28.10194,896.563787,,
SDP proj,102.299107,81.45,2624.261,97.495,27.947368,872.0625,,
gurobi 1s,102.361607,81.45,2624.557,97.495,27.947368,874.1265,,
gurobi 5s,102.361607,81.45,2624.599,97.495,27.947368,874.2815,,
gurobi 20s,102.361607,81.45,2624.651,97.495,27.947368,874.3905,,6341.078
edge count,145.631626,124.273333,4914.4316,193.062,39.585106,,,


In [10]:
# Save the vertex cover table under the repository's analysis_ipynb folder.
# The path is derived from root_folder rather than repeating the absolute path.
vc_scores.to_csv(root_folder / 'analysis_ipynb' / 'vc_baseline_scores.csv')
# Display the table as the cell output.
vc_scores

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
SDP lift,-24.925131,-19.919582,-56.456775,-16.756893,-7.737264,-73.351527,-434.955242,-196.419424
SDP proj,-24.973214,-20.0,-56.673077,-16.78,-7.736842,-81.3,-488.485,-197.206
gurobi 1s,-24.964286,-20.0,-67.462,-16.76,-7.736842,-75.926,-472.6,-197.027
gurobi 5s,-24.964286,-20.0,-67.462,-16.76,-7.736842,-75.926,-467.42,-196.481
gurobi 20s,-24.964286,-20.0,-67.462,-16.76,-7.736842,-75.926,,-196.455
vertex count,-39.057502,-32.633333,-74.4948,-19.773,-17.930851,,,


In [11]:
# Display the max clique score table (rows: models/baselines, columns: datasets).
clique_scores

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
gurobi 1s,3.803571,3.716667,41.368,10.01,2.0,4.304,,19.429
gurobi 5s,3.8125,3.716667,41.834,10.01,2.0,4.304,,22.455
gurobi 20s,3.8125,3.716667,41.834,10.01,2.0,4.304,,22.49


In [12]:
# Normalise each score table column-wise by its 'gurobi 20s' row.

def _normalize_by_row(scores, columns, reference_row='gurobi 20s'):
    """Return a copy of `scores` with each listed column divided by its value in `reference_row`."""
    normalized = scores.copy()
    for column in columns:
        normalized[column] = scores[column] / scores.at[reference_row, column]
    return normalized

maxcut_norms = _normalize_by_row(maxcut_scores, dataset_names)
vc_norms = _normalize_by_row(vc_scores, dataset_names)
clique_norms = _normalize_by_row(clique_scores, dataset_names)

In [13]:
# Display max cut scores divided column-wise by the 'gurobi 20s' row.
maxcut_norms

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
SDP lift,1.014656,1.01311,1.001186,1.003523,1.005531,1.025359,,
SDP proj,0.999389,1.0,0.999851,1.0,1.0,0.997338,,
gurobi 1s,1.0,1.0,0.999964,1.0,1.0,0.999698,,
gurobi 5s,1.0,1.0,0.99998,1.0,1.0,0.999875,,
gurobi 20s,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
edge count,1.422717,1.525762,1.872413,1.980225,1.416416,,,


In [14]:
# Display vertex cover scores divided column-wise by the 'gurobi 20s' row.
vc_norms

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
SDP lift,0.998432,0.995979,0.836868,0.999815,1.000055,0.966092,,0.999819
SDP proj,1.000358,1.0,0.840074,1.001193,1.0,1.070779,,1.003823
gurobi 1s,1.0,1.0,1.0,1.0,1.0,1.0,,1.002912
gurobi 5s,1.0,1.0,1.0,1.0,1.0,1.0,,1.000132
gurobi 20s,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
vertex count,1.564535,1.631667,1.104248,1.179773,2.317593,,,


In [15]:
# Display max clique scores divided column-wise by the 'gurobi 20s' row.
clique_norms

Unnamed: 0,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM,RANDOM 500,ForcedRB
gurobi 1s,0.997658,1.0,0.988861,1.0,1.0,1.0,,0.863895
gurobi 5s,1.0,1.0,1.0,1.0,1.0,1.0,,0.998444
gurobi 20s,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
