In [None]:
%cd ./../

In [2]:
import torch
import os
import pickle
import random 
import numpy as np
from subgraph.utils import cudavar
from GMN.configure import get_default_config
from subgraph.earlystopping import EarlyStoppingModule
from sklearn.metrics import average_precision_score, ndcg_score

In [3]:
class Namespace:
    """Minimal attribute container: each keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Write straight into the instance dictionary so every key is stored as given.
        vars(self).update(kwargs)

# Global run configuration shared by every cell below.
# Several fields are mutated in place later in the notebook: get_result() overwrites
# TASK and DATASET_NAME (and may add MAX_EDGES), and the result cells overwrite
# MARGIN and test_size before evaluating each method.
av = Namespace(   want_cuda                    = True,
                  has_cuda                   = torch.cuda.is_available(),  # combined with want_cuda to pick `device`
                  use_pairnorm               = False,
                  is_sig                     = False,
                  n_layers                   = 3,
                  conv_type                  = 'SAGE',
                  method_type                = 'order',
                  skip                       = 'learnable',
                  MIN_QUERY_SUBGRAPH_SIZE    = 5,
                  MAX_QUERY_SUBGRAPH_SIZE    = 10,
                  MIN_CORPUS_SUBGRAPH_SIZE   = 11,
                  MAX_CORPUS_SUBGRAPH_SIZE   = 15,
                  DIR_PATH                   =".",  # result/kernel caches are read from DIR_PATH + "/Datasets/"
                  DATASET_NAME               = "ptc_fr",  # overwritten per dataset during evaluation
                  RUN_TILL_ES                = True,
                  ES                         = 50,  # NOTE(review): EarlyStoppingModule below is built with a literal 50, not av.ES — confirm they are meant to match
                  transform_dim              = 16,
                  GMN_NPROPLAYERS            = 5,  # copied into both GMN sections by load_config()
                  FEAT_TYPE                  = "One",  # get_result() appends it to TASK when it is "Adjrow", "Adjrow1" or "AdjOnehot"
                  filters_1                  = 10,
                  filters_2                  = 10,
                  filters_3                  = 10,
                  neuromatch_hidden_dim      = 10,  # passed to im.NeuroMatch in get_result()
                  post_mp_dim                = 64,
                  bottle_neck_neurons        = 10,
                  tensor_neurons             = 10,               
                  dropout                    = 0,  # copied into config['graphsim']['dropout'] by load_config()
                  bins                       = 16,
                  histogram                  = False,
                  WEIGHT_DECAY               =5*10**-4,
                  BATCH_SIZE                 =128,  # used for both training and evaluation batch sizes in load_config()
                  LEARNING_RATE              =0.001,
                  CONV                       = "GCN",
                  MARGIN                     = 0.1,  # overwritten per method (0.5 for GraphSim/NeuroMatch, else 0.1) in the result cells
                  NOISE_FACTOR               = 0,
                  NUM_RUNS                   = 2,
                  TASK                       = "",  # overwritten per method during evaluation
                  test_size                  = 300,  # 25 selects the "test" split, any other value "Extra_test_300"
              )

In [4]:
def load_config():
  """Build the GMN configuration used by every model in this notebook.

  Starts from `get_default_config()`, overrides the encoder / aggregator /
  propagation sizes, adds a 'graphsim' section, copies batch size, margin and
  dropout from the global `av`, and finally seeds `random`, NumPy and PyTorch
  from `config['seed']`.

  Returns:
    The modified configuration object returned by `get_default_config()`.
  """
  config = get_default_config()

  encoder_cfg = config['encoder']
  encoder_cfg['node_hidden_sizes'] = [10]
  encoder_cfg['node_feature_dim'] = 1
  encoder_cfg['edge_feature_dim'] = 1

  aggregator_cfg = config['aggregator']
  aggregator_cfg['node_hidden_sizes'] = [10]
  aggregator_cfg['graph_transform_sizes'] = [10]
  aggregator_cfg['input_size'] = [10]

  # Both propagation networks receive the same settings (fresh list objects each).
  for net_name in ('graph_matching_net', 'graph_embedding_net'):
    net_cfg = config[net_name]
    net_cfg['node_state_dim'] = 10
    net_cfg['n_prop_layers'] = av.GMN_NPROPLAYERS
    net_cfg['edge_hidden_sizes'] = [20]
    net_cfg['node_hidden_sizes'] = [10]

  config['graphsim'] = {
    'conv_kernel_size': [10,4,2],
    'linear_size': [24, 16],
    'gcn_size': [10,10,10],
    'conv_pool_size': [3,3,2],
    'conv_out_channels': [2,4,8],
    'dropout': av.dropout,
  }

  config['training']['batch_size'] = av.BATCH_SIZE
  config['training']['margin'] = av.MARGIN
  config['evaluation']['batch_size'] = av.BATCH_SIZE
  config['model_type'] = "embedding"

  # Each RNG gets a distinct seed derived from the configured base seed.
  base_seed = config['seed']
  random.seed(base_seed)
  np.random.seed(base_seed + 1)
  torch.manual_seed(base_seed + 2)
  torch.backends.cudnn.deterministic = False

  return config

In [5]:
import os
import subgraph.iso_matching_models as im

# Early-stopping helper; in this notebook it is only used by get_result() to
# fetch the saved checkpoint via es.load_best_model().
# NOTE(review): the literal 50 equals av.ES — presumably the patience; confirm.
es = EarlyStoppingModule(av,50)
# Run on the GPU only when CUDA is both available and requested in `av`.
device = "cuda" if av.has_cuda and av.want_cuda else "cpu"

Using backend: pytorch


In [6]:
from grakel.kernels import ShortestPath, RandomWalk
import grakel
def compute_SPKernel(gr):
    """Return the normalized, label-free shortest-path kernel matrix for the networkx graphs in `gr`."""
    converted_graphs = list(grakel.graph_from_networkx(gr))
    sp_kernel = ShortestPath(normalize=True, with_labels=False)
    return sp_kernel.fit_transform(converted_graphs)
def compute_RWKernel(gr):
    """Return the normalized exponential random-walk kernel matrix for the networkx graphs in `gr`."""
    converted_graphs = list(grakel.graph_from_networkx(gr))
    rw_kernel = RandomWalk(normalize=True, kernel_type='exponential')
    return rw_kernel.fit_transform(converted_graphs)


def print_metric_table(all_results, id2, test_size=None):
    """Print a LaTeX table of one metric (mean +/- standard error) per method and dataset.

    Args:
        all_results: mapping method -> dataset -> (val_result, test_result), where
            each result is the tuple returned by the evaluate_* functions.
        id2: index of the metric mean inside a result tuple (MAP=1, MRR=4,
            MNDCG=7); the matching standard deviation is read from `id2 + 1`.
        test_size: number of queries used to turn the standard deviation into a
            standard error (std / sqrt(test_size)). Defaults to `av.test_size`.
    """
    id1 = 1 #val=0, test=1
    if test_size is None:
        test_size = av.test_size

    methods = ["SPKernel", "RWKernel", "GraphSim", "GOTSim", "SimGNN", "GMN-embed", "GMN-match",
               "NeuroMatch", "ISONET", "Node-align(Node loss)", "Node-align(Edge loss)",
               "GMN-embed-Asym", "GMN-match-Asym", "ISONET-Sym"]
    datasets = ['ptc_fr', 'ptc_fm', 'ptc_mm', 'ptc_mr', 'mutag', 'aids']

    print("\\begin{table}[hbt!]")
    print("\\centering")
    print("\\begin{tabular}{l|c|c|c|c|c|c} ")
    print("\\hline ")

    # Raw strings keep the LaTeX backslashes literal; the previous non-raw strings
    # relied on invalid escape sequences (\_, \p, \h), a SyntaxWarning on Python 3.12+.
    print(r" & PTC\_FR & PTC\_FM & PTC\_MM & PTC\_MR  & MUTAG & AIDS \\\hline")

    cell_template = r"{:.2f} $\pm$ {:.2f}"
    for method in methods:
        res = all_results[method]
        cells = [
            cell_template.format(res[dataset][id1][id2],
                                 res[dataset][id1][id2+1]/np.sqrt(test_size))
            for dataset in datasets
        ]
        print(" & ".join([method] + cells), "\\\\")

    print("\\end{tabular} ")
    print("\\end{table}")

def evaluate_embeddings_similarity_map_mrr_mndcg(av,model,sampler):
  """Score the sampler's (query, corpus) pairs with `model` and compute ranking metrics.

  Pairs come from `sampler.list_pos` (label 1) and `sampler.list_neg` (label 0);
  each pair `x` carries its query index in `x[0][0]`. Metrics are computed once
  over all pairs pooled together and once per query; queries that lack either
  positives or negatives are skipped in the per-query statistics.

  Args:
    av: global configuration, forwarded to `cudavar`.
    model: scoring model, called as `model(batch_data, batch_data_sizes, batch_adj)`.
    sampler: data object providing `list_pos`, `list_neg`, `query_graphs`,
      `create_batches` and `fetch_batched_data_by_id`.

  Returns:
    An 11-tuple: (pooled AP, mean per-query AP, std per-query AP,
    pooled RR, mean per-query RR, std per-query RR,
    pooled NDCG, mean per-query NDCG, std per-query NDCG,
    list of per-query APs, list of per-query RRs).
  """
  model.eval()
  d_pos = sampler.list_pos
  d_neg = sampler.list_neg

  def _score_pairs(pairs):
    # Batch `pairs` through the sampler and return the concatenated model scores.
    n_batches = sampler.create_batches(pairs)
    scores = []
    for i in range(n_batches):
      # Target values are ignored: only the scores are needed for ranking metrics.
      batch_data,batch_data_sizes,_,batch_adj = sampler.fetch_batched_data_by_id(i)
      # Evaluation only: skip building the autograd graph for each forward pass.
      with torch.no_grad():
        scores.append( model(batch_data,batch_data_sizes,batch_adj).data)
    return torch.cat(scores,dim=0)

  def _ranking_metrics(all_pred, npos, nneg):
    # AP, reciprocal rank and NDCG for scores ordered as npos positives then nneg negatives.
    labels = cudavar(av,torch.cat((torch.ones(npos),torch.zeros(nneg))))
    ap = average_precision_score(labels.cpu(), all_pred.cpu())
    so = np.argsort(all_pred.cpu()).tolist()[::-1]
    labels_rearranged = labels.cpu()[so]
    # Rank (1-based) of the best-scored positive pair.
    rr = 1/(labels_rearranged.tolist().index(1)+1)
    ndcg = ndcg_score([labels.cpu().tolist()],[all_pred.cpu().tolist()])
    return ap, rr, ndcg

  # Pooled metrics over every pair, regardless of query.
  ap_score, rr, ndcg = _ranking_metrics(_score_pairs(d_pos + d_neg), len(d_pos), len(d_neg))

  all_ap, all_rr, all_ndcg = [], [], []

  for q_id in range(len(sampler.query_graphs)):
    dpos = [x for x in d_pos if x[0][0]==q_id]
    dneg = [x for x in d_neg if x[0][0]==q_id]
    # Ranking metrics are undefined without at least one positive and one negative.
    if dpos and dneg:
      q_ap, q_rr, q_ndcg = _ranking_metrics(_score_pairs(dpos + dneg), len(dpos), len(dneg))
      all_ap.append(q_ap)
      all_rr.append(q_rr)
      all_ndcg.append(q_ndcg)
  return ap_score, np.mean(all_ap), np.std(all_ap), rr, np.mean(all_rr), np.std(all_rr), ndcg, np.mean(all_ndcg), np.std(all_ndcg), all_ap, all_rr


def evaluate_embeddings_similarity_kernels(av,sim_matrix,sampler):
    """Compute ranking metrics from a precomputed kernel (similarity) matrix.

    `sim_matrix` is indexed as `[query_index, corpus_index + len(sampler.query_graphs)]`,
    i.e. corpus graphs follow the query graphs along the second axis. Pairs come
    from `sampler.list_pos` (label 1) and `sampler.list_neg` (label 0), with the
    query index in `pair[0][0]` and the corpus index in `pair[0][1]`.

    Returns:
        An 11-tuple: (pooled AP, mean per-query AP, std per-query AP,
        pooled RR, mean per-query RR, std per-query RR,
        pooled NDCG, mean per-query NDCG, std per-query NDCG,
        list of per-query APs, list of per-query RRs). Queries without both a
        positive and a negative pair are left out of the per-query statistics.
    """
    pos_pairs = sampler.list_pos
    neg_pairs = sampler.list_neg

    def _kernel_scores(pairs):
        # Look up the kernel value of every (query, corpus) pair.
        return [sim_matrix[pair[0][0], pair[0][1]+len(sampler.query_graphs)] for pair in pairs]

    def _rank_metrics(positives, negatives):
        # AP, reciprocal rank and NDCG with positives listed before negatives.
        scores = np.array(_kernel_scores(positives) + _kernel_scores(negatives))
        targets = np.concatenate((np.ones(len(positives)),np.zeros(len(negatives))))
        ap = average_precision_score(targets, scores)
        descending = np.argsort(scores).tolist()[::-1]
        ranked_targets = targets[descending]
        reciprocal_rank = 1/(ranked_targets.tolist().index(1)+1)
        gain = ndcg_score([targets],[scores])
        return ap, reciprocal_rank, gain

    # Pooled metrics over every pair, regardless of query.
    ap_score, rr, ndcg = _rank_metrics(pos_pairs, neg_pairs)

    all_ap = []
    all_ndcg = []
    all_rr = []

    for q_id in range(len(sampler.query_graphs)):
        query_pos = [pair for pair in pos_pairs if pair[0][0]==q_id]
        query_neg = [pair for pair in neg_pairs if pair[0][0]==q_id]
        if len(query_pos) == 0 or len(query_neg) == 0:
            continue
        query_ap, query_rr, query_ndcg = _rank_metrics(query_pos, query_neg)
        all_ap.append(query_ap)
        all_rr.append(query_rr)
        all_ndcg.append(query_ndcg)

    return ap_score, np.mean(all_ap), np.std(all_ap), rr, np.mean(all_rr), np.std(all_rr), ndcg, np.mean(all_ndcg), np.std(all_ndcg), all_ap, all_rr

In [7]:
def fetch_gmn_data():
    """Load the validation and test data objects for the dataset currently set in `av`.

    The test split is "test" when `av.test_size` is 25 and "Extra_test_300"
    otherwise. Both objects start with `data_type = "pyg"`.

    Returns:
        (val_data, test_data)
    """
    if av.test_size == 25:
        test_mode = "test"
    else:
        test_mode = "Extra_test_300"
    test_split = im.OurMatchingModelSubgraphIsoData(av, mode=test_mode)
    val_split = im.OurMatchingModelSubgraphIsoData(av, mode="val")
    for split in (test_split, val_split):
        split.data_type = "pyg"
    return val_split, test_split

def get_result(task,dataset):
    """Evaluate the saved best checkpoint of `task` on `dataset`.

    Sets `av.TASK` / `av.DATASET_NAME`, loads the validation and test data,
    builds the model whose task-name prefix matches, restores its weights from
    `es.load_best_model()` and evaluates it on both splits.

    Args:
        task: task/checkpoint name; its prefix selects the model class.
        dataset: dataset name stored into `av.DATASET_NAME`.

    Returns:
        (val_result, test_result), each the tuple returned by
        `evaluate_embeddings_similarity_map_mrr_mndcg`.

    Raises:
        ValueError: if no model is registered for the task name.
    """
    av.TASK = task
    av.DATASET_NAME = dataset

    if av.FEAT_TYPE in ("Adjrow", "Adjrow1", "AdjOnehot"):
        av.TASK = av.TASK + "_" + av.FEAT_TYPE

    val_data, test_data = fetch_gmn_data()

    # (task-name prefix, model factory, sampler data_type, needs av.MAX_EDGES).
    # Entries are tried in order and the first matching prefix wins.
    model_table = [
        ("matching_iso_var_18_gmn_sinkhorn_param_big_hinge_score_on_embeds",
         lambda config: im.Node_align_Node_loss(av,config,1), "gmn", False),
        ("matching_iso_var_19_gmn_all",
         lambda config: im.GMN_embed(av,config,1), "gmn", False),
        ("matching_iso_var_27_gmn_edge_perm_sinkhorn_param_big_hinge_score_on_edges",
         lambda config: im.ISONET(av,config,1), "gmn", True),
        ("matching_iso_var_29_gmn_sinkhorn_param_big_hinge_score_on_edge_similarity_ff_adj_mask",
         lambda config: im.Node_align_Edge_loss(av,config,1), "gmn", False),
        ("matching_iso_var_34_gmn_embed_hinge",
         lambda config: im.GMN_embed_hinge(av,config,1), "gmn", False),
        ("matching_iso_var_35_gmn_match_hinge",
         lambda config: im.GMN_match_hinge(av,config,1), "gmn", False),
        ("matching_iso_var_36_gmn_edge_perm_sinkhorn_param_big_sqeuc_score_on_edges",
         lambda config: im.ISONET_Sym(av,config,1), "gmn", True),
        ("simgnn_noperm",
         lambda config: im.SimGNN(av,1), "pyg", False),
        ("gmn_match",
         lambda config: im.GMN_match(av,config,1), "gmn", False),
        ("matching_iso_graphsim",
         lambda config: im.GraphSim(av,config,1), "pyg", False),
        ("matching_iso_neuromatch",
         lambda config: im.NeuroMatch(1,av.neuromatch_hidden_dim,av), "pyg", False),
        ("ir_modified_gotsim",
         lambda config: im.GOTSim(av,config,1), "pyg", False),
    ]

    for prefix, build_model, data_type, needs_max_edges in model_table:
        if av.TASK.startswith(prefix):
            break
    else:
        # Previously this only printed an alert and then crashed on an unbound `model`.
        raise ValueError("ALERT!! CHECK FOR ERROR: no model registered for task {!r}".format(av.TASK))

    if needs_max_edges:
        # The edge-alignment models read av.MAX_EDGES, so it must be set before construction.
        av.MAX_EDGES = max(max([g.number_of_edges() for g in test_data.query_graphs]),
                           max([g.number_of_edges() for g in test_data.corpus_graphs]))

    # load_config() is called for every model (even those that ignore `config`)
    # because it also re-seeds the random number generators.
    config = load_config()
    model = build_model(config).to(device)
    test_data.data_type = data_type
    val_data.data_type = data_type

    model.eval()
    checkpoint = es.load_best_model()
    model.load_state_dict(checkpoint['model_state_dict'])
    val_result = evaluate_embeddings_similarity_map_mrr_mndcg(av,model,val_data)
    test_result = evaluate_embeddings_similarity_map_mrr_mndcg(av,model,test_data)

    return val_result, test_result

In [8]:
# Checkpoint/task name to evaluate, keyed first by method name and then by dataset.
# Most methods use one name for all six datasets; exceptions are given as overrides.
_ALL_DATASETS = ("ptc_fm", "ptc_fr", "ptc_mr", "ptc_mm", "mutag", "aids")

task_dict = {
    'Node-align(Node loss)': {
        **dict.fromkeys(_ALL_DATASETS, "matching_iso_var_18_gmn_sinkhorn_param_big_hinge_score_on_embeds_run1_corrected_noise0_margin2E-1"),
        "ptc_fm": "matching_iso_var_18_gmn_sinkhorn_param_big_hinge_score_on_embeds_run7_corrected_noise0_margin2E-1",
        "ptc_fr": "matching_iso_var_18_gmn_sinkhorn_param_big_hinge_score_on_embeds_run7_corrected_noise0_margin2E-1",
    },
    'GMN-embed': {
        **dict.fromkeys(_ALL_DATASETS, "matching_iso_var_19_gmn_all_run1_margin2E-1_corrected"),
        "ptc_fm": "matching_iso_var_19_gmn_all_run3_margin2E-1_corrected",
        "ptc_fr": "matching_iso_var_19_gmn_all_run3_margin2E-1_corrected",
    },
    'ISONET': dict.fromkeys(
        _ALL_DATASETS,
        "matching_iso_var_27_gmn_edge_perm_sinkhorn_param_big_hinge_score_on_edges_run1_corrected_noise0_margin2E-1"),
    'Node-align(Edge loss)': dict.fromkeys(
        _ALL_DATASETS,
        "matching_iso_var_29_gmn_sinkhorn_param_big_hinge_score_on_edge_similarity_ff_adj_mask_run1_corrected_noise0_margin2E-1"),
    'GraphSim': dict.fromkeys(
        _ALL_DATASETS,
        "matching_iso_graphsim_score_logits_pair_loss_NoInterpolation_NoBfs_One_dropout0_margin5E-1_run1_corrected_noise0_margin5E-1"),
    'GOTSim': dict.fromkeys(
        ('ptc_fr','ptc_fm','ptc_mr','ptc_mm','mutag','aids'),
        "ir_modified_gotsim_run1"),
    # The next three entries are filled by the loop below.
    'GMN-embed-Asym': {},
    'GMN-match-Asym': {},
    'ISONET-Sym': {},
    'NeuroMatch': dict.fromkeys(
        _ALL_DATASETS,
        "matching_iso_neuromatch_hid_dim10_post_mp64_dropout0_margin5E-1_run1_corrected_noise0_margin5E-1"),
    'SimGNN': {
        **dict.fromkeys(_ALL_DATASETS, "simgnn_noperm_run4_margin2E-1"),
        "ptc_fr": "simgnn_noperm_run3_margin10E-1",
        "mutag": "simgnn_noperm_run3_margin10E-1",
    },
    'GMN-match': {
        **dict.fromkeys(_ALL_DATASETS, "gmn_match_run1_margin2E-1"),
        "mutag": "gmn_match_run2_margin2E-1",
    },
}

for dataset in ['ptc_fr','ptc_fm','ptc_mr','ptc_mm','mutag','aids' ]:
    task_dict['GMN-embed-Asym'][dataset] = "matching_iso_var_34_gmn_embed_hinge_run1_margin2E-1"
    # Only ptc_fm uses the second GMN-match-Asym run.
    task_dict['GMN-match-Asym'][dataset] = (
        "matching_iso_var_35_gmn_match_hinge_run2_margin2E-1"
        if dataset == "ptc_fm"
        else "matching_iso_var_35_gmn_match_hinge_run1_margin2E-1"
    )
    task_dict['ISONET-Sym'][dataset] = "matching_iso_var_36_gmn_edge_perm_sinkhorn_param_big_sqeuc_score_on_edges_run1_corrected_noise0_margin2E-1"


# Results table for 25 query graphs

# Takes 30+ minutes when the cached Final_results_for_25_query_graphs.pkl file is not present in the Datasets folder

In [2]:
# Build (or load from cache) the results of every method on the 25-query test split.
# all_results[method][dataset] is a (val_result, test_result) pair; the kernel
# baselines have no validation result, so None is stored in its place.
fp_25 = av.DIR_PATH +"/Datasets/" +"Final_results_for_25_query_graphs"+".pkl"
av.test_size=25

if os.path.isfile(fp_25):
    # NOTE: pickle.load can execute arbitrary code; only load caches produced by this notebook.
    with open(fp_25, 'rb') as f:
        all_results = pickle.load(f)
else:
    # Graph-kernel baselines share one code path; only the kernel function differs.
    kernel_fns = {"SPKernel": compute_SPKernel, "RWKernel": compute_RWKernel}

    all_results = {}
    for task in ["SPKernel", "RWKernel", "GraphSim", "GOTSim", "SimGNN", "GMN-embed", "GMN-match", \
                 "NeuroMatch", "ISONET", "Node-align(Node loss)", "Node-align(Edge loss)", \
                 "GMN-embed-Asym", "GMN-match-Asym", "ISONET-Sym"]:

        all_results[task] = {}
        if task in ["GraphSim", "NeuroMatch"]:
            av.MARGIN = 0.5
        else:
            av.MARGIN = 0.1

        if task in kernel_fns:
            for dataset in ['ptc_fr', 'ptc_fm', 'ptc_mr', 'ptc_mm', 'aids', 'mutag']:

                av.TASK = task
                av.DATASET_NAME = dataset

                data_mode = "test" if av.test_size==25 else "Extra_test_300"
                sampler = im.OurMatchingModelSubgraphIsoData(av,mode=data_mode)
                sampler.data_type = "gmn"

                # Kernel matrices are cached per kernel, test size and dataset.
                fp = av.DIR_PATH+"/Datasets/" + task + "_iso_"+str(av.test_size)+"_"+ av.DATASET_NAME
                if os.path.isfile(fp):
                    with open(fp, 'rb') as f:
                        kernel_matrix = pickle.load(f)
                else:
                    kernel_matrix = kernel_fns[task](sampler.query_graphs + sampler.corpus_graphs )
                    with open(fp, 'wb') as f:
                        pickle.dump(kernel_matrix, f)

                # None replaces IPython's `_` (last cell output), which made the stored
                # value depend on hidden kernel state.
                all_results[task][dataset] = (None, evaluate_embeddings_similarity_kernels(av, kernel_matrix, sampler))

        else:
            for dataset in ["ptc_fm","ptc_fr","ptc_mm","ptc_mr","mutag","aids"]:
                all_results[task][dataset] =  get_result(task_dict[task][dataset],dataset)

    with open(fp_25, 'wb') as f:
        pickle.dump(all_results, f)

# This reproduces Tables 2, 3 and 4 in the main paper and Tables 10 and 11 in the Appendix (run the cell above before running this cell)

In [12]:
# Print one LaTeX table per metric column: first MAP (id2), then MRR (4).
id2 = 1 #metric_id_map = {'MAP':1, 'MRR':4, 'MNDCG':7}
for metric_column in (id2, 4):
    print_metric_table(all_results, metric_column)

\begin{table}[hbt!]
\centering
\begin{tabular}{l|c|c|c|c|c|c} 
\hline 
 & PTC\_FR & PTC\_FM & PTC\_MM & PTC\_MR  & MUTAG & AIDS \\\hline
SPKernel & 0.34 $\pm$ 0.02 & 0.37 $\pm$ 0.02 & 0.34 $\pm$ 0.02 & 0.32 $\pm$ 0.01 & 0.43 $\pm$ 0.04 & 0.36 $\pm$ 0.02 \\
RWKernel & 0.19 $\pm$ 0.01 & 0.21 $\pm$ 0.02 & 0.22 $\pm$ 0.02 & 0.19 $\pm$ 0.02 & 0.18 $\pm$ 0.03 & 0.16 $\pm$ 0.01 \\
GraphSim & 0.40 $\pm$ 0.02 & 0.36 $\pm$ 0.02 & 0.41 $\pm$ 0.03 & 0.44 $\pm$ 0.04 & 0.32 $\pm$ 0.02 & 0.32 $\pm$ 0.04 \\
GOTSim & 0.42 $\pm$ 0.03 & 0.55 $\pm$ 0.04 & 0.45 $\pm$ 0.04 & 0.53 $\pm$ 0.04 & 0.63 $\pm$ 0.04 & 0.50 $\pm$ 0.04 \\
SimGNN & 0.35 $\pm$ 0.02 & 0.54 $\pm$ 0.04 & 0.39 $\pm$ 0.02 & 0.42 $\pm$ 0.03 & 0.55 $\pm$ 0.03 & 0.32 $\pm$ 0.04 \\
GMN-embed & 0.74 $\pm$ 0.04 & 0.75 $\pm$ 0.05 & 0.76 $\pm$ 0.04 & 0.77 $\pm$ 0.04 & 0.89 $\pm$ 0.05 & 0.79 $\pm$ 0.04 \\
GMN-match & 0.77 $\pm$ 0.04 & 0.78 $\pm$ 0.04 & 0.84 $\pm$ 0.04 & 0.80 $\pm$ 0.04 & 0.91 $\pm$ 0.04 & 0.83 $\pm$ 0.04 \\
NeuroMatch & 0.67 $\pm$ 0

# Results table for 300 query graphs

# Takes 2+ hours when the cached Final_results_for_300_query_graphs.pkl file is not present in the Datasets folder

In [12]:
# Build (or load from cache) the results of every method on the 300-query test split.
# all_results[method][dataset] is a (val_result, test_result) pair; the kernel
# baselines have no validation result, so None is stored in its place.
fp_300 = av.DIR_PATH +"/Datasets/" +"Final_results_for_300_query_graphs"+".pkl"
av.test_size=300

if os.path.isfile(fp_300):
    # NOTE: pickle.load can execute arbitrary code; only load caches produced by this notebook.
    with open(fp_300, 'rb') as f:
        all_results = pickle.load(f)
else:
    # Graph-kernel baselines share one code path; only the kernel function differs.
    kernel_fns = {"SPKernel": compute_SPKernel, "RWKernel": compute_RWKernel}

    all_results = {}
    for task in ["SPKernel", "RWKernel", "GraphSim", "GOTSim", "SimGNN", "GMN-embed", "GMN-match", \
                 "NeuroMatch", "ISONET", "Node-align(Node loss)", "Node-align(Edge loss)", \
                 "GMN-embed-Asym", "GMN-match-Asym", "ISONET-Sym"]:

        all_results[task] = {}
        if task in ["GraphSim", "NeuroMatch"]:
            av.MARGIN = 0.5
        else:
            av.MARGIN = 0.1

        if task in kernel_fns:
            for dataset in ['ptc_fr', 'ptc_fm', 'ptc_mr', 'ptc_mm', 'aids', 'mutag']:

                av.TASK = task
                av.DATASET_NAME = dataset

                data_mode = "test" if av.test_size==25 else "Extra_test_300"
                sampler = im.OurMatchingModelSubgraphIsoData(av,mode=data_mode)
                sampler.data_type = "gmn"

                # Kernel matrices are cached per kernel, test size and dataset.
                fp = av.DIR_PATH+"/Datasets/" + task + "_iso_"+str(av.test_size)+"_"+ av.DATASET_NAME
                if os.path.isfile(fp):
                    with open(fp, 'rb') as f:
                        kernel_matrix = pickle.load(f)
                else:
                    kernel_matrix = kernel_fns[task](sampler.query_graphs + sampler.corpus_graphs )
                    with open(fp, 'wb') as f:
                        pickle.dump(kernel_matrix, f)

                # None replaces IPython's `_` (last cell output), which made the stored
                # value depend on hidden kernel state.
                all_results[task][dataset] = (None, evaluate_embeddings_similarity_kernels(av, kernel_matrix, sampler))

        else:
            for dataset in ["ptc_fm","ptc_fr","ptc_mm","ptc_mr","mutag","aids"]:
                all_results[task][dataset] =  get_result(task_dict[task][dataset],dataset)

    with open(fp_300, 'wb') as f:
        pickle.dump(all_results, f)

# This reproduces Table 14 in the Appendix (run the cell above before running this cell)

In [13]:
# Print one LaTeX table per metric column: first MAP (id2), then MRR (4).
id2 = 1 #metric_id_map = {'MAP':1, 'MRR':4, 'MNDCG':7}
for metric_column in (id2, 4):
    print_metric_table(all_results, metric_column)

\begin{table}[hbt!]
\centering
\begin{tabular}{l|c|c|c|c|c|c} 
\hline 
 & PTC\_FR & PTC\_FM & PTC\_MM & PTC\_MR  & MUTAG & AIDS \\\hline
SPKernel & 0.35 $\pm$ 0.01 & 0.38 $\pm$ 0.01 & 0.38 $\pm$ 0.01 & 0.37 $\pm$ 0.01 & 0.42 $\pm$ 0.01 & 0.36 $\pm$ 0.01 \\
RWKernel & 0.20 $\pm$ 0.00 & 0.21 $\pm$ 0.00 & 0.21 $\pm$ 0.00 & 0.21 $\pm$ 0.01 & 0.21 $\pm$ 0.01 & 0.19 $\pm$ 0.00 \\
GraphSim & 0.39 $\pm$ 0.01 & 0.39 $\pm$ 0.01 & 0.41 $\pm$ 0.01 & 0.45 $\pm$ 0.01 & 0.38 $\pm$ 0.01 & 0.37 $\pm$ 0.01 \\
GOTSim & 0.49 $\pm$ 0.01 & 0.60 $\pm$ 0.01 & 0.47 $\pm$ 0.01 & 0.57 $\pm$ 0.01 & 0.59 $\pm$ 0.01 & 0.55 $\pm$ 0.01 \\
SimGNN & 0.40 $\pm$ 0.01 & 0.58 $\pm$ 0.01 & 0.40 $\pm$ 0.01 & 0.45 $\pm$ 0.01 & 0.50 $\pm$ 0.01 & 0.37 $\pm$ 0.01 \\
GMN-embed & 0.82 $\pm$ 0.01 & 0.81 $\pm$ 0.01 & 0.79 $\pm$ 0.01 & 0.83 $\pm$ 0.01 & 0.87 $\pm$ 0.01 & 0.79 $\pm$ 0.01 \\
GMN-match & 0.84 $\pm$ 0.01 & 0.83 $\pm$ 0.01 & 0.86 $\pm$ 0.01 & 0.86 $\pm$ 0.01 & 0.91 $\pm$ 0.01 & 0.82 $\pm$ 0.01 \\
NeuroMatch & 0.74 $\pm$ 0