In [None]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd
import sklearn
import sklearn.linear_model
import sklearn.metrics
from sklearn.decomposition import PCA
from tqdm import tqdm

In [None]:
# Number of test runs generated per (method, dataset) configuration;
# run i is launched with seed 1234 + i and saved under a name containing i.
num_runs = 5

In [None]:
def _load_best_config(method, dataset, filename='best_val.npz'):
    """Return the object stored under 'config' in a validation result archive.

    Reads ``results/{method}/{dataset}/{filename}``. The archive is opened in a
    ``with`` block so its file handle is released as soon as the config has
    been extracted (a bare ``np.load(...)[...]`` leaves the handle open).

    NOTE: ``allow_pickle=True`` unpickles arbitrary objects; only load archives
    produced by your own runs.
    """
    with np.load(f'results/{method}/{dataset}/{filename}', allow_pickle=True) as archive:
        # [()] unwraps the stored object; callers index the result by key (e.g. config['lr']).
        return archive['config'][()]


# For every dataset and method, print one shell command per run that re-trains
# the model with the best validation hyper-parameters. Nothing is executed here:
# the commands are only printed. Run i uses seed 1234 + i.
for dataset in ['Texas', 'Cornell', 'omnipath', 'SIGNOR', 'iPTMnet']:
    config = _load_best_config('DS-AE', dataset)
    for i in range(num_runs):
        print(f"python test.py --model DS-AE --lr {config['lr']} --bias {config['bias']} --dropout {config['dropout']} --num-layers {config['num_layers']} --act {config['act']} --weight-decay {config['weight_decay']} --save-as best_test_{i} --q {config['q']} --dataset {dataset} --J {config['J']} --seed {1234+i}")

    config = _load_best_config('DS-PM', dataset)
    for i in range(num_runs):
        print(f"python test.py --model DS-PM --lr {config['lr']} --c {config['c']} --act linear --weight-decay {config['weight_decay']} --save-as best_test_{i} --q {config['q']} --dataset {dataset} --J {config['J']} --seed {1234+i}")

    config = _load_best_config('Node2Vec', dataset)
    for i in range(num_runs):
        print(f"python test.py --model Node2Vec --walk-length {config['walk_length']} --num-walks {config['num_walks']} --save-as best_test_{i} --seed {1234+i} --dataset {dataset}")

    # Methods below are tuned separately on the symmetrized ("undirected") and
    # the original ("directed") graph, each with its own best_val_* archive.
    config = _load_best_config('Shallow', dataset, 'best_val_undirected.npz')
    for i in range(num_runs):
        print(f"python hyperbolic_methods/test.py --task lp --model Shallow --manifold PoincareBall --lr {config['lr']} --weight-decay {config['weight_decay']} --num-layers 0 --use-feats 0 --dropout {config['dropout']} --act {config['act']} --bias {config['bias']} --optimizer RiemannianAdam --symmetrize 1 --save 1 --save-as best_test_{i}_undirected --cuda -1 --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('Shallow', dataset, 'best_val_directed.npz')
    for i in range(num_runs):
        print(f"python hyperbolic_methods/test.py --task lp --model Shallow --manifold PoincareBall --lr {config['lr']} --weight-decay {config['weight_decay']} --num-layers 0 --use-feats 0 --dropout {config['dropout']} --act {config['act']} --bias {config['bias']} --optimizer RiemannianAdam --symmetrize 0 --save 1 --save-as best_test_{i}_directed --cuda -1 --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('GAE', dataset, 'best_val_undirected.npz')
    for i in range(num_runs):
        print(f"python test.py --model GAE --lr {config['lr']} --bias {config['bias']} --dropout {config['dropout']} --num-layers {config['num_layers']} --act {config['act']} --weight-decay {config['weight_decay']} --symmetrize-adj 1 --save-as best_test_{i}_undirected --device cuda:0 --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('GAE', dataset, 'best_val_directed.npz')
    for i in range(num_runs):
        print(f"python test.py --model GAE --lr {config['lr']} --bias {config['bias']} --dropout {config['dropout']} --num-layers {config['num_layers']} --act {config['act']} --weight-decay {config['weight_decay']} --symmetrize-adj 0 --save-as best_test_{i}_directed --device cuda:0 --dataset {dataset} --seed {1234+i}")

    # NOTE(review): this command uses "--cuda 1" whereas every other
    # hyperbolic_methods command uses "--cuda -1" -- confirm this is intended.
    config = _load_best_config('HGCN', dataset, 'best_val_undirected.npz')
    for i in range(num_runs):
        print(f"python hyperbolic_methods/test.py --task lp --model HGCN --manifold PoincareBall --lr {config['lr']} --weight-decay {config['weight_decay']} --num-layers {config['num_layers']} --dropout {config['dropout']} --act {config['act']} --bias {config['bias']} --optimizer Adam --c {config['c']} --symmetrize 1 --save 1 --save-as best_test_{i}_undirected --cuda 1 --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('HGCN', dataset, 'best_val_directed.npz')
    for i in range(num_runs):
        print(f"python hyperbolic_methods/test.py --task lp --model HGCN --manifold PoincareBall --lr {config['lr']} --weight-decay {config['weight_decay']} --num-layers {config['num_layers']} --dropout {config['dropout']} --act {config['act']} --bias {config['bias']} --optimizer Adam --c {config['c']} --symmetrize 0 --save 1 --save-as best_test_{i}_directed --cuda -1 --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('MagNet', dataset)
    for i in range(num_runs):
        print(f"python test.py --model MagNet --lr {config['lr']} --bias {config['bias']} --dropout {config['dropout']} --num-layers {config['num_layers']} --act {config['act']} --q {config['q']} --weight-decay {config['weight_decay']} --task existence --save-as best_test_{i} --num-classes 2 --dataset {dataset} --seed {1234+i}")

    if dataset == 'omnipath':
        # omnipath is the only dataset for which TransE is run both with and
        # without edge attributes; each variant has its own tuned config.
        config = _load_best_config('TransE', dataset, 'best_val_edgeatt.npz')
        for i in range(num_runs):
            print(f"python test.py --model TransE --lr {config['lr']} --temperature {config['temperature']} --margin {config['margin']} --weight-decay {config['weight_decay']} --edge_attribute 1 --save-as best_test_edgeatt_{i} --dataset {dataset} --seed {1234+i}")

        config = _load_best_config('TransE', dataset, 'best_val_noedgeatt.npz')
        for i in range(num_runs):
            print(f"python test.py --model TransE --lr {config['lr']} --temperature {config['temperature']} --margin {config['margin']} --weight-decay {config['weight_decay']} --edge_attribute 0 --save-as best_test_noedgeatt_{i} --dataset {dataset} --seed {1234+i}")

    else:
        config = _load_best_config('TransE', dataset)
        for i in range(num_runs):
            print(f"python test.py --model TransE --lr {config['lr']} --temperature {config['temperature']} --margin {config['margin']} --weight-decay {config['weight_decay']} --edge_attribute 0 --save-as best_test_{i} --dataset {dataset} --seed {1234+i}")

    config = _load_best_config('UDS-AE', dataset)
    for i in range(num_runs):
        print(f"python test.py --model UDS-AE --lr {config['lr']} --bias {config['bias']} --dropout {config['dropout']} --num-layers {config['num_layers']} --act {config['act']} --weight-decay {config['weight_decay']} --save-as best_test_{i} --dataset {dataset} --seed {1234+i}")

In [None]:
def _pair_features(embedding, pairs):
    """Concatenate, per row of `pairs`, the embedding of column 0 with that of column 1."""
    return np.hstack((embedding.loc[pairs[:, 0]], embedding.loc[pairs[:, 1]]))


def directed_classifier(embedding, train_pairs, test_pairs,
                        train_neg_pairs=None, test_neg_pairs=None):
    """Score how well node embeddings separate true edges from negative pairs.

    A ridge classifier is fit on concatenated (first node, second node)
    embeddings of the training pairs (label 1) and training negatives
    (label 0), then scored on the test pairs and test negatives.

    Parameters
    ----------
    embedding : pd.DataFrame
        One row per node, indexed by node name. The caller's frame is not
        modified; a relabelled copy with a string index is used internally.
    train_pairs, test_pairs : np.ndarray
        Positive pairs; columns 0 and 1 hold the node names.
    train_neg_pairs, test_neg_pairs : np.ndarray, optional
        Negative pairs in the same layout. When omitted, the notebook-level
        globals of the same name are used, which keeps existing three-argument
        calls working.

    Returns
    -------
    tuple of float
        ``(AUROC, average precision)`` on the test set.

    Raises
    ------
    KeyError
        If a node name is missing from `embedding`, or if negatives are
        omitted and the corresponding global is not defined.
    """
    # Backward compatibility: fall back to the globals set by the evaluation cell.
    if train_neg_pairs is None:
        train_neg_pairs = globals()['train_neg_pairs']
    if test_neg_pairs is None:
        test_neg_pairs = globals()['test_neg_pairs']

    # set_axis returns a new frame, so the caller's index is left untouched.
    embedding = embedding.set_axis(embedding.index.astype(str), axis=0)
    # All identifiers are compared as strings to match the relabelled index.
    train_pairs = train_pairs.astype(str)
    test_pairs = test_pairs.astype(str)
    train_neg_pairs = np.asarray(train_neg_pairs).astype(str)
    test_neg_pairs = np.asarray(test_neg_pairs).astype(str)

    X_train_pos = _pair_features(embedding, train_pairs)
    X_train_neg = _pair_features(embedding, train_neg_pairs)
    X_train = np.vstack((X_train_pos, X_train_neg))
    y_train = [1] * X_train_pos.shape[0] + [0] * X_train_neg.shape[0]

    X_test_pos = _pair_features(embedding, test_pairs)
    X_test_neg = _pair_features(embedding, test_neg_pairs)
    X_test = np.vstack((X_test_pos, X_test_neg))
    y_test = [1] * X_test_pos.shape[0] + [0] * X_test_neg.shape[0]

    clf = sklearn.linear_model.RidgeClassifier(random_state=0)
    clf.fit(X_train, y_train)
    # Continuous decision values (not hard labels) are what the ranking metrics need.
    y_score_test = clf.decision_function(X_test)

    return (sklearn.metrics.roc_auc_score(y_test, y_score_test),
            sklearn.metrics.average_precision_score(y_test, y_score_test))

In [None]:
def _reversed_negative_pairs(pairs, known_edges):
    """Build negative examples by reversing each pair whose reverse is not a known edge.

    Parameters
    ----------
    pairs : np.ndarray
        2-D array; columns 0 and 1 hold the two node identifiers of each pair.
    known_edges : set of tuple
        Every (column 0, column 1) pair present in the train/val/test splits.

    Returns
    -------
    np.ndarray
        String array of shape (n_negatives, 2) whose rows are (pair[1], pair[0]).
    """
    negatives = [
        [pair[1], pair[0]]
        for pair in tqdm(pairs)
        if (pair[1], pair[0]) not in known_edges
    ]
    # reshape keeps a well-formed (0, 2) array when no negative pair exists.
    return np.array(negatives).reshape(-1, 2).astype(str)


# Evaluate every saved test embedding: for each dataset build reversed-edge
# negatives once, then score each method's embedding files with
# directed_classifier and collect one row per file.
results = []
for dataset in ['Texas', 'Cornell', 'omnipath', 'SIGNOR', 'iPTMnet']:
    with np.load(f'data/{dataset}_curated_interactions.npz') as splits:
        train, val, test = splits['train'], splits['val'], splits['test']
    all_data = np.vstack((train, val, test))
    # Set membership replaces a full np.where scan of all_data per pair.
    known_edges = set(zip(all_data[:, 0], all_data[:, 1]))

    # The validation split is folded into training for the final evaluation.
    train = np.vstack((train, val))

    # These two names are read as globals by directed_classifier.
    train_neg_pairs = _reversed_negative_pairs(train, known_edges)
    test_neg_pairs = _reversed_negative_pairs(test, known_edges)

    for method in ['DS-AE', 'DS-PM', 'Node2Vec', 'Shallow', 'GAE', 'HGCN', 'MagNet', 'TransE', 'UDS-AE']:
        result_dir = f'results/{method}/{dataset}/'
        # NOTE(review): only files containing 'v2' are kept, while the commands
        # printed earlier save as 'best_test_*' without 'v2' -- confirm the
        # naming of the files on disk.
        files = [
            x for x in os.listdir(result_dir)
            if x.endswith('npz') and 'test' in x and 'ablated' not in x and 'v2' in x
        ]
        for file in files:
            try:
                archive = np.load(f'{result_dir}{file}', allow_pickle=True)
            except Exception as err:
                # Best effort: skip unreadable files, but say so instead of hiding it.
                print(f'Skipping unreadable result file {result_dir}{file}: {err!r}')
                continue
            with archive:
                res = pd.DataFrame(data=archive['embedding'], index=archive['names'])
            try:
                auroc = directed_classifier(res, train, test)[0]
            except ValueError:
                print (method, file)
                continue

            # The two boolean flags are derived purely from the file name.
            results.append([dataset, method, 'noedgeatt' in file, 'undirected' in file, auroc])

# Naming the columns here keeps the frame well-formed even when no file was scored.
results = pd.DataFrame(results, columns=['Dataset', 'Method', 'NoEdge', 'Undirected', 'AUROC'])

In [None]:
# Name the columns of the per-file results frame; the order matches the
# five-element rows appended in the evaluation loop
# (dataset, method, 'noedgeatt' in file, 'undirected' in file, auroc).
results.columns = ['Dataset', 'Method', 'NoEdge', 'Undirected', 'AUROC']

In [None]:
# Mean AUROC per table row, one list entry per dataset (in loop order).
# Each spec is (table label, Method value, optional flag column, required flag value).
# NOTE(review): the TransE / TransE-E split relies on 'noedgeatt' appearing in
# the result file name -- confirm the files for these datasets follow that naming.
row_specs = [
    ('DSAE-Euc', 'DS-AE', None, None),
    ('DSAE-Hyp', 'DS-PM', None, None),
    ('node2vec', 'Node2Vec', None, None),
    ('PM', 'Shallow', 'Undirected', True),
    ('PM-D', 'Shallow', 'Undirected', False),
    ('GAE', 'GAE', 'Undirected', True),
    ('GAE-D', 'GAE', 'Undirected', False),
    ('HGCN', 'HGCN', 'Undirected', True),
    ('HGCN-D', 'HGCN', 'Undirected', False),
    ('MagNet', 'MagNet', None, None),
    ('TransE', 'TransE', 'NoEdge', True),
    ('TransE-E', 'TransE', 'NoEdge', False),
    ('UDS-AE', 'UDS-AE', None, None),
]

auroc_per_dataset = defaultdict(list)
for dataset in ['Texas', 'Cornell']:
    per_dataset = results[results['Dataset'] == dataset]
    for label, method_name, flag_column, flag_value in row_specs:
        selector = per_dataset['Method'] == method_name
        if flag_column is not None:
            selector = selector & (per_dataset[flag_column] == flag_value)
        # An empty selection yields NaN, which is appended as-is.
        auroc_per_dataset[label].append(per_dataset[selector]['AUROC'].mean())

In [None]:
# Print the AUROC summary as LaTeX table rows: a header, a double rule, then
# one row per method. Values appear in the order they were appended to
# auroc_per_dataset, so the header must list the datasets in that same order.
table_datasets = ['Texas', 'Cornell']
table_methods = ['DSAE-Euc', 'DSAE-Hyp', 'node2vec', 'PM', 'PM-D', 'GAE', 'GAE-D',
                 'HGCN', 'HGCN-D', 'MagNet', 'TransE', 'TransE-E', 'UDS-AE']

print('Method', '&', ' & '.join(table_datasets), '\\\\')
# Raw string: '\h' is not a valid escape sequence and triggers a warning otherwise.
print(r'\hline\hline')
for method in table_methods:
    print(method, '&', ' & '.join(['%.3f' % x for x in auroc_per_dataset[method]]), "\\\\")