## Subgraph classification 

In [5]:
#Imports
import numpy as np
from NNetwork import NNetwork as nn
import networkx as nx
#import utils.NNetwork as nn
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn import metrics, model_selection
from tqdm import trange
from sklearn.cluster import KMeans
import matplotlib.gridspec as gridspec
from tqdm import trange
from numpy import genfromtxt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [6]:
def compute_accuracy_metrics(Y_test, P_pred, use_opt_threshold=False, verbose=False):
    # y_test = binary label 
    # P_pred = predicted probability for y_test
    # compuate various binary classification accuracy metrics
    fpr, tpr, thresholds = metrics.roc_curve(Y_test, P_pred, pos_label=None)
    mythre = thresholds[np.argmax(tpr - fpr)]
    myauc = metrics.auc(fpr, tpr)
    # print('!!! auc', myauc)
    
    # Compute classification statistics
    threshold = 0.5
    if use_opt_threshold:
        threshold = mythre
    
    Y_pred = P_pred.copy()
    Y_pred[Y_pred < threshold] = 0
    Y_pred[Y_pred >= threshold] = 1

    mcm = confusion_matrix(Y_test, Y_pred)
    tn = mcm[0, 0]
    tp = mcm[1, 1]
    fn = mcm[1, 0]
    fp = mcm[0, 1]

    accuracy = (tp + tn) / (tp + tn + fp + fn)
    sensitivity = tn / (tn + fp)
    specificity = tp / (tp + fn)
    precision = tp / (tp + fp)
    fall_out = fp / (fp + tn)
    miss_rate = fn / (fn + tp)

    # Save results
    results_dict = {}
    results_dict.update({'Y_test': Y_test})
    results_dict.update({'Y_pred': Y_pred})
    results_dict.update({'AUC': myauc})
    results_dict.update({'Opt_threshold': mythre})
    results_dict.update({'Accuracy': accuracy})
    results_dict.update({'Sensitivity': sensitivity})
    results_dict.update({'Specificity': specificity})
    results_dict.update({'Precision': precision})
    results_dict.update({'Fall_out': fall_out})
    results_dict.update({'Miss_rate': miss_rate})
    
    if verbose:
        for key in [key for key in results_dict.keys()]:
            print('% s ===> %.3f' % (key, results_dict.get(key)))
    return results_dict

In [14]:
# X: a list of NNetwork objects
# num_edges, num_nodes, min_degree, max_degree, diameter
import math

def MACC_nx(G, k):
    # G = networkx Network
    # k = length of chain motif
    # Compute k x k Matrix of Average Clustering Coefficients
    G_nn = nn.NNetwork()
    G_nn.add_edges( list(G.edges) )
    X0, embs0 = G_nn.get_patches(k=k, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
    MACC = np.sum(X0, axis=1)/X0.shape[1]
    MACC = MACC.reshape(k,k)
    return MACC

def datagen_graph_features(X, k0_list):
    # generate the toy dataset
    # file = open(file_name, 'w+', newline='')
    output_dict_list = []
    
    header = ["num_edges", "num_nodes", "min_degree", "max_degree", "diameter", 
              "degree_assortativity_coef", "num_clique", "avg_clustering_coef", 
              "density"]
  
    for i in trange(len(X)): #X.shape[1]: total number of graphs
                                #X.shape[0]: num_nodes^2
        output_dict = {}
        G_nn = X[i]
        G=nx.Graph(G_nn.get_edges())
        
        num_edges = G.number_of_edges()
        min_degree = min(list(G.degree), key=lambda x: x[1])[1]
        max_degree = max(list(G.degree), key=lambda x: x[1])[1]
        diameter = nx.diameter(G)

        degree_assortativity_coef = nx.degree_assortativity_coefficient(G)
        num_clique = nx.graph_clique_number(G)
        avg_clustering_coef = nx.average_clustering(G)
        #small_world_coef = nx.omega(G)
        density = nx.density(G)

        output_dict.update({"num_nodes": len(G.nodes())})
        output_dict.update({"num_edges": G.number_of_edges()})
        output_dict.update({"min_degree": min(list(G.degree), key=lambda x: x[1])[1]})
        output_dict.update({"max_degree": max(list(G.degree), key=lambda x: x[1])[1]})
        output_dict.update({"diameter": nx.diameter(G)})
        a = nx.degree_assortativity_coefficient(G)
        if math.isnan(a):
            a = 0
        output_dict.update({"degree_assortativity_coef": a})
        output_dict.update({"num_cliques": nx.graph_clique_number(G)})
        output_dict.update({"Avg_clustering_coeff": nx.average_clustering(G)})
        output_dict.update({"edeg_density": nx.density(G)})
        for k0 in k0_list:
            output_dict.update({"MACC k0={}".format(k0): MACC_nx(G, k=k0)})

        output_dict_list.append(output_dict)
        
    return output_dict_list

def run_binary_classification(output_dict_list, y, scale=1):

    X_train_idx, X_test_idx, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=0.50, random_state=42)        

    
    keys = list(output_dict_list[0].keys())
    results_dict_total = {}
    for key in keys:
        feature_vector_list = []
        for i in np.arange(len(output_dict_list) ):
            output_dict = output_dict_list[i]
            key
            if key.split(" ")[0] == "MACC":
                feature_vector_list.append(output_dict.get(key).reshape(-1,1)) 
            else: 
                feature_vector_list.append(output_dict.get(key)) 

        X = np.asarray(feature_vector_list)
        if key.split(" ")[0] == "MACC":
            X = X.reshape(len(feature_vector_list), -1) 
        else: 
            X = X[:,np.newaxis]

        print("X.shape", X.shape)

        X_train = X[X_train_idx,:]
        X_test = X[X_test_idx,:]
    
        scale0 = 1
        if key.split(" ")[0] == "MACC":
            scale0 = scale
        clf = LogisticRegression(random_state=0).fit((X_train-np.mean(X_train, axis=0))/scale0, y_train)
        y_pred = clf.predict((X_test-np.mean(X_test, axis=0))/scale0)
        P_pred = clf.predict_proba((X_test-np.mean(X_test, axis=0))/scale0)

        results_dict = compute_accuracy_metrics(y_test, P_pred[:,1], use_opt_threshold=False, verbose=False)

        print("method = {}, AUC = {:f}".format(key, results_dict.get("AUC")))

        results_dict_total.update({key: results_dict})
    return results_dict_total
    

In [11]:
# Subgraph sampling for subgraph classification
# Output = subgraph_list as NNetwork objects

ntwk_list = ['Wisconsin87', 'UCLA26'] # Wisconsin87, UCLA26, Caltech36
label_list = []
subgraph_list = []
num_subgraphs = 100
k_list = [110]
k0_list = [15,20]

#X_MACC = []
#X_edge_density = []
#X_adj = []

nn_network_list = []
for ntwk in ntwk_list:
    ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
    path = "Data/Facebook/" + str(ntwk) + '.txt'
    G = nn.NNetwork()
    G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
    nn_network_list.append(G)
    print('num nodes in G', len(G.nodes()))
    print('num edges in G', len(G.get_edges()))

y = [0]*num_subgraphs + [1]*num_subgraphs

subgraph_list_dict = {}
output_dict_list_total = {}

for k in k_list:
    print("sampling subgraphs with k={} nodes".format(k))
    subgraph_list = []
    for a in np.arange(len(nn_network_list)):
        G = nn_network_list[a]
        
        for i in trange(num_subgraphs):
            label_list.append(str(ntwk))

            # take the induced subgraph 
            X, embs = G.get_patches(k=k, sample_size=10, skip_folded_hom=False, sampling_alg = 'pivot')
            H = G.subgraph(embs[-1]) # take the last instance of MCMC sampling
            #A_adj = H.get_adjacency_matrix()
            subgraph_list.append(H)
            #subgraph_list.append(A_adj)

            """
            ## compute summary stats of subgraphs 

            A_adj = H.get_adjacency_matrix()
            X_adj.append(A_adj)

            X0, embs0 = H.get_patches(k=k0, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
            MACC = np.sum(X0, axis=1)/X0.shape[1]
            MACC = MACC.reshape(k0,k0)
            X_MACC.append(MACC)

            X_edge_density.append(np.linalg.norm(A_adj, 1)/len(H.nodes()))
            """

    subgraph_list_dict.update({"k={}".format(k) : subgraph_list})

    print("extracting subgraph features..")
    output_dict_list = datagen_graph_features(X=subgraph_list, k0_list = k0_list)
    results_dict_total = run_binary_classification(output_dict_list, y=y, scale=0.5)
    
    output_dict_list_total.update({"k={}".format(k): results_dict_total})

num nodes in G 23842
num edges in G 1671904
num nodes in G 20467
num edges in G 1495226
sampling subgraphs with k=110 nodes


100%|█████████████████████████████████████████| 100/100 [00:21<00:00,  4.57it/s]
100%|█████████████████████████████████████████| 100/100 [00:19<00:00,  5.06it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 200/200 [03:17<00:00,  1.01it/s]

X.shape (200, 1)
method = num_nodes, AUC = 0.425970
X.shape (200, 1)
method = num_edges, AUC = 0.665866
X.shape (200, 1)
method = min_degree, AUC = 0.517607
X.shape (200, 1)
method = max_degree, AUC = 0.625850
X.shape (200, 1)
method = diameter, AUC = 0.426170
X.shape (200, 1)
method = degree_assortativity_coef, AUC = 0.664266
X.shape (200, 1)
method = num_cliques, AUC = 0.588235
X.shape (200, 1)
method = Avg_clustering_coeff, AUC = 0.526611
X.shape (200, 1)
method = edeg_density, AUC = 0.677071
X.shape (200, 225)
method = MACC k0=15, AUC = 0.686675
X.shape (200, 400)
method = MACC k0=20, AUC = 0.711084





In [58]:
G0 = nn_network_list[0]

In [60]:
H = G0.k_node_ind_subgraph(k=10)

In [61]:
H.get_edges()

[['767', '126'],
 ['767', '26'],
 ['767', '631'],
 ['126', '330'],
 ['126', '767'],
 ['126', '26'],
 ['126', '90'],
 ['26', '126'],
 ['26', '330'],
 ['26', '767'],
 ['26', '554'],
 ['631', '227'],
 ['631', '767'],
 ['631', '304'],
 ['227', '631'],
 ['304', '90'],
 ['304', '631'],
 ['330', '126'],
 ['330', '26'],
 ['330', '376'],
 ['330', '554'],
 ['330', '90'],
 ['90', '126'],
 ['90', '330'],
 ['90', '376'],
 ['90', '304'],
 ['90', '554'],
 ['376', '554'],
 ['376', '330'],
 ['376', '90'],
 ['554', '330'],
 ['554', '90'],
 ['554', '26'],
 ['554', '376']]

In [73]:
# Subgraph sampling for subgraph classification
# Output = subgraph_list as NNetwork objects

ntwk_list = ['Caltech36', 'Simmons81', 'Reed98', 'NYU9', 'Virginia63', 'UCLA26', 'Wisconsin87'] # Wisconsin87, UCLA26, Caltech36

#ntwk_list = ['Caltech36', 'Simmons81', 'Reed98', 'NYU9'] # Wisconsin87, UCLA26, Caltech36

label_list = []
subgraph_list = []
num_subgraphs = 100
k_list = [30]
k0_list = [5, 10,20,30]

#X_MACC = []
#X_edge_density = []
#X_adj = []

nn_network_list = []
for ntwk in ntwk_list:
    ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
    path = "Data/Facebook/" + str(ntwk) + '.txt'
    G = nn.NNetwork()
    G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
    nn_network_list.append(G)
    print('num nodes in G', len(G.nodes()))
    print('num edges in G', len(G.get_edges()))

y = [0]*num_subgraphs + [1]*num_subgraphs

results_all = {}

for a in np.arange(len(nn_network_list)):
    for b in np.arange(a+1, len(nn_network_list)):
        if a != b:
            nn_network_list_sub = [nn_network_list[a],nn_network_list[b]]
            print("Classifying subgraphs in {}-{} ...".format(ntwk_list[a], ntwk_list[b]))

            subgraph_list_dict = {}
            output_dict_list_total = {}

            for k in k_list:
                print("sampling subgraphs with k={} nodes".format(k))
                subgraph_list = []
                for q in np.arange(len(nn_network_list)):
                    G = nn_network_list[q]

                    for i in trange(num_subgraphs):
                        label_list.append(str(ntwk))

                        # take the induced subgraph 
                        #X, embs = G.get_patches(k=k, sample_size=10, skip_folded_hom=False, sampling_alg = 'pivot')
                        #H = G.subgraph(embs[-1]) # take the last instance of MCMC sampling
                    
                        H = G.k_node_ind_subgraph(k=k)
                        while H is None:
                            H = G.k_node_ind_subgraph(k=k)
                        
                        #A_adj = H.get_adjacency_matrix()
                        subgraph_list.append(H)
                        #subgraph_list.append(A_adj)

                        """
                        ## compute summary stats of subgraphs 

                        A_adj = H.get_adjacency_matrix()
                        X_adj.append(A_adj)

                        X0, embs0 = H.get_patches(k=k0, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
                        MACC = np.sum(X0, axis=1)/X0.shape[1]
                        MACC = MACC.reshape(k0,k0)
                        X_MACC.append(MACC)

                        X_edge_density.append(np.linalg.norm(A_adj, 1)/len(H.nodes()))
                        """

                subgraph_list_dict.update({"k={}".format(k) : subgraph_list})

                print("extracting subgraph features..")
                output_dict_list = datagen_graph_features(X=subgraph_list, k0_list = k0_list)
                results_dict_total = run_binary_classification(output_dict_list, y=y, scale=0.3)

                output_dict_list_total.update({"subgraph_list": subgraph_list_dict})
                output_dict_list_total.update({"k={}".format(k): results_dict_total})
            output_dict_list_total.update({"subgraph_list": subgraph_list_dict})
            results_all.update({"{}-{}".format(ntwk_list[a], ntwk_list[b]) : output_dict_list_total})
            np.save("Output_files/subgraph_classification_data30", results_all)

num nodes in G 769
num edges in G 33312
num nodes in G 1518
num edges in G 65976
num nodes in G 962
num edges in G 37624
num nodes in G 21679
num edges in G 1431430
num nodes in G 21325
num edges in G 1396356
num nodes in G 20467
num edges in G 1495226
num nodes in G 23842
num edges in G 1671904
Classifying subgraphs in Caltech36-Simmons81 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 798.54it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 728.77it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 865.37it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 113.78it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 152.49it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 166.81it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 139.62it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:56<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.850540
X.shape (700, 1)
method = min_degree, AUC = 0.582833
X.shape (700, 1)
method = max_degree, AUC = 0.751301
X.shape (700, 1)
method = diameter, AUC = 0.782313
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.604242
X.shape (700, 1)
method = num_cliques, AUC = 0.792117
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.799120
X.shape (700, 1)
method = edeg_density, AUC = 0.850540
X.shape (700, 25)
method = MACC k0=5, AUC = 0.890756
X.shape (700, 100)
method = MACC k0=10, AUC = 0.947579
X.shape (700, 400)
method = MACC k0=20, AUC = 0.967587
X.shape (700, 900)
method = MACC k0=30, AUC = 0.957183
Classifying subgraphs in Caltech36-Reed98 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 512.54it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 699.01it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 781.05it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 122.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 138.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.79it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 130.68it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:54<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.821329
X.shape (700, 1)
method = min_degree, AUC = 0.655062
X.shape (700, 1)
method = max_degree, AUC = 0.699080
X.shape (700, 1)
method = diameter, AUC = 0.824930
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.629052
X.shape (700, 1)
method = num_cliques, AUC = 0.693477
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.682673
X.shape (700, 1)
method = edeg_density, AUC = 0.821329
X.shape (700, 25)
method = MACC k0=5, AUC = 0.769108
X.shape (700, 100)
method = MACC k0=10, AUC = 0.877151
X.shape (700, 400)
method = MACC k0=20, AUC = 0.898359
X.shape (700, 900)
method = MACC k0=30, AUC = 0.911164
Classifying subgraphs in Caltech36-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 604.92it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 710.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 814.38it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 132.46it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 137.95it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 157.13it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 122.87it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:50<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.910164
X.shape (700, 1)
method = min_degree, AUC = 0.576831
X.shape (700, 1)
method = max_degree, AUC = 0.821329
X.shape (700, 1)
method = diameter, AUC = 0.791116
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.651461
X.shape (700, 1)
method = num_cliques, AUC = 0.743297
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.832333
X.shape (700, 1)
method = edeg_density, AUC = 0.910164
X.shape (700, 25)
method = MACC k0=5, AUC = 0.932773
X.shape (700, 100)
method = MACC k0=10, AUC = 0.956383
X.shape (700, 400)
method = MACC k0=20, AUC = 0.954782
X.shape (700, 900)
method = MACC k0=30, AUC = 0.948780
Classifying subgraphs in Caltech36-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 783.05it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 661.15it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 773.01it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 125.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 138.74it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 152.06it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 127.78it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:58<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.854342
X.shape (700, 1)
method = min_degree, AUC = 0.562225
X.shape (700, 1)
method = max_degree, AUC = 0.762505
X.shape (700, 1)
method = diameter, AUC = 0.832933
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.634254
X.shape (700, 1)
method = num_cliques, AUC = 0.703081
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.778711
X.shape (700, 1)
method = edeg_density, AUC = 0.854342
X.shape (700, 25)
method = MACC k0=5, AUC = 0.899160
X.shape (700, 100)
method = MACC k0=10, AUC = 0.941577
X.shape (700, 400)
method = MACC k0=20, AUC = 0.927171
X.shape (700, 900)
method = MACC k0=30, AUC = 0.918367
Classifying subgraphs in Caltech36-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 774.56it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 728.97it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 818.25it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.98it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 142.75it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 152.78it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 129.01it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:01<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.864546
X.shape (700, 1)
method = min_degree, AUC = 0.644058
X.shape (700, 1)
method = max_degree, AUC = 0.799120
X.shape (700, 1)
method = diameter, AUC = 0.886355
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.618647
X.shape (700, 1)
method = num_cliques, AUC = 0.760304
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.784714
X.shape (700, 1)
method = edeg_density, AUC = 0.864546
X.shape (700, 25)
method = MACC k0=5, AUC = 0.918367
X.shape (700, 100)
method = MACC k0=10, AUC = 0.965586
X.shape (700, 400)
method = MACC k0=20, AUC = 0.962385
X.shape (700, 900)
method = MACC k0=30, AUC = 0.958784
Classifying subgraphs in Caltech36-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 668.66it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 569.03it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 809.42it/s]
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 95.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 137.58it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 151.37it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 126.03it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:12<00:00,  1.99s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.867347
X.shape (700, 1)
method = min_degree, AUC = 0.631853
X.shape (700, 1)
method = max_degree, AUC = 0.769108
X.shape (700, 1)
method = diameter, AUC = 0.810524
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.674270
X.shape (700, 1)
method = num_cliques, AUC = 0.732293
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.772309
X.shape (700, 1)
method = edeg_density, AUC = 0.867347
X.shape (700, 25)
method = MACC k0=5, AUC = 0.910764
X.shape (700, 100)
method = MACC k0=10, AUC = 0.919168
X.shape (700, 400)
method = MACC k0=20, AUC = 0.917167
X.shape (700, 900)
method = MACC k0=30, AUC = 0.897559
Classifying subgraphs in Simmons81-Reed98 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 747.74it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 678.37it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 790.68it/s]
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 96.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 133.55it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 133.38it/s]
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 80.75it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:08<00:00,  1.98s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.840736
X.shape (700, 1)
method = min_degree, AUC = 0.664866
X.shape (700, 1)
method = max_degree, AUC = 0.797919
X.shape (700, 1)
method = diameter, AUC = 0.792117
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.615446
X.shape (700, 1)
method = num_cliques, AUC = 0.762105
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.767107
X.shape (700, 1)
method = edeg_density, AUC = 0.840736
X.shape (700, 25)
method = MACC k0=5, AUC = 0.889956
X.shape (700, 100)
method = MACC k0=10, AUC = 0.925970
X.shape (700, 400)
method = MACC k0=20, AUC = 0.921569
X.shape (700, 900)
method = MACC k0=30, AUC = 0.921969
Classifying subgraphs in Simmons81-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 826.55it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 670.60it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 777.96it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 118.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 136.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 142.92it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 126.37it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:14<00:00,  1.99s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.892157
X.shape (700, 1)
method = min_degree, AUC = 0.623850
X.shape (700, 1)
method = max_degree, AUC = 0.812125
X.shape (700, 1)
method = diameter, AUC = 0.812325
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.729092
X.shape (700, 1)
method = num_cliques, AUC = 0.738896
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.798719
X.shape (700, 1)
method = edeg_density, AUC = 0.892157
X.shape (700, 25)
method = MACC k0=5, AUC = 0.921168
X.shape (700, 100)
method = MACC k0=10, AUC = 0.895558
X.shape (700, 400)
method = MACC k0=20, AUC = 0.911565
X.shape (700, 900)
method = MACC k0=30, AUC = 0.901961
Classifying subgraphs in Simmons81-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 804.72it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 715.73it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 798.64it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 124.02it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 135.73it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 124.21it/s]
100%|█████████████████████████████████████████| 100/100 [00:01<00:00, 96.20it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:10<00:00,  1.99s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.832133
X.shape (700, 1)
method = min_degree, AUC = 0.568828
X.shape (700, 1)
method = max_degree, AUC = 0.714086
X.shape (700, 1)
method = diameter, AUC = 0.792317
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.661865
X.shape (700, 1)
method = num_cliques, AUC = 0.703081
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.746299
X.shape (700, 1)
method = edeg_density, AUC = 0.832133
X.shape (700, 25)
method = MACC k0=5, AUC = 0.892357
X.shape (700, 100)
method = MACC k0=10, AUC = 0.943177
X.shape (700, 400)
method = MACC k0=20, AUC = 0.940376
X.shape (700, 900)
method = MACC k0=30, AUC = 0.945178
Classifying subgraphs in Simmons81-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 858.52it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 643.97it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 823.12it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 118.41it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 131.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 137.77it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 121.45it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:44<00:00,  2.03s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.890356
X.shape (700, 1)
method = min_degree, AUC = 0.613045
X.shape (700, 1)
method = max_degree, AUC = 0.761104
X.shape (700, 1)
method = diameter, AUC = 0.881953
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.549020
X.shape (700, 1)
method = num_cliques, AUC = 0.787115
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.839136
X.shape (700, 1)
method = edeg_density, AUC = 0.890356
X.shape (700, 25)
method = MACC k0=5, AUC = 0.918768
X.shape (700, 100)
method = MACC k0=10, AUC = 0.948780
X.shape (700, 400)
method = MACC k0=20, AUC = 0.963585
X.shape (700, 900)
method = MACC k0=30, AUC = 0.952781
Classifying subgraphs in Simmons81-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 793.33it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 728.19it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 847.41it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 143.59it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 145.25it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 156.32it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 134.01it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:00<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.842337
X.shape (700, 1)
method = min_degree, AUC = 0.605042
X.shape (700, 1)
method = max_degree, AUC = 0.723890
X.shape (700, 1)
method = diameter, AUC = 0.799120
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.589436
X.shape (700, 1)
method = num_cliques, AUC = 0.747899
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.705482
X.shape (700, 1)
method = edeg_density, AUC = 0.842337
X.shape (700, 25)
method = MACC k0=5, AUC = 0.938776
X.shape (700, 100)
method = MACC k0=10, AUC = 0.955582
X.shape (700, 400)
method = MACC k0=20, AUC = 0.960384
X.shape (700, 900)
method = MACC k0=30, AUC = 0.968387
Classifying subgraphs in Reed98-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 805.76it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 734.05it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 915.48it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 110.79it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 128.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.76it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 119.70it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:10<00:00,  1.99s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.844538
X.shape (700, 1)
method = min_degree, AUC = 0.591637
X.shape (700, 1)
method = max_degree, AUC = 0.730692
X.shape (700, 1)
method = diameter, AUC = 0.806923
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.674270
X.shape (700, 1)
method = num_cliques, AUC = 0.682873
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.729892
X.shape (700, 1)
method = edeg_density, AUC = 0.844538
X.shape (700, 25)
method = MACC k0=5, AUC = 0.887955
X.shape (700, 100)
method = MACC k0=10, AUC = 0.925970
X.shape (700, 400)
method = MACC k0=20, AUC = 0.941176
X.shape (700, 900)
method = MACC k0=30, AUC = 0.942777
Classifying subgraphs in Reed98-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 815.27it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 709.61it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 870.80it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 146.97it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 147.28it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 158.42it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 132.25it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:47<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.861745
X.shape (700, 1)
method = min_degree, AUC = 0.586234
X.shape (700, 1)
method = max_degree, AUC = 0.780712
X.shape (700, 1)
method = diameter, AUC = 0.839536
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.669868
X.shape (700, 1)
method = num_cliques, AUC = 0.738896
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.811925
X.shape (700, 1)
method = edeg_density, AUC = 0.861745
X.shape (700, 25)
method = MACC k0=5, AUC = 0.924370
X.shape (700, 100)
method = MACC k0=10, AUC = 0.949580
X.shape (700, 400)
method = MACC k0=20, AUC = 0.967587
X.shape (700, 900)
method = MACC k0=30, AUC = 0.948379
Classifying subgraphs in Reed98-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 862.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 739.13it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 856.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 144.99it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.53it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 164.91it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 137.97it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:55<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.801120
X.shape (700, 1)
method = min_degree, AUC = 0.540816
X.shape (700, 1)
method = max_degree, AUC = 0.688275
X.shape (700, 1)
method = diameter, AUC = 0.764506
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.582233
X.shape (700, 1)
method = num_cliques, AUC = 0.690476
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.770308
X.shape (700, 1)
method = edeg_density, AUC = 0.801120
X.shape (700, 25)
method = MACC k0=5, AUC = 0.882353
X.shape (700, 100)
method = MACC k0=10, AUC = 0.937175
X.shape (700, 400)
method = MACC k0=20, AUC = 0.911164
X.shape (700, 900)
method = MACC k0=30, AUC = 0.907163
Classifying subgraphs in Reed98-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 815.35it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 753.94it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 854.68it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.15it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.38it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 165.32it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.97it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:55<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.840936
X.shape (700, 1)
method = min_degree, AUC = 0.600240
X.shape (700, 1)
method = max_degree, AUC = 0.718487
X.shape (700, 1)
method = diameter, AUC = 0.820328
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.609044
X.shape (700, 1)
method = num_cliques, AUC = 0.733493
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.697479
X.shape (700, 1)
method = edeg_density, AUC = 0.840936
X.shape (700, 25)
method = MACC k0=5, AUC = 0.913565
X.shape (700, 100)
method = MACC k0=10, AUC = 0.939576
X.shape (700, 400)
method = MACC k0=20, AUC = 0.903561
X.shape (700, 900)
method = MACC k0=30, AUC = 0.928371
Classifying subgraphs in NYU9-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 810.00it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 761.29it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 869.63it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.06it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.74it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 157.73it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 134.51it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:45<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.822129
X.shape (700, 1)
method = min_degree, AUC = 0.628051
X.shape (700, 1)
method = max_degree, AUC = 0.677471
X.shape (700, 1)
method = diameter, AUC = 0.781513
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.630652
X.shape (700, 1)
method = num_cliques, AUC = 0.714486
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.715886
X.shape (700, 1)
method = edeg_density, AUC = 0.822129
X.shape (700, 25)
method = MACC k0=5, AUC = 0.862345
X.shape (700, 100)
method = MACC k0=10, AUC = 0.924370
X.shape (700, 400)
method = MACC k0=20, AUC = 0.926771
X.shape (700, 900)
method = MACC k0=30, AUC = 0.929972
Classifying subgraphs in NYU9-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 863.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 757.96it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 850.33it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 147.67it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.22it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 164.63it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 137.82it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:44<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.870548
X.shape (700, 1)
method = min_degree, AUC = 0.519408
X.shape (700, 1)
method = max_degree, AUC = 0.808924
X.shape (700, 1)
method = diameter, AUC = 0.780312
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.591837
X.shape (700, 1)
method = num_cliques, AUC = 0.767507
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.846739
X.shape (700, 1)
method = edeg_density, AUC = 0.870548
X.shape (700, 25)
method = MACC k0=5, AUC = 0.890356
X.shape (700, 100)
method = MACC k0=10, AUC = 0.933173
X.shape (700, 400)
method = MACC k0=20, AUC = 0.937575
X.shape (700, 900)
method = MACC k0=30, AUC = 0.924770
Classifying subgraphs in NYU9-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 806.04it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 764.36it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 855.53it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 147.88it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 151.95it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 166.90it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.87it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:44<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.879152
X.shape (700, 1)
method = min_degree, AUC = 0.652261
X.shape (700, 1)
method = max_degree, AUC = 0.793717
X.shape (700, 1)
method = diameter, AUC = 0.856343
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.683473
X.shape (700, 1)
method = num_cliques, AUC = 0.750500
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.701481
X.shape (700, 1)
method = edeg_density, AUC = 0.879152
X.shape (700, 25)
method = MACC k0=5, AUC = 0.913165
X.shape (700, 100)
method = MACC k0=10, AUC = 0.938776
X.shape (700, 400)
method = MACC k0=20, AUC = 0.942377
X.shape (700, 900)
method = MACC k0=30, AUC = 0.958784
Classifying subgraphs in Virginia63-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 757.57it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 754.65it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 846.60it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 147.59it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.78it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 121.84it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 135.01it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:46<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.884754
X.shape (700, 1)
method = min_degree, AUC = 0.494398
X.shape (700, 1)
method = max_degree, AUC = 0.764706
X.shape (700, 1)
method = diameter, AUC = 0.791317
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.691877
X.shape (700, 1)
method = num_cliques, AUC = 0.713085
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.730692
X.shape (700, 1)
method = edeg_density, AUC = 0.884754
X.shape (700, 25)
method = MACC k0=5, AUC = 0.881152
X.shape (700, 100)
method = MACC k0=10, AUC = 0.864746
X.shape (700, 400)
method = MACC k0=20, AUC = 0.885554
X.shape (700, 900)
method = MACC k0=30, AUC = 0.870348
Classifying subgraphs in Virginia63-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 785.16it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 567.14it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 848.64it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 144.49it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 152.06it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 160.09it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 139.17it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:42<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.807723
X.shape (700, 1)
method = min_degree, AUC = 0.578231
X.shape (700, 1)
method = max_degree, AUC = 0.698079
X.shape (700, 1)
method = diameter, AUC = 0.827731
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.621849
X.shape (700, 1)
method = num_cliques, AUC = 0.711084
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.710684
X.shape (700, 1)
method = edeg_density, AUC = 0.807723
X.shape (700, 25)
method = MACC k0=5, AUC = 0.848739
X.shape (700, 100)
method = MACC k0=10, AUC = 0.920768
X.shape (700, 400)
method = MACC k0=20, AUC = 0.941176
X.shape (700, 900)
method = MACC k0=30, AUC = 0.942777
Classifying subgraphs in UCLA26-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 878.02it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 742.86it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 852.71it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 114.71it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.04it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 161.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 131.40it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:07<00:00,  1.98s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.825930
X.shape (700, 1)
method = min_degree, AUC = 0.569628
X.shape (700, 1)
method = max_degree, AUC = 0.739096
X.shape (700, 1)
method = diameter, AUC = 0.782113
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.551421
X.shape (700, 1)
method = num_cliques, AUC = 0.766907
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.778311
X.shape (700, 1)
method = edeg_density, AUC = 0.825930
X.shape (700, 25)
method = MACC k0=5, AUC = 0.893958
X.shape (700, 100)
method = MACC k0=10, AUC = 0.892357
X.shape (700, 400)
method = MACC k0=20, AUC = 0.912365
X.shape (700, 900)
method = MACC k0=30, AUC = 0.898359


In [82]:
path = "Output_files/subgraph_classification_data30.npy"
a = np.load(path, allow_pickle=True).item()

In [98]:
a.keys()

dict_keys(['Caltech36-Simmons81', 'Caltech36-Reed98', 'Caltech36-NYU9', 'Caltech36-Virginia63', 'Caltech36-UCLA26', 'Caltech36-Wisconsin87', 'Simmons81-Reed98', 'Simmons81-NYU9', 'Simmons81-Virginia63', 'Simmons81-UCLA26', 'Simmons81-Wisconsin87', 'Reed98-NYU9', 'Reed98-Virginia63', 'Reed98-UCLA26', 'Reed98-Wisconsin87', 'NYU9-Virginia63', 'NYU9-UCLA26', 'NYU9-Wisconsin87', 'Virginia63-UCLA26', 'Virginia63-Wisconsin87', 'UCLA26-Wisconsin87'])

In [99]:
c = a.get("Caltech36-Simmons81")

In [102]:
c.get("k=30").keys()

dict_keys(['num_nodes', 'num_edges', 'min_degree', 'max_degree', 'diameter', 'degree_assortativity_coef', 'num_cliques', 'Avg_clustering_coeff', 'edeg_density', 'MACC k0=5', 'MACC k0=10', 'MACC k0=20', 'MACC k0=30'])

In [109]:
c.get("diameter").keys()

dict_keys(['Y_test', 'Y_pred', 'AUC', 'Opt_threshold', 'Accuracy', 'Sensitivity', 'Specificity', 'Precision', 'Fall_out', 'Miss_rate'])

In [110]:
for i in np.arange(len(list(a.keys()))):
    for j in np.arange(i+1, len(list(a.keys()))):
        key = list(a.keys())[i]
        if i != j and key.split("-")[1] == 'Wisconsin87':
            key = list(a.keys())[i]
            b = a.get(key)
            c = b.get("k=30")
            for method in list(c.keys()):
                print("{}, method={}, AUC={}".format(key, method, c.get(method).get("AUC")))

Caltech36-Wisconsin87, method=num_nodes, AUC=0.5
Caltech36-Wisconsin87, method=num_edges, AUC=0.8673469387755103
Caltech36-Wisconsin87, method=min_degree, AUC=0.6318527410964385
Caltech36-Wisconsin87, method=max_degree, AUC=0.7691076430572229
Caltech36-Wisconsin87, method=diameter, AUC=0.8105242096838735
Caltech36-Wisconsin87, method=degree_assortativity_coef, AUC=0.6742697078831532
Caltech36-Wisconsin87, method=num_cliques, AUC=0.7322929171668667
Caltech36-Wisconsin87, method=Avg_clustering_coeff, AUC=0.7723089235694277
Caltech36-Wisconsin87, method=edeg_density, AUC=0.8673469387755103
Caltech36-Wisconsin87, method=MACC k0=5, AUC=0.9107643057222888
Caltech36-Wisconsin87, method=MACC k0=10, AUC=0.9191676670668267
Caltech36-Wisconsin87, method=MACC k0=20, AUC=0.9171668667466987
Caltech36-Wisconsin87, method=MACC k0=30, AUC=0.8975590236094437
Caltech36-Wisconsin87, method=num_nodes, AUC=0.5
Caltech36-Wisconsin87, method=num_edges, AUC=0.8673469387755103
Caltech36-Wisconsin87, method=min_

In [111]:
path = "Output_files/subgraph_classification_data30.npy"
results_0 = np.load(path, allow_pickle=True).item()

In [141]:
strz

'This is a lineaThis is line 2bThis is line 3'

In [112]:
results_0.keys()

dict_keys(['Caltech36-Simmons81', 'Caltech36-Reed98', 'Caltech36-NYU9', 'Caltech36-Virginia63', 'Caltech36-UCLA26', 'Caltech36-Wisconsin87', 'Simmons81-Reed98', 'Simmons81-NYU9', 'Simmons81-Virginia63', 'Simmons81-UCLA26', 'Simmons81-Wisconsin87', 'Reed98-NYU9', 'Reed98-Virginia63', 'Reed98-UCLA26', 'Reed98-Wisconsin87', 'NYU9-Virginia63', 'NYU9-UCLA26', 'NYU9-Wisconsin87', 'Virginia63-UCLA26', 'Virginia63-Wisconsin87', 'UCLA26-Wisconsin87'])

In [150]:
import pandas as pd

method_list = ['edeg_density', 'min_degree', 'max_degree', 'diameter', 'degree_assortativity_coef', 'num_cliques', 'Avg_clustering_coeff',  'MACC k0=5', 'MACC k0=10', 'MACC k0=20', 'MACC k0=30']
pairs_list = list(results_0.keys())
method_list_new = [" ".join(method.split("_")) for method in method_list]

method_0 = []
AUC_list_total = []
for method in method_list:
    AUC_list = []
    for idx in range(len(pairs_list)):
        pair = pairs_list[idx]
        result1 = results_0.get(pair)
        result11 = result1.get("k=30")
        AUC = result11.get(method).get("AUC")
        AUC_list.append(np.round(AUC, 3))
    AUC_list_total.append(AUC_list)

AUC_array = np.asarray(AUC_list_total).T
     
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 10)
result = pd.DataFrame(data=AUC_array)
result = result.set_axis(method_list_new, axis=1, inplace=False)
#result.insert(0, column='xi', value= xi_0)
#result.insert(0, column='Method', value= method_0)
#result = round(result.groupby(['Method', 'xi'], sort=False).agg(['mean','std']),3)
result["Networks"] = pairs_list
result = result.set_index("Networks")

result

result.to_csv('Figures/table.csv', index=True)

In [66]:
# Subgraph sampling for subgraph classification
# Output = subgraph_list as NNetwork objects

ntwk_list = ['Caltech36', 'Simmons81', 'Reed98', 'NYU9', 'Virginia63', 'UCLA26', 'Wisconsin87'] # Wisconsin87, UCLA26, Caltech36

#ntwk_list = ['Caltech36', 'Simmons81', 'Reed98', 'NYU9'] # Wisconsin87, UCLA26, Caltech36

label_list = []
subgraph_list = []
num_subgraphs = 100
k_list = [30]
k0_list = [5, 10,20,30]

#X_MACC = []
#X_edge_density = []
#X_adj = []

nn_network_list = []
for ntwk in ntwk_list:
    ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
    path = "Data/Facebook/" + str(ntwk) + '.txt'
    G = nn.NNetwork()
    G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
    nn_network_list.append(G)
    print('num nodes in G', len(G.nodes()))
    print('num edges in G', len(G.get_edges()))

y = [0]*num_subgraphs + [1]*num_subgraphs

results_all = {}

for a in np.arange(len(nn_network_list)):
    for b in np.arange(a+1, len(nn_network_list)):
        if a != b:
            nn_network_list_sub = [nn_network_list[a],nn_network_list[b]]
            print("Classifying subgraphs in {}-{} ...".format(ntwk_list[a], ntwk_list[b]))

            subgraph_list_dict = {}
            output_dict_list_total = {}

            for k in k_list:
                print("sampling subgraphs with k={} nodes".format(k))
                subgraph_list = []
                for q in np.arange(len(nn_network_list)):
                    G = nn_network_list[q]

                    for i in trange(num_subgraphs):
                        label_list.append(str(ntwk))

                        # take the induced subgraph 
                        #X, embs = G.get_patches(k=k, sample_size=10, skip_folded_hom=False, sampling_alg = 'pivot')
                        #H = G.subgraph(embs[-1]) # take the last instance of MCMC sampling
                    
                        H = G.k_node_ind_subgraph(k=k)
                        while H is None:
                            H = G.k_node_ind_subgraph(k=k)
                        
                        #A_adj = H.get_adjacency_matrix()
                        subgraph_list.append(H)
                        #subgraph_list.append(A_adj)

                        """
                        ## compute summary stats of subgraphs 

                        A_adj = H.get_adjacency_matrix()
                        X_adj.append(A_adj)

                        X0, embs0 = H.get_patches(k=k0, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
                        MACC = np.sum(X0, axis=1)/X0.shape[1]
                        MACC = MACC.reshape(k0,k0)
                        X_MACC.append(MACC)

                        X_edge_density.append(np.linalg.norm(A_adj, 1)/len(H.nodes()))
                        """

                subgraph_list_dict.update({"k={}".format(k) : subgraph_list})

                print("extracting subgraph features..")
                output_dict_list = datagen_graph_features(X=subgraph_list, k0_list = k0_list)
                results_dict_total = run_binary_classification(output_dict_list, y=y, scale=0.3)

                output_dict_list_total.update({"subgraph_list": subgraph_list_dict})
                output_dict_list_total.update({"k={}".format(k): results_dict_total})
            output_dict_list_total.update({"subgraph_list": subgraph_list_dict})
            results_all.update({"{}-{}".format(ntwk_list[a], ntwk_list[b]) : output_dict_list_total})
            np.save("Output_files/subgraph_classification_data30", results_all)

num nodes in G 769
num edges in G 33312
num nodes in G 1518
num edges in G 65976
num nodes in G 962
num edges in G 37624
num nodes in G 21679
num edges in G 1431430
num nodes in G 21325
num edges in G 1396356
num nodes in G 20467
num edges in G 1495226
num nodes in G 23842
num edges in G 1671904
Classifying subgraphs in Caltech36-Simmons81 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 834.60it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 729.64it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 843.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 141.95it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.90it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 162.74it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 136.26it/s]


extracting subgraph features..


100%|███████████████████████████████████████| 700/700 [1:21:58<00:00,  7.03s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.772309
X.shape (700, 1)
method = min_degree, AUC = 0.616447
X.shape (700, 1)
method = max_degree, AUC = 0.691477
X.shape (700, 1)
method = diameter, AUC = 0.826130
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.523009
X.shape (700, 1)
method = num_cliques, AUC = 0.707683
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.670668
X.shape (700, 1)
method = edeg_density, AUC = 0.772309
X.shape (700, 25)
method = MACC k0=5, AUC = 0.828331
X.shape (700, 100)
method = MACC k0=10, AUC = 0.887555
X.shape (700, 400)
method = MACC k0=20, AUC = 0.907163
X.shape (700, 900)
method = MACC k0=30, AUC = 0.902361
Classifying subgraphs in Wisconsin87-Reed98 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 501.19it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 684.04it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 823.89it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 146.79it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.19it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 161.03it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 134.21it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:47<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.859944
X.shape (700, 1)
method = min_degree, AUC = 0.612245
X.shape (700, 1)
method = max_degree, AUC = 0.774310
X.shape (700, 1)
method = diameter, AUC = 0.804122
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.455782
X.shape (700, 1)
method = num_cliques, AUC = 0.778511
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.785114
X.shape (700, 1)
method = edeg_density, AUC = 0.859944
X.shape (700, 25)
method = MACC k0=5, AUC = 0.903962
X.shape (700, 100)
method = MACC k0=10, AUC = 0.950780
X.shape (700, 400)
method = MACC k0=20, AUC = 0.957583
X.shape (700, 900)
method = MACC k0=30, AUC = 0.957983
Classifying subgraphs in Wisconsin87-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 557.51it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 722.51it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 855.22it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.41it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.27it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 164.37it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 135.11it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:55<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.821729
X.shape (700, 1)
method = min_degree, AUC = 0.569228
X.shape (700, 1)
method = max_degree, AUC = 0.736695
X.shape (700, 1)
method = diameter, AUC = 0.810724
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.575030
X.shape (700, 1)
method = num_cliques, AUC = 0.737695
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.803922
X.shape (700, 1)
method = edeg_density, AUC = 0.821729
X.shape (700, 25)
method = MACC k0=5, AUC = 0.867147
X.shape (700, 100)
method = MACC k0=10, AUC = 0.949980
X.shape (700, 400)
method = MACC k0=20, AUC = 0.933173
X.shape (700, 900)
method = MACC k0=30, AUC = 0.947979
Classifying subgraphs in Wisconsin87-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 861.59it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 642.89it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 844.94it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.75it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.62it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 163.42it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.35it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [47:47<00:00,  4.10s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.840136
X.shape (700, 1)
method = min_degree, AUC = 0.711285
X.shape (700, 1)
method = max_degree, AUC = 0.741297
X.shape (700, 1)
method = diameter, AUC = 0.798319
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.659064
X.shape (700, 1)
method = num_cliques, AUC = 0.685074
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.754302
X.shape (700, 1)
method = edeg_density, AUC = 0.840136
X.shape (700, 25)
method = MACC k0=5, AUC = 0.897559
X.shape (700, 100)
method = MACC k0=10, AUC = 0.900360
X.shape (700, 400)
method = MACC k0=20, AUC = 0.891156
X.shape (700, 900)
method = MACC k0=30, AUC = 0.899560
Classifying subgraphs in Wisconsin87-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 787.97it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 736.19it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 826.54it/s]
100%|█████████████████████████████████████████| 100/100 [00:54<00:00,  1.83it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.48it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 159.39it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 101.17it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [50:19<00:00,  4.31s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.857343
X.shape (700, 1)
method = min_degree, AUC = 0.653661
X.shape (700, 1)
method = max_degree, AUC = 0.782913
X.shape (700, 1)
method = diameter, AUC = 0.824330
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.573830
X.shape (700, 1)
method = num_cliques, AUC = 0.719888
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.749100
X.shape (700, 1)
method = edeg_density, AUC = 0.857343
X.shape (700, 25)
method = MACC k0=5, AUC = 0.897959
X.shape (700, 100)
method = MACC k0=10, AUC = 0.917567
X.shape (700, 400)
method = MACC k0=20, AUC = 0.910364
X.shape (700, 900)
method = MACC k0=30, AUC = 0.924770
Classifying subgraphs in Simmons81-Reed98 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 760.29it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 624.83it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 811.50it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.21it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.12it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 161.51it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 138.39it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:56<00:00,  1.97s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.878151
X.shape (700, 1)
method = min_degree, AUC = 0.749700
X.shape (700, 1)
method = max_degree, AUC = 0.820928
X.shape (700, 1)
method = diameter, AUC = 0.843737
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.596639
X.shape (700, 1)
method = num_cliques, AUC = 0.803521
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.768707
X.shape (700, 1)
method = edeg_density, AUC = 0.878151
X.shape (700, 25)
method = MACC k0=5, AUC = 0.928772
X.shape (700, 100)
method = MACC k0=10, AUC = 0.939176
X.shape (700, 400)
method = MACC k0=20, AUC = 0.948780
X.shape (700, 900)
method = MACC k0=30, AUC = 0.952781
Classifying subgraphs in Wisconsin87-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 768.34it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 725.91it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 839.90it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 145.90it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.13it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 163.98it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 138.27it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:50<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.854742
X.shape (700, 1)
method = min_degree, AUC = 0.625650
X.shape (700, 1)
method = max_degree, AUC = 0.730892
X.shape (700, 1)
method = diameter, AUC = 0.855142
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.667067
X.shape (700, 1)
method = num_cliques, AUC = 0.764106
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.745498
X.shape (700, 1)
method = edeg_density, AUC = 0.854742
X.shape (700, 25)
method = MACC k0=5, AUC = 0.939176
X.shape (700, 100)
method = MACC k0=10, AUC = 0.955582
X.shape (700, 400)
method = MACC k0=20, AUC = 0.958784
X.shape (700, 900)
method = MACC k0=30, AUC = 0.955582
Classifying subgraphs in Wisconsin87-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 799.34it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 663.87it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 821.68it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 146.85it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 150.45it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 163.62it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 141.23it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:49<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.771909
X.shape (700, 1)
method = min_degree, AUC = 0.456983
X.shape (700, 1)
method = max_degree, AUC = 0.688075
X.shape (700, 1)
method = diameter, AUC = 0.798920
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.703882
X.shape (700, 1)
method = num_cliques, AUC = 0.654662
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.715486
X.shape (700, 1)
method = edeg_density, AUC = 0.771909
X.shape (700, 25)
method = MACC k0=5, AUC = 0.839136
X.shape (700, 100)
method = MACC k0=10, AUC = 0.905562
X.shape (700, 400)
method = MACC k0=20, AUC = 0.898359
X.shape (700, 900)
method = MACC k0=30, AUC = 0.914366
Classifying subgraphs in Wisconsin87-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 860.10it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 735.60it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 829.82it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.03it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 154.93it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 160.86it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 143.66it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:46<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.887355
X.shape (700, 1)
method = min_degree, AUC = 0.625250
X.shape (700, 1)
method = max_degree, AUC = 0.826731
X.shape (700, 1)
method = diameter, AUC = 0.824130
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.685874
X.shape (700, 1)
method = num_cliques, AUC = 0.729092
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.767907
X.shape (700, 1)
method = edeg_density, AUC = 0.887355
X.shape (700, 25)
method = MACC k0=5, AUC = 0.879952
X.shape (700, 100)
method = MACC k0=10, AUC = 0.903962
X.shape (700, 400)
method = MACC k0=20, AUC = 0.894358
X.shape (700, 900)
method = MACC k0=30, AUC = 0.915166
Classifying subgraphs in Reed98-NYU9 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 717.30it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 725.06it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 855.20it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 151.64it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 151.00it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 165.53it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 140.71it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:54<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.875750
X.shape (700, 1)
method = min_degree, AUC = 0.695278
X.shape (700, 1)
method = max_degree, AUC = 0.783313
X.shape (700, 1)
method = diameter, AUC = 0.837335
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.612645
X.shape (700, 1)
method = num_cliques, AUC = 0.785714
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.831132
X.shape (700, 1)
method = edeg_density, AUC = 0.875750
X.shape (700, 25)
method = MACC k0=5, AUC = 0.925970
X.shape (700, 100)
method = MACC k0=10, AUC = 0.971989
X.shape (700, 400)
method = MACC k0=20, AUC = 0.969188
X.shape (700, 900)
method = MACC k0=30, AUC = 0.966787
Classifying subgraphs in Wisconsin87-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 866.33it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 726.92it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 837.24it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 121.42it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 153.03it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 164.05it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 143.24it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:44<00:00,  1.95s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.840736
X.shape (700, 1)
method = min_degree, AUC = 0.595838
X.shape (700, 1)
method = max_degree, AUC = 0.724690
X.shape (700, 1)
method = diameter, AUC = 0.759904
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.662665
X.shape (700, 1)
method = num_cliques, AUC = 0.690476
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.779912
X.shape (700, 1)
method = edeg_density, AUC = 0.840736
X.shape (700, 25)
method = MACC k0=5, AUC = 0.883954
X.shape (700, 100)
method = MACC k0=10, AUC = 0.919568
X.shape (700, 400)
method = MACC k0=20, AUC = 0.924370
X.shape (700, 900)
method = MACC k0=30, AUC = 0.912365
Classifying subgraphs in Wisconsin87-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 846.17it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 723.70it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 818.51it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 112.83it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 154.25it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 165.78it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 141.46it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [28:26<00:00,  2.44s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.809724
X.shape (700, 1)
method = min_degree, AUC = 0.599640
X.shape (700, 1)
method = max_degree, AUC = 0.745698
X.shape (700, 1)
method = diameter, AUC = 0.791317
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.641857
X.shape (700, 1)
method = num_cliques, AUC = 0.710484
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.774310
X.shape (700, 1)
method = edeg_density, AUC = 0.809724
X.shape (700, 25)
method = MACC k0=5, AUC = 0.857543
X.shape (700, 100)
method = MACC k0=10, AUC = 0.939176
X.shape (700, 400)
method = MACC k0=20, AUC = 0.940376
X.shape (700, 900)
method = MACC k0=30, AUC = 0.948780
Classifying subgraphs in NYU9-Virginia63 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 691.27it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 723.12it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 823.15it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 145.69it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 138.97it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 149.75it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 135.41it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [24:20<00:00,  2.09s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.830332
X.shape (700, 1)
method = min_degree, AUC = 0.522209
X.shape (700, 1)
method = max_degree, AUC = 0.743497
X.shape (700, 1)
method = diameter, AUC = 0.742897
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.573830
X.shape (700, 1)
method = num_cliques, AUC = 0.705682
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.729092
X.shape (700, 1)
method = edeg_density, AUC = 0.830332
X.shape (700, 25)
method = MACC k0=5, AUC = 0.909164
X.shape (700, 100)
method = MACC k0=10, AUC = 0.941176
X.shape (700, 400)
method = MACC k0=20, AUC = 0.962785
X.shape (700, 900)
method = MACC k0=30, AUC = 0.958784
Classifying subgraphs in Wisconsin87-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 633.26it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 698.88it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 835.76it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 119.63it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 131.14it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 142.58it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 116.56it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:29<00:00,  2.01s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.859744
X.shape (700, 1)
method = min_degree, AUC = 0.648659
X.shape (700, 1)
method = max_degree, AUC = 0.754902
X.shape (700, 1)
method = diameter, AUC = 0.899760
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.685474
X.shape (700, 1)
method = num_cliques, AUC = 0.755102
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.770308
X.shape (700, 1)
method = edeg_density, AUC = 0.859744
X.shape (700, 25)
method = MACC k0=5, AUC = 0.872349
X.shape (700, 100)
method = MACC k0=10, AUC = 0.927571
X.shape (700, 400)
method = MACC k0=20, AUC = 0.916367
X.shape (700, 900)
method = MACC k0=30, AUC = 0.923970
Classifying subgraphs in Virginia63-UCLA26 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 791.77it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 777.29it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 890.75it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 108.58it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 141.23it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 154.87it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 131.49it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [23:03<00:00,  1.98s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.880152
X.shape (700, 1)
method = min_degree, AUC = 0.666066
X.shape (700, 1)
method = max_degree, AUC = 0.733093
X.shape (700, 1)
method = diameter, AUC = 0.835934
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.669468
X.shape (700, 1)
method = num_cliques, AUC = 0.749700
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.779512
X.shape (700, 1)
method = edeg_density, AUC = 0.880152
X.shape (700, 25)
method = MACC k0=5, AUC = 0.963585
X.shape (700, 100)
method = MACC k0=10, AUC = 0.969588
X.shape (700, 400)
method = MACC k0=20, AUC = 0.978792
X.shape (700, 900)
method = MACC k0=30, AUC = 0.975990
Classifying subgraphs in UCLA26-Wisconsin87 ...
sampling subgraphs with k=30 nodes


100%|████████████████████████████████████████| 100/100 [00:00<00:00, 736.31it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 719.15it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 816.83it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 148.55it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 147.22it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 124.15it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 135.38it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 700/700 [22:52<00:00,  1.96s/it]


X.shape (700, 1)
method = num_nodes, AUC = 0.500000
X.shape (700, 1)
method = num_edges, AUC = 0.818327
X.shape (700, 1)
method = min_degree, AUC = 0.560024
X.shape (700, 1)
method = max_degree, AUC = 0.710884
X.shape (700, 1)
method = diameter, AUC = 0.819728
X.shape (700, 1)
method = degree_assortativity_coef, AUC = 0.697479
X.shape (700, 1)
method = num_cliques, AUC = 0.725490
X.shape (700, 1)
method = Avg_clustering_coeff, AUC = 0.744298
X.shape (700, 1)
method = edeg_density, AUC = 0.818327
X.shape (700, 25)
method = MACC k0=5, AUC = 0.874350
X.shape (700, 100)
method = MACC k0=10, AUC = 0.896359
X.shape (700, 400)
method = MACC k0=20, AUC = 0.913966
X.shape (700, 900)
method = MACC k0=30, AUC = 0.921569


In [67]:
path = "Output_files/subgraph_classification_data30.npy"
a = np.load(path, allow_pickle=True).item()

In [70]:
a.keys()

dict_keys(['Wisconsin87-Simmons81', 'Wisconsin87-Reed98', 'Wisconsin87-NYU9', 'Wisconsin87-Virginia63', 'Wisconsin87-UCLA26', 'Wisconsin87-Wisconsin87'])

In [72]:
results_all.keys()

dict_keys(['Wisconsin87-Simmons81', 'Wisconsin87-Reed98', 'Wisconsin87-NYU9', 'Wisconsin87-Virginia63', 'Wisconsin87-UCLA26', 'Wisconsin87-Wisconsin87'])

In [32]:
# Subgraph sampling for subgraph classification
# Output = subgraph_list as NNetwork objects

ntwk_list = ['Wisconsin87', 'UCLA26'] # Wisconsin87, UCLA26, Caltech36
label_list = []
subgraph_list = []
num_subgraphs = 100
k_list = [110]
k0_list = [30,40,50,60,70]

#X_MACC = []
#X_edge_density = []
#X_adj = []

nn_network_list = []
for ntwk in ntwk_list:
    ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
    path = "Data/Facebook/" + str(ntwk) + '.txt'
    G = nn.NNetwork()
    G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
    nn_network_list.append(G)
    print('num nodes in G', len(G.nodes()))
    print('num edges in G', len(G.get_edges()))

y = [0]*num_subgraphs + [1]*num_subgraphs

subgraph_list_dict = {}
output_dict_list_total = {}

for k in k_list:
    print("sampling subgraphs with k={} nodes".format(k)
    subgraph_list = []
    for a in np.arange(len(nn_network_list)):
        G = nn_network_list[a]
        
        for i in trange(num_subgraphs):
            label_list.append(str(ntwk))

            # take the induced subgraph 
            X, embs = G.get_patches(k=k, sample_size=10, skip_folded_hom=False, sampling_alg = 'pivot')
            H = G.subgraph(embs[-1]) # take the last instance of MCMC sampling
            #A_adj = H.get_adjacency_matrix()
            subgraph_list.append(H)
            #subgraph_list.append(A_adj)

            """
            ## compute summary stats of subgraphs 

            A_adj = H.get_adjacency_matrix()
            X_adj.append(A_adj)

            X0, embs0 = H.get_patches(k=k0, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
            MACC = np.sum(X0, axis=1)/X0.shape[1]
            MACC = MACC.reshape(k0,k0)
            X_MACC.append(MACC)

            X_edge_density.append(np.linalg.norm(A_adj, 1)/len(H.nodes()))
            """

    subgraph_list_dict.update({"k={}".format(k) : subgraph_list})

    print("extracting subgraph features..")
    output_dict_list = datagen_graph_features(X=subgraph_list, k0_list = k0_list)
    results_dict_total = run_binary_classification(output_dict_list, y=y, scale=0.3)
    
    output_dict_list_total.update({"k={}".format(k): results_dict_total})

num nodes in G 23842
num edges in G 1671904
num nodes in G 20467
num edges in G 1495226
sampling subgraphs with k=110 nodes


100%|█████████████████████████████████████████| 100/100 [00:20<00:00,  4.84it/s]
100%|█████████████████████████████████████████| 100/100 [00:19<00:00,  5.20it/s]


extracting subgraph features..


  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)
100%|█████████████████████████████████████████| 200/200 [45:41<00:00, 13.71s/it]

X.shape (200, 1)
method = num_nodes, AUC = 0.467587
X.shape (200, 1)
method = num_edges, AUC = 0.462185
X.shape (200, 1)
method = min_degree, AUC = 0.513405
X.shape (200, 1)
method = max_degree, AUC = 0.665866
X.shape (200, 1)
method = diameter, AUC = 0.518607
X.shape (200, 1)
method = degree_assortativity_coef, AUC = 0.617047
X.shape (200, 1)
method = num_cliques, AUC = 0.486595
X.shape (200, 1)
method = Avg_clustering_coeff, AUC = 0.656663
X.shape (200, 1)
method = edeg_density, AUC = 0.522209
X.shape (200, 900)
method = MACC k0=30, AUC = 0.745898
X.shape (200, 1600)
method = MACC k0=40, AUC = 0.787915
X.shape (200, 2500)
method = MACC k0=50, AUC = 0.749500
X.shape (200, 3600)
method = MACC k0=60, AUC = 0.757903
X.shape (200, 4900)
method = MACC k0=70, AUC = 0.793517





In [34]:
path = "Output_files/subgraph_classification_ex1"
np.save(path, output_dict_list_total)

In [25]:
# Subgraph sampling for subgraph classification
# Output = subgraph_list as NNetwork objects

ntwk_list = ['UCLA26', 'true_edgelist_for_BA_5000_m_25'] # Wisconsin87, UCLA26, Caltech36
label_list = []
subgraph_list = []
num_subgraphs = 100
k_list = [50]
k0_list = [15,20,30]

#X_MACC = []
#X_edge_density = []
#X_adj = []

nn_network_list = []
for ntwk in ntwk_list:
    ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
    path = "Data/Facebook/" + str(ntwk) + '.txt'
    G = nn.NNetwork()
    G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
    nn_network_list.append(G)
    print('num nodes in G', len(G.nodes()))
    print('num edges in G', len(G.get_edges()))

y = [0]*num_subgraphs + [1]*num_subgraphs

subgraph_list_dict = {}
output_dict_list_total = {}

for k in k_list:
    print("sampling subgraphs with k={} nodes".format(k))
    subgraph_list = []
    for a in np.arange(len(nn_network_list)):
        G = nn_network_list[a]
        
        for i in trange(num_subgraphs):
            label_list.append(str(ntwk))

            # take the induced subgraph 
            X, embs = G.get_patches(k=k, sample_size=10, skip_folded_hom=False, sampling_alg = 'pivot')
            H = G.subgraph(embs[-1]) # take the last instance of MCMC sampling
            #A_adj = H.get_adjacency_matrix()
            subgraph_list.append(H)
            #subgraph_list.append(A_adj)

            """
            ## compute summary stats of subgraphs 

            A_adj = H.get_adjacency_matrix()
            X_adj.append(A_adj)

            X0, embs0 = H.get_patches(k=k0, sample_size=1000, skip_folded_hom=False, sampling_alg = 'pivot')
            MACC = np.sum(X0, axis=1)/X0.shape[1]
            MACC = MACC.reshape(k0,k0)
            X_MACC.append(MACC)

            X_edge_density.append(np.linalg.norm(A_adj, 1)/len(H.nodes()))
            """

    subgraph_list_dict.update({"k={}".format(k) : subgraph_list})

    print("extracting subgraph features..")
    output_dict_list = datagen_graph_features(X=subgraph_list, k0_list = k0_list)
    results_dict_total = run_binary_classification(output_dict_list, y=y, scale=0.3)
    
    output_dict_list_total.update({"k={}".format(k): results_dict_total})

num nodes in G 20467
num edges in G 1495226
num nodes in G 5000
num edges in G 248750
sampling subgraphs with k=50 nodes


100%|█████████████████████████████████████████| 100/100 [00:07<00:00, 13.79it/s]
100%|█████████████████████████████████████████| 100/100 [00:04<00:00, 23.82it/s]


extracting subgraph features..


100%|█████████████████████████████████████████| 200/200 [06:31<00:00,  1.96s/it]

X.shape (200, 1)
method = num_nodes, AUC = 0.602241
X.shape (200, 1)
method = num_edges, AUC = 0.697679
X.shape (200, 1)
method = min_degree, AUC = 0.554222
X.shape (200, 1)
method = max_degree, AUC = 0.846939
X.shape (200, 1)
method = diameter, AUC = 0.925570
X.shape (200, 1)
method = degree_assortativity_coef, AUC = 0.769908
X.shape (200, 1)
method = num_cliques, AUC = 0.736695
X.shape (200, 1)
method = Avg_clustering_coeff, AUC = 0.992397
X.shape (200, 1)
method = edeg_density, AUC = 0.726291
X.shape (200, 225)
method = MACC k0=15, AUC = 0.989596
X.shape (200, 400)
method = MACC k0=20, AUC = 0.993998
X.shape (200, 900)
method = MACC k0=30, AUC = 0.996799





In [None]:
results_dict_total = run_binary_classification(output_dict_list, scale=1)

In [None]:
# Using full adjacency matrix 
X = np.asarray(X_adj).reshape(-1,k0**2)
print("X.shape", X.shape)
X_train = X[X_train_idx,:]
X_test = X[X_test_idx,:]

clf = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test)
P_pred = clf.predict_proba(X_test)
#print("P_pred", P_pred)

compute_accuracy_metrics(y_test, P_pred[:,1], use_opt_threshold=False, verbose=False)

In [None]:
def display_graphs(title,
                     save_path,
                     grid_shape=[2,3],
                     fig_size=[10,10],
                     data = None, # [X, embs]
                     show_importance=False):

        # columns of X = vectorized k x k adjacency matrices
        # corresponding list in embs = sequence of nodes (may overalp)
        X, embs = data
        print('X.shape', X.shape)

        rows = grid_shape[0]
        cols = grid_shape[1]

        fig = plt.figure(figsize=fig_size, constrained_layout=False)
        # make outer gridspec

        idx = np.arange(X.shape[1])
        outer_grid = gridspec.GridSpec(nrows=rows, ncols=cols, wspace=0.02, hspace=0.05)

        # make nested gridspecs
        for i in range(rows * cols):
            a = i // cols
            b = i % rows

            Ndict_wspace = 0.05
            Ndict_hspace = 0.05

            # display graphs
            inner_grid = outer_grid[i].subgridspec(1, 1, wspace=Ndict_wspace, hspace=Ndict_hspace)

            # get rid of duplicate nodes
            A = X[:,idx[i]]
            A = X[:,idx[i]].reshape(int(np.sqrt(X.shape[0])), -1)
            H = nn.NNetwork()
            H.read_adj(A, embs[idx[i]])
            A_sub = H.get_adjacency_matrix()

            # read in as a nx graph for plotting
            G1 = nx.from_numpy_matrix(A_sub)
            ax = fig.add_subplot(inner_grid[0, 0])
            pos = nx.spring_layout(G1)
            edges = G1.edges()
            weights = [1*G1[u][v]['weight'] for u,v in edges]
            nx.draw(G1, with_labels=False, node_size=20, ax=ax, width=weights, label='Graph')

            ax.set_xticks([])
            ax.set_yticks([])

        plt.suptitle(title, fontsize=15)
        fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.2, hspace=0)
        fig.savefig(save_path, bbox_inches='tight')

In [None]:
def display_dict_and_graph(title,
                           W, 
                           singular_values, 
                         save_path,
                         grid_shape=None,
                         fig_size=[10,10],
                         show_importance=False):
        
        n_components = W.shape[1]
        k = int(np.sqrt(W.shape[0]))

        rows = np.round(np.sqrt(n_components))
        rows = rows.astype(int)
        if grid_shape is not None:
            rows = grid_shape[0]
            cols = grid_shape[1]
        else:
            if rows ** 2 == n_components:
                cols = rows
            else:
                cols = rows + 1

        if show_importance:
            # importance = np.sum(self.code, axis=1) / sum(sum(self.code))
            idx = np.argsort(singular_values)
            idx = np.flip(idx)
        else:
            idx = np.arange(W.shape[1])

        Ndict_wspace = 0.05
        Ndict_hspace = 0.05

        fig = plt.figure(figsize=fig_size, constrained_layout=False)
        outer_grid = gridspec.GridSpec(nrows=1, ncols=2, wspace=0.02, hspace=0.05)
        for t in np.arange(2):
            # make nested gridspecs

            if t == 0:
                ### Make gridspec
                inner_grid = outer_grid[t].subgridspec(rows, cols, wspace=Ndict_wspace, hspace=Ndict_hspace)
                #gs1 = fig.add_gridspec(nrows=rows, ncols=cols, wspace=0.05, hspace=0.05)

                for i in range(rows * cols):
                    a = i // cols
                    b = i % cols
                    ax = fig.add_subplot(inner_grid[a, b])
                    ax.imshow(W.T[idx[i]].reshape(k, k), cmap="viridis", interpolation='nearest')
                    # ax.set_xlabel('%1.2f' % importance[idx[i]], fontsize=13)  # get the largest first
                    # ax.xaxis.set_label_coords(0.5, -0.05)  # adjust location of importance appearing beneath patches
                    ax.set_xticks([])
                    ax.set_yticks([])
            if t == 1:
                inner_grid = outer_grid[t].subgridspec(rows, cols, wspace=Ndict_wspace, hspace=Ndict_hspace)
                #gs1 = fig.add_gridspec(nrows=rows, ncols=cols, wspace=0.05, hspace=0.05)

                for i in range(rows * cols):
                    a = i // cols
                    b = i % cols

                    G1 = nx.from_numpy_matrix(W[:,idx[i]].reshape(int(np.sqrt(W.shape[0])),-1))
                    ax = fig.add_subplot(inner_grid[a, b])
                    pos = nx.spring_layout(G1)
                    edges = G1.edges()
                    weights = [5*G1[u][v]['weight'] for u,v in edges]
                    nx.draw(G1, with_labels=False, node_size=10, ax=ax, width=weights, label='Graph')
                    if show_importance:
                        ax.set_xlabel('%1.2f' % importance[idx[i]], fontsize=13)  # get the largest first
                        ax.xaxis.set_label_coords(0.5, -0.05)  # adjust location of importance appearing beneath patches

                    ax.set_xticks([])
                    ax.set_yticks([])

        plt.suptitle(title, fontsize=25)
        fig.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.2, hspace=0)
        fig.savefig(save_path, bbox_inches='tight')

In [None]:
sampling_alg = 'pivot'

ntwk = 'Caltech36' # COVID_PPI, Wisconsin87, UCLA26
ntwk_nonumber = ''.join([i for i in ntwk if not i.isdigit()])
save_folder = 'temp/'
k=10

path = "Data/Networks_all_NDL/" + str(ntwk) + '.txt'
G = nn.NNetwork()
G.load_add_edges(path, increment_weights=False, use_genfromtxt=True)
print('num nodes in G', len(G.nodes()))
print('num edges in G', len(G.get_edges()))

#mx0 = G.get_adjacency_matrix(ordered_node_list=G.nodes())
#plt.imshow(mx0)

In [None]:
X, embs = G.get_patches(k=k, sample_size=1000, skip_folded_hom=True)

display_graphs(title='induced subgraphs on {}-walks in {}'.format(k, ntwk_nonumber),
                 save_path=save_folder + ntwk_nonumber + "_subgraphs_"+ str(sampling_alg) + "_walk", 
                 data = [X, embs],
                 grid_shape = [5, 15],
                 fig_size = [15, 5],
                 show_importance=False)

In [None]:
np.save("MC_data_matrix", X)

In [None]:
plt.imshow(X[:,4].reshape(k,k))

In [None]:
from sklearn.decomposition import PCA ### Use truncated SVD / online PCA later for better computational efficiency 
pca = PCA(n_components=25)
pca.fit(X)

In [None]:
Y = pca.fit_transform(X)
singular_values = pca.singular_values_

In [None]:
display_dict_and_graph(title='{}-node induced subgraphs in {} (sampling : {})'.format(k, ntwk_nonumber, sampling_alg),
                 save_path=save_folder + ntwk_nonumber + "_subgraphs_"+ str(sampling_alg), 
                 W = Y,
                 singular_values = singular_values,
                 grid_shape = [5, 5],
                 fig_size = [15, 10],
                 show_importance=False)