In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

from utils import load_data, process_graph_data
from utils import package_mxl, adj_rw_norm
from utils import sparse_mx_to_torch_sparse_tensor
from utils import ResultRecorder

from model import GCN, GCNBias, SGC, ResGCN, GCNII, APPNP
from layers import GraphConv
from load_semigcn_data import load_data_gcn
from data_loader import DataLoader

from sklearn.metrics import f1_score
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy.sparse.csgraph import connected_components

from tqdm import trange
import numpy as np
import copy 
import time
import pickle
import os

# Dataset whose pickled experiment results are read and plotted below.
DATASET = 'citeseer'
# Folder that receives the generated PDF figures for this dataset.
SAVE_DIR = './exp_results/figures/%s/' % DATASET

# Depths (2..10) and repeat indices (0..9) used to build the result file names.
layers = list(range(2, 11))
repeats = list(range(10))

In [8]:
def _plot_mean_std(curves, title, ylabel, filename, axis=1, x_start=2):
    """Draw mean +/- one-std bands for one or more curves and save them as a PDF.

    Parameters
    ----------
    curves : list of (label, values)
        ``values`` is a 2-D array-like that is reduced with ``np.mean`` and
        ``np.std`` along ``axis``; the remaining dimension is the x-axis.
    title, ylabel : str
        Figure title and y-axis label (the x-axis label is always
        'Num of layers').
    filename : str
        File name appended to ``SAVE_DIR``.
    axis : int
        Axis that is averaged away. Use 1 for ``[depth][repeat]`` tables and
        0 for ``[repeat][position]`` tables.
    x_start : int
        Value of the first x tick; points are spaced by 1 from there.
    """
    fig, axs = plt.subplots()
    for label, values in curves:
        y_vals = np.mean(values, axis=axis)
        y_stds = np.std(values, axis=axis)
        x_vals = np.arange(len(y_vals)) + x_start
        axs.plot(x_vals, y_vals, label=label)
        axs.fill_between(x_vals, y_vals - y_stds, y_vals + y_stds, alpha=0.3)

    axs.set_title(title)
    axs.set_xlabel('Num of layers')
    axs.set_ylabel(ylabel)
    axs.grid(True)
    axs.xaxis.set_major_locator(mticker.MultipleLocator(1))
    fig.tight_layout()
    axs.legend()
    fig.savefig(SAVE_DIR + filename)
    # Many figures are produced in a loop; close each one to release memory.
    plt.close(fig)


def _mean_sigval(sigvals, skip_first=False):
    """Average the values of ``sigvals`` whose key contains 'gcs'.

    When ``skip_first`` is true, keys containing 'gcs.0' are left out.
    Returns 1 when no key qualifies.
    """
    picked = [
        v for k, v in sigvals.items()
        if 'gcs' in k and not (skip_first and 'gcs.0' in k)
    ]
    return np.mean(picked) if len(picked) > 0 else 1


# savefig does not create missing directories, so make sure the target exists.
os.makedirs(SAVE_DIR, exist_ok=True)

# Depth whose full per-position sequences are kept for the "one model" plots.
ONE_MODEL_LAYER = max(layers) - 1

for ALGORITHM in ['GCN', 'GCNBias', 'SGC', 'ResGCN', 'GCNII', 'APPNP']:
    # Tables indexed [depth][repeat]; one row is appended per entry of `layers`.
    dM_all = []
    dM_after_all = []
    inner_dist_all = []
    cross_dist_all = []
    inner_dist_after_all = []
    cross_dist_after_all = []
    sigW_all = []
    sigW_after_all = []
    test_acc_all = []

    # Tables indexed [repeat][position], filled only for ONE_MODEL_LAYER.
    dM_before_one_model = []
    dM_after_one_model = []
    inner_dist_one_model = []
    cross_dist_one_model = []

    for layer in layers:
        dM_current = []
        sigW_current = []
        dM_after_current = []
        sigW_after_current = []
        test_acc_current = []
        inner_dist_current = []
        cross_dist_current = []
        inner_dist_after_current = []
        cross_dist_after_current = []

        for repeat in repeats:
            save_path = os.path.join('exp_results/%s/' % DATASET,
                                     'results_%s_L%d_repteat%d.pkl' % (DATASET, layer, repeat))
            # NOTE: pickle.load can execute arbitrary code; only open result
            # files produced by trusted runs.
            with open(save_path, 'rb') as f:
                results_list = pickle.load(f)

            for result in results_list:
                # Keep only the record of the current algorithm at this depth.
                if result.note != '%s (L=%d)' % (ALGORITHM, layer):
                    continue

                dM_current.append(result.dM_before[-1] * result.dM_before[0])
                dM_after_current.append(result.dM_after[-1] * result.dM_after[0])

                # NOTE(review): the "before" average skips 'gcs.0' while the
                # "after" average includes it, so the two curves are computed
                # over different weight sets. Kept as in the original; confirm
                # which one is intended.
                sigW_current.append(_mean_sigval(result.w_sigval_before, skip_first=True))
                sigW_after_current.append(_mean_sigval(result.w_sigval_after))

                test_acc_current.append(result.test_acc)
                inner_dist_current.append(result.inner_dist[-1])
                cross_dist_current.append(result.cross_dist[-1])
                inner_dist_after_current.append(result.inner_dist_after[-1])
                cross_dist_after_current.append(result.cross_dist_after[-1])

                if layer == ONE_MODEL_LAYER:
                    inner_dist_one_model.append(result.inner_dist_after)
                    cross_dist_one_model.append(result.cross_dist_after)
                    dM_before_one_model.append(result.dM_before)
                    dM_after_one_model.append(result.dM_after)

        dM_all.append(dM_current)
        sigW_all.append(sigW_current)
        dM_after_all.append(dM_after_current)
        sigW_after_all.append(sigW_after_current)
        test_acc_all.append(test_acc_current)

        inner_dist_all.append(inner_dist_current)
        cross_dist_all.append(cross_dist_current)
        inner_dist_after_all.append(inner_dist_after_current)
        cross_dist_after_all.append(cross_dist_after_current)

    # Raw strings keep the LaTeX backslashes (\ell, \lambda, \max) literal
    # instead of relying on invalid escape sequences.
    dM_title = r'%s: $d_M(H^{(\ell)})$ / Num of layers' % ALGORITHM
    dM_ylabel = r'$d_M(H^{(\ell)})$'
    dist_title = '%s: Pairwise Euclidean distance / Num of layers' % ALGORITHM
    dist_ylabel = 'Pairwise Euclidean distance'

    ###################
    _plot_mean_std(
        [('Testing', test_acc_all)],
        '%s: Testing F1-score / Num of layers' % ALGORITHM,
        'F1-score',
        'testing_f1_score_%s_%s.pdf' % (DATASET, ALGORITHM),
    )
    ####################
    _plot_mean_std(
        [('Before training', dM_all)],
        dM_title, dM_ylabel,
        'dM_%s_%s_before.pdf' % (DATASET, ALGORITHM),
    )
    ####################
    _plot_mean_std(
        [('After training', dM_after_all)],
        dM_title, dM_ylabel,
        'dM_%s_%s_after.pdf' % (DATASET, ALGORITHM),
    )
    ####################
    # The one-model tables are [repeat][position], so the statistics must be
    # taken across repeats (axis=0), as the one-model distance plot below does.
    # Reducing along axis=1 would average each repeat over its positions.
    # NOTE(review): the x-axis still starts at 2 here, as before; confirm that
    # this matches the first entry of the stored sequences.
    _plot_mean_std(
        [('Before training', dM_before_one_model),
         ('After training', dM_after_one_model)],
        dM_title, dM_ylabel,
        'dM_%s_%s_one_model.pdf' % (DATASET, ALGORITHM),
        axis=0,
    )
    ####################
    _plot_mean_std(
        [('Inner class distance', inner_dist_all),
         ('Cross class distance', cross_dist_all)],
        dist_title, dist_ylabel,
        'pairwise_euclidean_distance_%s_%s.pdf' % (DATASET, ALGORITHM),
    )
    ####################
    _plot_mean_std(
        [('Inner class distance', inner_dist_one_model),
         ('Cross class distance', cross_dist_one_model)],
        dist_title, dist_ylabel,
        'pairwise_euclidean_distance_%s_%s_one_model.pdf' % (DATASET, ALGORITHM),
        axis=0,
    )
    ####################
    _plot_mean_std(
        [('Inner class distance (After)', inner_dist_after_all),
         ('Cross class distance (After)', cross_dist_after_all)],
        dist_title, dist_ylabel,
        'pairwise_euclidean_distance_%s_%s_after.pdf' % (DATASET, ALGORITHM),
    )
    ####################
    _plot_mean_std(
        [('Before training', sigW_all),
         ('After training', sigW_after_all)],
        r'%s: $\lambda_\max(W^{(\ell)})$ / Num of layers' % ALGORITHM,
        r'$\lambda_\max(W^{(\ell)})$',
        'sigW_%s_%s.pdf' % (DATASET, ALGORITHM),
    )

In [9]:
# Relies on kernel state: `results_list` is not built in this cell. It is the
# list left behind by the last pickle file loaded in the aggregation loop.
# Prints the `note` tag of every record in that file.
for result in results_list:
    print(result.note)

SGC (L=10)
GCN (L=10)
GCNBias (L=10)
ResGCN (L=10)
APPNP (L=10)
GCNII (L=10)


In [10]:
# Relies on kernel state: `result` is the loop variable left over from
# iterating `results_list`, i.e. its last record. Displays that record's
# `w_sigval_before` mapping (weight name -> value).
result.w_sigval_before

{'gcs.0.linear.weight': 1.1526110172271729,
 'gcs.1.linear.weight': 1.158475637435913,
 'gcs.2.linear.weight': 1.1382489204406738,
 'gcs.3.linear.weight': 1.1306782960891724,
 'gcs.4.linear.weight': 1.1244324445724487,
 'gcs.5.linear.weight': 1.0891273021697998,
 'gcs.6.linear.weight': 1.1014307737350464,
 'gcs.7.linear.weight': 1.1215291023254395,
 'gcs.8.linear.weight': 1.0771151781082153,
 'gcs.9.linear.weight': 1.1258124113082886}

In [11]:
# Inspect the `dM_after` sequences gathered by the aggregation loop for the
# records with layer == max(layers)-1. The list is reset for every ALGORITHM,
# so only the last algorithm processed is shown.
dM_after_one_model

[array([1.        , 0.55944998, 0.54341049, 0.53479422, 0.52992572,
        0.52668019, 0.52448317, 0.52292475, 0.521804  , 0.52098311]),
 array([1.        , 0.58980613, 0.57254562, 0.56311345, 0.55782612,
        0.5542882 , 0.55189836, 0.5502022 , 0.54898326, 0.54809056]),
 array([1.        , 0.49728911, 0.48381407, 0.4765519 , 0.47247124,
        0.46974386, 0.46789797, 0.46658612, 0.46564156, 0.46494855]),
 array([1.        , 0.53108526, 0.51623642, 0.50825655, 0.5037566 ,
        0.50075692, 0.49872614, 0.49728512, 0.49624817, 0.49548819]),
 array([1.        , 0.53704024, 0.5217603 , 0.51343281, 0.50878206,
        0.50567181, 0.50357297, 0.50208348, 0.50101318, 0.50022919]),
 array([1.        , 0.51569108, 0.50178458, 0.494364  , 0.49016051,
        0.48735425, 0.48544949, 0.48409525, 0.48311902, 0.4824024 ]),
 array([1.        , 0.58599296, 0.56928622, 0.56023669, 0.55513845,
        0.55173053, 0.54942501, 0.54778813, 0.54661097, 0.54574842]),
 array([1.        , 0.57526765, 0.

In [12]:
# Inspect the `dM_before` sequences gathered by the aggregation loop for the
# records with layer == max(layers)-1. The list is reset for every ALGORITHM,
# so only the last algorithm processed is shown.
dM_before_one_model

[array([1.        , 0.04551556, 0.04085813, 0.03834421, 0.03716378,
        0.03642824, 0.03598481, 0.0356932 , 0.03550011, 0.03536795]),
 array([1.        , 0.04555687, 0.040893  , 0.03837948, 0.0372031 ,
        0.03647097, 0.03603011, 0.03574038, 0.03554861, 0.03541738]),
 array([1.        , 0.04554553, 0.04086448, 0.03834916, 0.03716548,
        0.03643025, 0.03598705, 0.0356963 , 0.03550403, 0.0353727 ]),
 array([1.        , 0.04500945, 0.04034941, 0.03782241, 0.03664486,
        0.0359102 , 0.03546898, 0.03517905, 0.03498753, 0.03485662]),
 array([1.        , 0.04528115, 0.04060515, 0.03807699, 0.0368981 ,
        0.03616394, 0.03572288, 0.03543332, 0.03524203, 0.03511134]),
 array([1.        , 0.04533132, 0.04065386, 0.03814687, 0.03696962,
        0.03623879, 0.03579876, 0.03551002, 0.0353191 , 0.03518862]),
 array([1.        , 0.04559877, 0.04095676, 0.03846057, 0.03728885,
        0.03656052, 0.0361214 , 0.03583296, 0.03564199, 0.03551136]),
 array([1.        , 0.04537312, 0.