In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from utils import load_data, process_graph_data
from utils import package_mxl, adj_rw_norm
from utils import sparse_mx_to_torch_sparse_tensor
from utils import ResultRecorder

from model import GCN, GCNBias, SGC, ResGCN, GCNII, APPNP, MLP
from load_semigcn_data import load_data_gcn
from data_loader import DataLoader

import numpy as np
import copy 
import time
from sklearn.metrics import f1_score
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from tqdm import trange
from scipy.sparse.csgraph import connected_components
import pickle
import os


In [3]:
# The kernel is launched with its own command line; blank it out so that
# argparse below only ever sees (and returns) the declared defaults.
import sys; sys.argv = ['']; del sys
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

import argparse


def _build_parser():
    """Create the command-line parser holding every experiment setting."""
    # One row per option: (flag, type, default, help text).
    option_table = (
        ('--dataset', str, 'cora',
         'Dataset name: pubmed/flickr/reddit/ppi-large'),
        # Other implemented choices: SGC/GCN/GCNBias/ResGCN/GCNII/APPNP
        ('--method', str, 'GCN/ResGCN/GCNII',
         'Algorithms: seperate using slash'),
        ('--nhid', int, 64,
         'Hidden state dimension'),
        ('--epoch_num', int, 300,
         'Number of Epoch'),
        ('--batch_size', int, 20480000000,
         'size of output node in a batch'),
        ('--n_layers', int, 2,
         'Number of GCN layers'),
        ('--dropout', float, 0,
         'Dropout rate'),
        ('--cuda', int, 0,
         'Avaiable GPU ID'),
    )
    built = argparse.ArgumentParser(
        description='Training GCN on Large-scale Graph Datasets')
    for flag, kind, default, help_text in option_table:
        built.add_argument(flag, type=kind, default=default, help=help_text)
    return built


# Dataset / model / optimisation arguments.
parser = _build_parser()
args = parser.parse_args()
print(args)

# '--method' is a slash-separated list of algorithm names.
method = args.method.split('/')

"""
Prepare devices
"""
# `--cuda -1` explicitly requests the CPU. Any other value is treated as a GPU
# index, but it is only honoured when CUDA is usable and the index exists;
# otherwise we fall back to the CPU instead of failing later on the first
# `.to(device)` call (the default `--cuda 0` would otherwise break the
# notebook on CPU-only machines).
if (args.cuda != -1
        and torch.cuda.is_available()
        and 0 <= args.cuda < torch.cuda.device_count()):
    device = torch.device("cuda:" + str(args.cuda))
else:
    if args.cuda != -1:
        print('CUDA device %d is not available, falling back to CPU' % args.cuda)
    device = torch.device("cpu")
    
# cora / citeseer / pubmed are read through `load_data_gcn`; every other
# dataset name goes through the generic `load_data`.
if args.dataset in ['cora', 'citeseer', 'pubmed']:
    temp_data = load_data_gcn(args.dataset)
else:
    temp_data = load_data(args.dataset)

adj_full, adj_train, feat_data, labels, role = process_graph_data(*temp_data)

# `role` maps the split keys 'tr' / 'va' / 'te' to node collections.
train_nodes, valid_nodes, test_nodes = (
    np.array(role[split_key]) for split_key in ('tr', 'va', 'te')
)

data_loader = DataLoader(adj_full, train_nodes, valid_nodes, test_nodes, device)

Namespace(batch_size=20480000000, cuda=0, dataset='cora', dropout=0, epoch_num=300, method='GCN/ResGCN/GCNII', n_layers=2, nhid=64)


In [4]:
# def weight_constaint(model):
#     for p in model.parameters():
#         if p.data.norm(2) > 20:
#             p.data = p.data / p.data.norm(2)

In [5]:
"""
Setup datasets and models for training.

Single-label datasets use CrossEntropyLoss (softmax + negative log-likelihood
on raw logits); multi-label datasets use BCEWithLogitsLoss (sigmoid + binary
cross-entropy on raw logits).
"""

if args.dataset in ['flickr', 'reddit', 'cora', 'citeseer', 'pubmed']:
    feat_data_th = torch.FloatTensor(feat_data)
    # `labels` is reduced to one class index per node via argmax over axis 1.
    labels_th = torch.LongTensor(labels.argmax(1))
    num_classes = labels_th.max().item()+1
    criterion = nn.CrossEntropyLoss()
    multi_class=False
elif args.dataset in ['ppi', 'ppi-large', 'amazon', 'yelp']:
    feat_data_th = torch.FloatTensor(feat_data)
    # Labels are kept as a float matrix with one column per class.
    labels_th = torch.FloatTensor(labels)
    num_classes = labels_th.shape[1]
    criterion = nn.BCEWithLogitsLoss()
    # NOTE: despite its name, `multi_class=True` marks the multi-label case.
    multi_class=True
else:
    # Previously an unlisted dataset fell through both branches and only
    # surfaced as a NameError on `feat_data_th` a few lines below.
    raise ValueError(
        "Unsupported dataset %r: cannot decide between single-label and "
        "multi-label training" % (args.dataset,))

feat_data_th = feat_data_th.to(device)
labels_th = labels_th.to(device)

def sgd_step(net, optimizer, feat_data, labels, train_data, device):
    """
    Run one pass over the given mini-batches, taking one optimizer step each.

    For every mini-batch the network is evaluated on *all* nodes
    (``net(feat_data, adj)``) and the loss is computed only on the rows
    selected by the mini-batch indices.

    args:
        net        : model to train; must expose ``net.criterion`` (the loss)
        optimizer  : optimizer bound to ``net``'s parameters
        feat_data  : node feature tensor passed to ``net``
        labels     : label tensor, indexed by the mini-batch node indices
        train_data : tuple ``(mini_batches, adj)`` - an iterable of node-index
                     batches and the adjacency passed to ``net``
        device     : unused here; kept for signature compatibility
    return:
        epoch_loss : list with the loss of every mini-batch
        epoch_acc  : list with the micro-F1 score of every mini-batch

    Reads the notebook-level flag ``multi_class`` (True for multi-label data).
    """
    net.train()
    epoch_loss = []
    epoch_acc = []

    # Run over the mini-batches
    mini_batches, adj = train_data
    for mini_batch in mini_batches:

        # compute current stochastic gradient
        optimizer.zero_grad()
        output = net(feat_data, adj)

        loss = net.criterion(output[mini_batch], labels[mini_batch])
        loss.backward()

        optimizer.step()
        epoch_loss.append(loss.item())

        # Metrics are computed on a detached copy of the batch logits so the
        # autograd graph is neither modified in place nor kept alive.
        batch_logits = output[mini_batch].detach()
        if multi_class:
            # The network emits raw logits (BCEWithLogitsLoss), so the 0.5
            # decision threshold has to be applied to the sigmoid
            # probabilities, not to the logits themselves.
            preds = (torch.sigmoid(batch_logits) > 0.5).to(labels.dtype)
        else:
            preds = batch_logits.argmax(dim=1)

        # sklearn's signature is f1_score(y_true, y_pred).
        acc = f1_score(labels[mini_batch].detach().cpu().numpy(),
                       preds.cpu().numpy(), average="micro")
        epoch_acc.append(acc)

    return epoch_loss, epoch_acc

@torch.no_grad()
def inference(eval_model, feat_data, labels, test_data, device):
    """
    Evaluate ``eval_model`` on one batch of nodes without tracking gradients.

    args:
        eval_model : model to evaluate; must expose ``eval_model.criterion``
        feat_data  : node feature tensor passed to the model
        labels     : label tensor, indexed by the batch node indices
        test_data  : tuple ``(mini_batch, adj)`` - node indices to score and
                     the adjacency passed to the model
        device     : device the model is moved to before evaluation
    return:
        loss : loss on the selected nodes (Python float)
        acc  : micro-F1 score on the selected nodes

    Reads the notebook-level flag ``multi_class`` (True for multi-label data).
    """
    eval_model = eval_model.to(device)
    mini_batch, adj = test_data

    # Evaluate in eval mode so dropout is disabled, then restore whatever
    # mode the caller had the model in.
    was_training = eval_model.training
    eval_model.eval()
    try:
        output = eval_model(feat_data, adj)
        loss = eval_model.criterion(output[mini_batch], labels[mini_batch]).item()
    finally:
        eval_model.train(was_training)

    batch_logits = output[mini_batch]
    if multi_class:
        # Raw logits (BCEWithLogitsLoss): threshold the sigmoid probability
        # at 0.5 rather than the logit itself.
        preds = (torch.sigmoid(batch_logits) > 0.5).to(labels.dtype)
    else:
        preds = batch_logits.argmax(dim=1)

    # sklearn's signature is f1_score(y_true, y_pred).
    acc = f1_score(labels[mini_batch].detach().cpu().numpy(),
                   preds.detach().cpu().numpy(), average="micro")
    return loss, acc

In [6]:
from collections import OrderedDict
def copy_model(model):
    """
    Snapshot the current parameter values of ``model`` on the CPU.

    Returns an OrderedDict mapping parameter name -> detached CPU tensor.
    Every tensor is an independent copy: ``.to('cpu')`` alone returns the
    live parameter itself when the model already sits on the CPU, which would
    make every stored snapshot silently follow later optimizer updates.
    """
    state_dict = {k: v.detach().cpu().clone() for k, v in model.named_parameters()}
    state_dict = OrderedDict(state_dict)
    return state_dict
def copy_grad(model):
    """
    Snapshot the current gradients of ``model`` on the CPU.

    Returns an OrderedDict mapping parameter name -> detached CPU tensor.
    Gradients are cloned so the snapshot stays valid after the next
    ``zero_grad()`` / ``backward()`` (on a CPU model ``.to('cpu')`` would
    otherwise hand back the live ``.grad`` tensor). A parameter that has no
    gradient yet (``.grad is None``) is recorded as a zero tensor of the
    parameter's shape instead of raising AttributeError.
    """
    state_dict = OrderedDict()
    for k, v in model.named_parameters():
        if v.grad is None:
            state_dict[k] = torch.zeros_like(v, device='cpu').detach()
        else:
            state_dict[k] = v.grad.detach().cpu().clone()
    return state_dict

In [7]:
"""
Train without sampling
"""

def train_model(model, data_loader, note):
    """
    Train a deep copy of ``model`` for ``args.epoch_num`` epochs.

    The copy is moved to the notebook-level ``device`` and optimised with Adam
    (default settings). Each epoch draws mini-batches from ``data_loader``,
    runs ``sgd_step`` on them, validates with ``inference`` and stores the
    metrics, timings, gradient snapshot and parameter snapshot in a
    ``ResultRecorder`` tagged with ``note``. The recorder is returned; the
    ``model`` passed in is left untouched.
    """
    net = copy.deepcopy(model).to(device)
    results = ResultRecorder(note=note)
    optimizer = optim.Adam(net.parameters())

    tbar = trange(args.epoch_num, desc='Training Epochs')
    for epoch in tbar:
        # --- time the mini-batch sampling ---
        tic = time.perf_counter()
        train_data = data_loader.get_mini_batches(batch_size=args.batch_size)
        sample_time = time.perf_counter() - tic

        # --- time the optimisation pass ---
        tic = time.perf_counter()
        train_loss, train_acc = sgd_step(
            net, optimizer, feat_data_th, labels_th, train_data, device)
        compute_time = time.perf_counter() - tic
        # Gradients left over from the last mini-batch of this epoch.
        results.grad_norms += [copy_grad(net)]

        epoch_train_loss = np.mean(train_loss)
        epoch_train_acc = np.mean(train_acc)

        # --- validation ---
        valid_data = data_loader.get_valid_batch()
        epoch_valid_loss, epoch_valid_acc = inference(
            net, feat_data_th, labels_th, valid_data, device)
        tbar.set_postfix(loss=epoch_train_loss,
                         val_loss=epoch_valid_loss,
                         val_score=epoch_valid_acc)

        results.update(epoch_train_loss,
                       epoch_train_acc,
                       epoch_valid_loss,
                       epoch_valid_acc,
                       net, sample_time=sample_time, compute_time=compute_time)

        # Parameter values at the end of this epoch.
        results.state_dicts += [copy_model(net)]

    # Test-set evaluation is currently disabled:
    #     test_data = data_loader.get_test_batch()
    #     epoch_test_loss, epoch_test_acc = inference(results.best_model, feat_data_th, labels_th, test_data, device)
    #     results.test_loss = epoch_test_loss
    #     results.test_acc = epoch_test_acc
    #     print('Test_loss: %.4f | test_acc: %.4f' % (epoch_test_loss, epoch_test_acc))

    mean_sample_time = np.mean(results.sample_time)
    mean_compute_time = np.mean(results.compute_time)
    print('Average sampling time %.5fs, average computing time %.5fs'%
          (mean_sample_time, mean_compute_time))

    return results

In [8]:
import math
class GCN(nn.Module):
    """
    Plain stack of graph convolutions between two linear projections.

    Pipeline: ``linear_in`` -> ``n_layers`` x (GraphConv -> ReLU -> Dropout)
    -> ``linear_out``. Every graph convolution maps ``n_hid`` to ``n_hid``.
    ``criterion`` is stored on the module so training code can call
    ``model.criterion``.
    """

    def __init__(self, n_feat, n_hid, n_classes, n_layers, dropout, criterion):
        from layers import GraphConv
        super().__init__()
        self.n_layers = n_layers
        self.n_hid = n_hid

        # The graph convolutions are created first, before the projections.
        self.gcs = nn.ModuleList(
            [GraphConv(n_hid, n_hid) for _ in range(n_layers)])
        self.linear_in = nn.Linear(n_feat, n_hid)
        self.linear_out = nn.Linear(n_hid, n_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.criterion = criterion

    def forward(self, x, adj):
        """Return the output of ``linear_out`` for features ``x`` and adjacency ``adj``."""
        hidden = self.linear_in(x)
        for conv in self.gcs:
            hidden = self.dropout(self.relu(conv(hidden, adj)))
        return self.linear_out(hidden)
    
class ResGCN(nn.Module):
    """
    GCN variant with an identity skip connection around every layer.

    Pipeline: ``linear_in`` -> ``n_layers`` x
    (GraphConv -> ReLU -> Dropout, plus the layer input) -> ``linear_out``.
    ``criterion`` is stored on the module so training code can call
    ``model.criterion``.
    """

    def __init__(self, n_feat, n_hid, n_classes, n_layers, dropout, criterion):
        from layers import GraphConv
        super().__init__()
        self.n_layers = n_layers
        self.n_hid = n_hid

        # The graph convolutions are created first, before the projections.
        self.gcs = nn.ModuleList(
            [GraphConv(n_hid, n_hid) for _ in range(n_layers)])
        self.linear_in = nn.Linear(n_feat, n_hid)
        self.linear_out = nn.Linear(n_hid, n_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.criterion = criterion

    def forward(self, x, adj):
        """Return the output of ``linear_out`` for features ``x`` and adjacency ``adj``."""
        hidden = self.linear_in(x)
        for conv in self.gcs:
            # Copy of the layer input, added back after dropout.
            skip = hidden.clone()
            hidden = self.dropout(self.relu(conv(hidden, adj))) + skip
        return self.linear_out(hidden)
    
class GCNII(nn.Module):
    """
    Stack of ``GCNIILayer`` blocks between two linear projections.

    Each layer receives the running representation, the adjacency, the output
    of ``linear_in`` (``x_0``) and the per-layer scalars from
    ``get_alpha_beta``; its result goes through ReLU and Dropout.
    ``criterion`` is stored on the module so training code can call
    ``model.criterion``.
    """

    def __init__(self, n_feat, n_hid, n_classes, n_layers, dropout, criterion):
        from layers import GCNIILayer
        super().__init__()
        self.n_layers = n_layers
        self.n_hid = n_hid

        # The GCNII layers are created first, before the projections.
        self.gcs = nn.ModuleList(
            [GCNIILayer(n_hid, n_hid) for _ in range(n_layers)])
        self.linear_in = nn.Linear(n_feat, n_hid)
        self.linear_out = nn.Linear(n_hid, n_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.criterion = criterion

    def get_alpha_beta(self, ell):
        """Return ``(alpha, beta)`` for the 0-based layer index ``ell``.

        ``alpha`` is the constant 0.9; ``beta`` is ``log(0.5 / (ell + 1) + 1)``.
        """
        return 0.9, math.log(0.5 / (ell + 1) + 1)

    def forward(self, x, adj):
        """Return the output of ``linear_out`` for features ``x`` and adjacency ``adj``."""
        hidden = self.linear_in(x)
        # Copy of the initial projection, handed to every layer as `x_0`.
        x_0 = hidden.clone()
        for ell, layer in enumerate(self.gcs):
            alpha, beta = self.get_alpha_beta(ell)
            hidden = self.dropout(self.relu(layer(hidden, adj, x_0, alpha, beta)))
        return self.linear_out(hidden)

In [10]:
# Instantiate a GCN with the configured sizes; it is only inspected below,
# not trained.
model = GCN(
    n_feat=feat_data.shape[1],
    n_hid=args.nhid,
    n_classes=num_classes,
    n_layers=args.n_layers,
    dropout=args.dropout,
    criterion=criterion,
)

# def get_weight_sigval(model):
#     weight_sigval= dict()
#     for n, p in model.named_parameters():
#         if 'weight' in n and 'gcs' in n:
#             U, S, V = torch.svd(p.data, compute_uv=False)
#             weight_sigval[n] = S.max().item()
#     return weight_sigval
# get_weight_sigval(model)

def get_sigval(model):
    """
    Print singular-value statistics for the weight matrices of ``model``.

    For every parameter whose name contains 'weight' and that has at least
    two dimensions, prints the parameter name followed by the mean, maximum
    and minimum singular value. Parameters with fewer than two dimensions
    (e.g. the per-channel 'weight' of a normalisation layer) are skipped,
    because ``torch.svd`` raises on them.
    """
    for n, p in model.named_parameters():
        if 'weight' not in n or p.dim() < 2:
            continue
        _, val, _ = torch.svd(p.data)
        print(n, val.mean(), val.max(), val.min())
# Print the singular-value statistics of `model`'s weight parameters.
get_sigval(model)

gcs.0.linear.weight tensor(0.4865) tensor(1.0987) tensor(0.0082)
gcs.1.linear.weight tensor(0.4945) tensor(1.1197) tensor(0.0093)
linear_in.weight tensor(0.5737) tensor(0.6902) tensor(0.4574)
linear_out.weight tensor(0.5544) tensor(0.7141) tensor(0.3792)


In [None]:

# Architectures that can be selected through `--method`, in the order they
# are trained within each repeat. The name doubles as the first word of the
# result note, which the plotting cells use to group runs.
model_zoo = (('GCN', GCN), ('ResGCN', ResGCN), ('GCNII', GCNII))

results_list = []
for repeat in range(5):
    for model_name, model_cls in model_zoo:
        # `method` is a list, so this is an exact-name match
        # ('GCN' does not match 'ResGCN').
        if model_name not in method:
            continue

        model = model_cls(n_feat=feat_data.shape[1],
                          n_hid=args.nhid,
                          n_classes=num_classes,
                          n_layers=args.n_layers,
                          dropout=args.dropout,
                          criterion=criterion)

        results = train_model(
            model, data_loader,
            note="%s (L=%d, repeat=%d)" % (model_name, args.n_layers, repeat))
        results_list.append(results)

In [None]:
fig, axs = plt.subplots()

# Colours come from the public rcParams colour cycle; the private
# `axs._get_lines.prop_cycler` attribute is not available in recent
# matplotlib releases. Indexing by position keeps each algorithm's colour
# stable even when another algorithm was not trained.
cycle_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

for algo_idx, algorithm in enumerate(['GCN', 'ResGCN', 'GCNII']):
    color = cycle_colors[algo_idx % len(cycle_colors)]

    # Collect, for every repeat of this algorithm, the training-score curve
    # and the per-epoch |train loss - valid loss| curve.
    train_acc_runs = []
    y_vals = []
    for result in results_list:
        if result.note.split()[0] == algorithm:
            train_acc_runs.append(np.array(result.train_acc_record))
            train_loss = np.array(result.train_loss_record)
            valid_loss = np.array(result.loss_record)
            y_vals.append(np.abs(train_loss-valid_loss))

    # Nothing was trained for this algorithm (not listed in --method):
    # skip it instead of failing on an empty mean.
    if not y_vals:
        continue

    # Only plot up to the epoch at which the mean training score peaks.
    mean_train_acc = np.mean(train_acc_runs, axis=0)
    x_stop = np.argmax(mean_train_acc)

    y_val_mean = np.mean(y_vals, axis=0)[:x_stop]
    y_val_std = np.std(y_vals, axis=0)[:x_stop]
    x_vals = np.arange(len(y_val_mean))

    axs.plot(x_vals, y_val_mean, label='%s (L=%d)'%(algorithm, args.n_layers), color=color)
    axs.fill_between(x_vals, y_val_mean-y_val_std, y_val_mean+y_val_std ,alpha=0.3, color=color)

plt.title('Generalization gap / Iters')
axs.set_xlabel('Iters')
axs.set_ylabel('| Train Error - Valid Error |')

axs.grid(True)

fig.tight_layout()
plt.legend()
plt.savefig('generalization gap.pdf')

In [None]:
fig, axs = plt.subplots()

# Colours come from the public rcParams colour cycle; the private
# `axs._get_lines.prop_cycler` attribute is not available in recent
# matplotlib releases. Indexing by position keeps each algorithm's colour
# stable even when another algorithm was not trained.
cycle_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# (recorder attribute, legend prefix, extra line style) for the two curves
# drawn per algorithm: training score (solid) and validation score (dashed).
curve_specs = (
    ('train_acc_record', 'Train', {}),
    ('acc_record', 'Valid', {'linestyle': '--'}),
)

for algo_idx, algorithm in enumerate(['GCN', 'ResGCN', 'GCNII']):
    color = cycle_colors[algo_idx % len(cycle_colors)]

    runs = [result for result in results_list
            if result.note.split()[0] == algorithm]
    # Nothing was trained for this algorithm (not listed in --method).
    if not runs:
        continue

    for record_name, legend_prefix, line_style in curve_specs:
        y_vals = [np.array(getattr(result, record_name)) for result in runs]

        # Mean and spread over the repeats, per epoch.
        y_val_mean = np.mean(y_vals, axis=0)
        y_val_std = np.std(y_vals, axis=0)
        x_vals = np.arange(len(y_val_mean))

        axs.plot(x_vals, y_val_mean,
                 label='%s: %s (L=%d)'%(legend_prefix, algorithm, args.n_layers),
                 color=color, **line_style)
        axs.fill_between(x_vals, y_val_mean-y_val_std, y_val_mean+y_val_std,
                         alpha=0.3, color=color, **line_style)


plt.title('F1-score / Iters')
axs.set_xlabel('Iters')
axs.set_ylabel('F1-score')

axs.grid(True)

fig.tight_layout()
plt.legend()
plt.savefig('gcn_resnet_gcnii_f1_score.pdf')