In [15]:
from __future__ import division
from __future__ import print_function

import time
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

from utils import load_data, test, train, accuracy
from models import GCN_2, GCN_3
from layers import GraphConvolution

In [16]:
adj, features, labels, idx_train, idx_val, idx_test = load_data(path="../data/cora/", dataset="cora")

Loading cora dataset...


In [40]:
hidden = 16
dropout = 0.5
lr = 0.01
weight_decay = 5e-4
num_epochs = 200
smooth_fac = 0.2

In [29]:
class ite_GCN(nn.Module):
    def __init__(self, nfeat, nclass, dropout, train_nite, eval_nite=0, allow_grad=True, smooth_fac=0):
        '''     
        - This model is a 1-layer GCN with nite iterations, followed by a linear layer and a log_softmax
            - GC layer:     nfeat to nfeat
            - linear layer: nfeat to nclass, (to cast hidden representations of nodes to a dimension of nclass)
        - Activation: ReLu
        - Input:
            - nfeat:        the number of features of each node
            - nclass:       the number of target classes (we are doing a node classification task here)
            - dropout:      dropout rate
            - train_nite:   the number of iterations during training
            - eval_nite:    the number of iterations during evaluation, 
                            if not specified (or invalid), intialize to the same as train_nite
            - allow_grad:   (bool) defaulted to True. 
                            whether or nor allow gradients to flow through all GC iterations, 
                            if False, gradients will only flow to the last iteration
            - smooth_fac:   a number in [0,1], smoothing factor, controls how much of the OLD iteration result is
                            counted in the skip connection in each iteration
                            for example, smooth_fac = x means y_{i+1} = x * y_i + (1-x) * y_{i+1}
                            Invalid inputs will be treated as 0.
        - Output:
            - A probability vector of length nclass, by log_softmax
        '''
        super(ite_GCN, self).__init__()

        self.gc = GraphConvolution(nfeat, nfeat)
        self.linear_no_bias = nn.Linear(nfeat, nclass, bias=False)
        self.dropout = dropout
        self.train_nite = train_nite
        self.allow_grad = allow_grad
        self.smooth_fac = smooth_fac
        self.eval_nite = eval_nite
        
        if (smooth_fac > 1) or (smooth_fac < 0):
            print("Invalid smoothing factor. Treat as 0.")
            self.smooth_fac = 0
        if (eval_nite <= 0):
            print("Unspecified or invalid number of iterations for inference. Treat as the same as training iterations.")
            self.eval_nite = self.train_nite
        
        print("Initialize a 1-layer GCN with ", self.train_nite, "iterations")
        print("Gradient flows to all iterations: ", allow_grad)

    def run_one_layer(self, x, adj):
        x_old = x
        x_new = self.gc(x, adj)
        x = F.relu(self.smooth_fac * x_old + (1 - self.smooth_fac) * x_new)
        x = F.dropout(x, self.dropout, training=self.training)
        return x

    def forward(self, x, adj):
        if self.training:
            for i in range(self.train_nite):
                if not self.allow_grad:
                    # print("no no no! new new")
                    x = x.detach()
                    x = self.run_one_layer(x, adj)
                    # x.requires_grad_()
                    # self.gc.weight.requires_grad_()
                    # self.gc.weight.retain_grad()
                    # print(self.gc.weight.requires_grad)
                    # for name, param in self.named_parameters():
                    #         print(name, param.grad)
                else:
                    # print("yea yea yea")
                    x = self.run_one_layer(x, adj)
                    # for name, param in self.named_parameters():
                    #         print(name, param.grad)
        else:
            for i in range(self.eval_nite):
                x = self.run_one_layer(x, adj)

        x = self.linear_no_bias(x)
        # self.gc.weight.requires_grad_()
        # print("???")
        # for name, param in self.named_parameters():
        #     if param.grad is not None:
        #         print(name, param.grad.abs().sum())
        return F.log_softmax(x, dim=1)
        

In [41]:
def run_experiment(num_epochs, model, lr, weight_decay, features, adj, idx_train, idx_val, idx_test, labels):
    print("runrunrun!")
    optimizer = optim.Adam(model.parameters(),
                       lr=lr, weight_decay=weight_decay)
    t_total = time.time()
    loss_TRAIN = []
    acc_TRAIN = []
    loss_VAL = []
    acc_VAL = []
    for epoch in range(num_epochs):
        t = time.time()
    
        model.train()
        optimizer.zero_grad()
        
        output = model(features, adj)
        
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        loss_TRAIN.append(loss_train)
        acc_train = accuracy(output[idx_train], labels[idx_train])
        acc_TRAIN.append(acc_train)

        # t3 = time.time()
        loss_train.backward()
        # t4 = time.time()
        # print("backward: ", t4-t3)
        # print("before step: ", model.gc.weight)
        optimizer.step()
        # print("after step: ", model.gc.weight)

        
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        # t1 = time.time()
        output = model(features, adj)
        # print("eval output: ", output)
        # t2 = time.time()
        # print("forward time: ", t2-t1)

        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        loss_VAL.append(loss_val)
        acc_val = accuracy(output[idx_val], labels[idx_val])
        acc_VAL.append(acc_val)
        print('Epoch: {:04d}'.format(epoch+1),
            'loss_train: {:.4f}'.format(loss_train.item()),
            'acc_train: {:.4f}'.format(acc_train.item()),
            'loss_val: {:.4f}'.format(loss_val.item()),
            'acc_val: {:.4f}'.format(acc_val.item()),
            'time: {:.4f}s'.format(time.time() - t))
        

    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Testing
    test(model, features, adj, idx_test, labels)
    return loss_TRAIN, acc_TRAIN, loss_VAL, acc_VAL

In [30]:
model3 = ite_GCN(nfeat=features.shape[1],
            nclass=labels.max().item() + 1,
            dropout=dropout,
            train_nite = 3,
            allow_grad=True,
            smooth_fac=0.3)

Unspecified or invalid number of iterations for inference. Treat as the same as training iterations.
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True


# totally messed up

In [31]:
loss_TRAIN, acc_TRAIN, loss_VAL, acc_VAL = run_experiment(num_epochs=200, model=model3, lr=lr, weight_decay=weight_decay, features=features, adj=adj, idx_train=idx_train, idx_val=idx_val, idx_test=idx_test, labels=labels)

runrunrun!
before step:  Parameter containing:
tensor([[-1.4470e-02, -1.2860e-02,  2.6237e-02,  ...,  1.6917e-02,
         -2.5723e-02,  1.6576e-02],
        [ 1.3976e-02, -7.6831e-03, -2.3939e-02,  ...,  2.5840e-02,
         -6.5183e-03, -1.8812e-02],
        [ 2.1743e-02,  1.9856e-02,  5.1084e-03,  ..., -1.7527e-02,
          1.6048e-02, -2.1865e-02],
        ...,
        [ 8.4447e-03,  2.2534e-02,  1.0558e-02,  ...,  1.5942e-02,
          2.5936e-02, -1.3098e-02],
        [ 7.3724e-03,  1.6492e-02, -2.6382e-02,  ..., -1.3280e-02,
         -2.0492e-03, -1.6572e-02],
        [ 9.4801e-05, -2.1704e-02,  2.3851e-03,  ..., -1.8516e-02,
         -1.3804e-02,  1.6619e-02]], requires_grad=True)
after step:  Parameter containing:
tensor([[-0.0045, -0.0029,  0.0162,  ...,  0.0069, -0.0157,  0.0266],
        [ 0.0040,  0.0023, -0.0139,  ...,  0.0158,  0.0034, -0.0088],
        [ 0.0317,  0.0099, -0.0049,  ..., -0.0075,  0.0260, -0.0119],
        ...,
        [-0.0015,  0.0125,  0.0006,  ...,  

In [None]:
import matplotlib.pyplot as plt

In [None]:
l_t = []
for ten in loss_TRAIN:
    l_t.append(ten.item())

In [None]:
plt.plot(l_t, 'r')

In [None]:
a_t = []
for ten in acc_TRAIN:
    a_t.append(ten.item())

In [None]:
plt.plot(a_t, 'r')

In [None]:
l_v = []
for ten in loss_VAL:
    l_v.append(ten.item())

In [None]:
plt.plot(l_v, 'r')

In [None]:
a_v = []
for ten in acc_VAL:
    a_v.append(ten.item())

In [None]:
plt.plot(a_v, 'r')

In [None]:
model4 = ite_GCN(nfeat=features.shape[1],
            nclass=labels.max().item() + 1,
            dropout=dropout,
            train_nite = 3,
            allow_grad=False,
            smooth_fac=0.3)

In [None]:
run_experiment(num_epochs=400, model=model4, lr=lr, weight_decay=weight_decay, features=features, adj=adj, idx_train=idx_train, idx_val=idx_val, idx_test=idx_test, labels=labels)

In [None]:
for name, param in model3.named_parameters():
    if param.grad is not None:
        print(name, param.grad.abs().sum())

In [None]:
model1 = GCN_3(nfeat=features.shape[1],
            nhid=hidden,
            nclass=labels.max().item() + 1,
            dropout=dropout)

In [42]:
model2 = ite_GCN(nfeat=features.shape[1],
            nclass=labels.max().item() + 1,
            dropout=0,
            train_nite= 2,
            eval_nite= 0,
            allow_grad=True,
            smooth_fac=smooth_fac)
run_experiment(num_epochs, model2, lr, weight_decay, features, adj, idx_train, idx_val, idx_test, labels)


Unspecified or invalid number of iterations for inference. Treat as the same as training iterations.
Initialize a 1-layer GCN with  2 iterations
Gradient flows to all iterations:  True
runrunrun!
Epoch: 0001 loss_train: 1.9467 acc_train: 0.1500 loss_val: 1.8242 acc_val: 0.3500 time: 0.5056s
Epoch: 0002 loss_train: 1.8288 acc_train: 0.2929 loss_val: 1.9568 acc_val: 0.3500 time: 0.5538s
Epoch: 0003 loss_train: 1.9938 acc_train: 0.2929 loss_val: 1.7933 acc_val: 0.3500 time: 0.4208s
Epoch: 0004 loss_train: 1.7844 acc_train: 0.2929 loss_val: 1.8393 acc_val: 0.1567 time: 0.3897s
Epoch: 0005 loss_train: 1.8181 acc_train: 0.2000 loss_val: 1.8175 acc_val: 0.1567 time: 0.3952s
Epoch: 0006 loss_train: 1.7826 acc_train: 0.2000 loss_val: 1.7674 acc_val: 0.1567 time: 0.3865s
Epoch: 0007 loss_train: 1.7075 acc_train: 0.2000 loss_val: 1.6644 acc_val: 0.1700 time: 0.3814s
Epoch: 0008 loss_train: 1.5878 acc_train: 0.2000 loss_val: 1.5229 acc_val: 0.3600 time: 0.3854s
Epoch: 0009 loss_train: 1.4491 acc_t

([tensor(1.9467, grad_fn=<NllLossBackward0>),
  tensor(1.8288, grad_fn=<NllLossBackward0>),
  tensor(1.9938, grad_fn=<NllLossBackward0>),
  tensor(1.7844, grad_fn=<NllLossBackward0>),
  tensor(1.8181, grad_fn=<NllLossBackward0>),
  tensor(1.7826, grad_fn=<NllLossBackward0>),
  tensor(1.7075, grad_fn=<NllLossBackward0>),
  tensor(1.5878, grad_fn=<NllLossBackward0>),
  tensor(1.4491, grad_fn=<NllLossBackward0>),
  tensor(1.3233, grad_fn=<NllLossBackward0>),
  tensor(1.1683, grad_fn=<NllLossBackward0>),
  tensor(1.0475, grad_fn=<NllLossBackward0>),
  tensor(0.9025, grad_fn=<NllLossBackward0>),
  tensor(0.8180, grad_fn=<NllLossBackward0>),
  tensor(0.7659, grad_fn=<NllLossBackward0>),
  tensor(0.6561, grad_fn=<NllLossBackward0>),
  tensor(0.5746, grad_fn=<NllLossBackward0>),
  tensor(0.5410, grad_fn=<NllLossBackward0>),
  tensor(0.4596, grad_fn=<NllLossBackward0>),
  tensor(0.4081, grad_fn=<NllLossBackward0>),
  tensor(0.3659, grad_fn=<NllLossBackward0>),
  tensor(0.3071, grad_fn=<NllLossB

In [45]:
for i in range(2,100):
    model = ite_GCN(nfeat=features.shape[1],
            nclass=labels.max().item() + 1,
            dropout=0,
            train_nite= 3,
            eval_nite= i,
            allow_grad=True,
            smooth_fac=smooth_fac)
    model.load_state_dict(model2.state_dict().copy())
    print("i: ", i)
    test(model, features, adj, idx_test, labels)

Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  2
Test set results: loss= 0.7559 accuracy= 0.7880
inference time:  0.14795398712158203
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  3
Test set results: loss= 17.2465 accuracy= 0.2160
inference time:  0.22200822830200195
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  4
Test set results: loss= 75.8431 accuracy= 0.4480
inference time:  0.2838730812072754
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  5
Test set results: loss= 800.0170 accuracy= 0.1070
inference time:  0.3578760623931885
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  6
Test set results: loss= 3249.5498 accuracy= 0.3010
inference time:  0.4234731197357178
Initialize a 1-layer GCN with  3 iterations
Gradient flows to all iterations:  True
i:  7
Test set results: loss= 28781.5

KeyboardInterrupt: 