In [1]:
import sys

import pickle as pkl

import networkx as nx
import numpy as np
import scipy.sparse as sp



def load_data(dataset_str = 'cora'):
    """Load one citation dataset from the pickled ``data/ind.<dataset>.*`` files.

    Args:
        dataset_str: dataset name used to build the file names (default 'cora').

    Returns:
        tuple ``(adj, features, labels, idx_train, idx_val, idx_test)``:
        the symmetrized sparse adjacency matrix, the row-normalized dense
        feature array, the stacked label array, and the index collections
        of the train / validation / test splits.
    """
    suffixes = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    needs_encoding = sys.version_info > (3, 0)
    loaded = []
    for suffix in suffixes:
        path = "data/ind.{}.{}".format(dataset_str, suffix)
        with open(path, 'rb') as handle:
            if needs_encoding:
                # pickles were written by Python 2; latin1 decodes their byte strings
                loaded.append(pkl.load(handle, encoding='latin1'))
            else:
                loaded.append(pkl.load(handle))
    x, y, tx, ty, allx, ally, graph = loaded

    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    # stack train + test rows, then put the test rows back at their listed positions
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = normalize(features)

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]     # onehot

    # build symmetric adjacency matrix: keep the larger of adj[i, j] and adj[j, i]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    transpose_larger = adj.T > adj
    adj = adj + adj.T.multiply(transpose_larger) - adj.multiply(transpose_larger)

    n_labeled = len(y)
    idx_train = range(n_labeled)                    # training data index
    idx_val = range(n_labeled, n_labeled + 500)     # validation data index
    idx_test = test_idx_range.tolist()              # test data index

    dense_features = np.array(features.todense())
    return adj, dense_features, labels, idx_train, idx_val, idx_test

def parse_index_file(filename):
    """Parse a text file that holds one integer index per line.

    Args:
        filename: path of the index file.

    Returns:
        list of int, in file order. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    index = []
    # `with` guarantees the handle is closed even if a line fails to parse
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line:
                index.append(int(line))
    return index

def normalize(mx):
    """Row-normalize sparse matrix

    Each row is scaled by the inverse of its sum, so non-empty rows sum to 1.
    Rows whose sum is zero are left as all-zero rows.

    Args:
        mx: scipy sparse matrix.

    Returns:
        The product ``diag(1 / rowsum) . mx``.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    if not np.issubdtype(rowsum.dtype, np.floating):
        # integer sums cannot be inverted in place (np.power(int, -1) raises)
        rowsum = rowsum.astype(np.float64)
    r_inv = np.zeros_like(rowsum)
    nonzero = rowsum != 0
    # invert only the non-zero sums: no divide-by-zero warning, empty rows get 0
    r_inv[nonzero] = 1.0 / rowsum[nonzero]
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)


if __name__ == '__main__':
    # Smoke test: load cora and report the size of every returned piece.
    adj, features, labels, idx_train, idx_val, idx_test = load_data('cora')
    for caption, matrix in (('adj shape', adj),                # adjacency matrix with Shape(2708, 2708)
                            ('feature shape', features),       # feature matrix, Shape(2708, 1433)
                            ('label shape', labels)):          # label matrix, Shape(2708, 7)
        print(caption, matrix.shape)
    split_sizes = (len(idx_train), len(idx_val), len(idx_test))
    print('train/validation/test split: %s/%s/%s' % split_sizes)


adj shape (2708, 2708)
feature shape (2708, 1433)
label shape (2708, 7)
train/validation/test split: 140/500/1000


In [2]:
import numpy as np
import random
from torch import LongTensor, Tensor
class FeatureGraphDataset(object):
    '''Node features, labels and adjacency lists for semi-supervised learning.

    Call `setting` before any of the `*_batch` methods: it creates the
    `test_ids`, `label_ids` and `unlabel_ids` lists those methods sample from.
    '''

    def __init__(self, features, label, adj):
        '''Initalization
        
        Manually initalize a feature and graph dataset. Every node that has
        neighbours gets its feature row replaced by an equal mix of its own
        row and the mean of its neighbours' rows.

        NOTE: `features` is modified in place, and nodes are processed in
        `adj` iteration order, so a node visited later sees the already
        smoothed rows of neighbours visited earlier.

        Args:
            features: numpy ndarray, [[f1, f2, ...], [f1, f2, ...]]
            label: numpy ndarray, [0, 1, 2, 0, ...], label[i] == -1 if its class is unknow
            adj: dict mapping node id to list of neighbour ids, {0: [1, 2], 1: [0, 3], ...}
        '''
        assert len(features) == len(label)
        assert type(features) == type(label) == np.ndarray
        self.features, self.label = features, label
        self.n = len(features) # num of instances
        self.m = np.max(label) + 1 # num of classes
        self.k = features.shape[1] # num of features
        self.adj = adj
        ratio = 0.5
        for k, v in adj.items():
            s = len(v)
            if s == 0:
                # isolated node: nothing to average, keep its own features
                continue
            # sum of the neighbours' rows (no dependency on functools.reduce)
            adj_features = self.features[list(v)].sum(axis=0)
            self.features[k] = self.features[k] * ratio + adj_features * (1 - ratio) / s

    def setting(self, label_num_per_class, test_num):
        '''Set label data and test set in semi-supervised learning

        Label data and test set should be settled at first. 

        Args:
            label_num_per_class: maximum number of labelled nodes kept per class.
            test_num: number of nodes randomly drawn as the test set.

        Sets `test_ids`, `label_ids`, `unlabel_ids` (every node that is not
        in `label_ids`, test nodes included), `test_num` and `label_num`
        (the number of nodes in `label_ids`).
        '''
        self.test_ids = random.sample(range(self.n), test_num)
        remains = set(range(self.n)) - set(self.test_ids)
        num_of_class = [0] * int(self.m)
        self.label_ids = []
        for i in remains:
            cls = int(self.label[i])
            if cls < 0:
                # unknown class (-1): never use as labelled data
                continue
            if num_of_class[cls] < label_num_per_class:
                self.label_ids.append(i)
                num_of_class[cls] += 1
        self.unlabel_ids = list(set(range(self.n)) - set(self.label_ids))
        self.test_num, self.label_num = test_num, len(self.label_ids)


    def label_batch(self, batch_size, tensor = True):
        '''Return a batch of label data features

        Random sample (without replacement) from label data

        Args:
            batch_size: number of labelled nodes to draw; must not exceed len(label_ids).
            tensor: if True return torch tensors, otherwise a list and numpy arrays.

        Return:
            tuple: ([id0, id1, ...], [[f1, f2, ...], ...(batch_size)](type: numpy.ndarray), [0,1,2,...(batch_size)](type: numpy.ndarray))
        '''
        assert(len(self.label_ids) >= batch_size)
        ids = random.sample(self.label_ids, batch_size)
        return (LongTensor(ids), Tensor(self.features[ids]), LongTensor(self.label[ids])) if tensor else (ids, self.features[ids], self.label[ids])
    
    def unlabel_batch(self, batch_size, tensor = True):
        '''Return a batch of unlabel data features
        
        Random sample (without replacement) from unlabel data

        Args:
            batch_size: number of nodes to draw, or -1 for all of `unlabel_ids`.
            tensor: if True return torch tensors, otherwise a list and a numpy array.

        Return:
            tuple: ([id0, ...], [[f1, f2, ...], ...(batch_size)](type: numpy.ndarray))
        '''
        if batch_size == -1:
            ids = self.unlabel_ids
        else:
            ids = random.sample(self.unlabel_ids, batch_size)
        return (LongTensor(ids), Tensor(self.features[ids])) if tensor else (ids, self.features[ids])

    def test_batch(self, batch_size = -1, tensor = True):
        '''Return (ids, features, labels) for the test set

        Args:
            batch_size: number of test nodes to draw, or -1 (default) for all of them.
            tensor: if True return torch tensors, otherwise a list and numpy arrays.
        '''
        if batch_size == -1:
            ids = self.test_ids
        else:
            ids = random.sample(self.test_ids, batch_size)
        return (LongTensor(ids), Tensor(self.features[ids]), LongTensor(self.label[ids])) if tensor else (ids, self.features[ids], self.label[ids])

    def adj_batch(self, batch, tensor = True):
        '''Return one randomly chosen neighbour (id and features) per node in `batch`

        Args:
            batch: iterable of node ids (python ints or tensor elements).
            tensor: if True return torch tensors, otherwise a list and a numpy array.
        '''
        # int() lets elements of a LongTensor be used as dictionary keys
        ids = [random.choice(self.adj[int(i)]) for i in batch]
        return (LongTensor(ids), Tensor(self.features[ids])) if tensor else (ids, self.features[ids])

    def read_embbedings(self, embbeding_file):
        '''read graph embbedings from file

        Read graph embbedings generated by OpenNE system.        
        The first line holds "<num_nodes> <dim>"; every following non-blank
        line holds a node id followed by its `dim` embedding values. Sets
        `self.d` and `self.embbedings` (rows of nodes absent from the file
        stay zero).
        '''
        with open(embbeding_file, 'r') as f:
            n, self.d = [int(i) for i in f.readline().split()]
            assert n == self.n
            self.embbedings = np.zeros((n, self.d))
            for line in f:
                line = line.split()
                if not line:
                    # tolerate blank / trailing lines
                    continue
                self.embbedings[int(line[0])] = [float(i) for i in line[1:]]


# Function

In [3]:
import torch
from torch.nn.parameter import Parameter
import torch.nn.functional as F
import math
import numpy as np
def log_sum_exp(x, axis = 1):
    '''Numerically stable log(sum(exp(x))) along `axis`

    The maximum along `axis` is subtracted before exponentiating so large
    inputs do not overflow.

    Args:
        x: torch tensor.
        axis: dimension to reduce (default 1).

    Return:
        tensor with dimension `axis` removed.
    '''
    # the max and the sum must reduce the same dimension
    m = torch.max(x, dim = axis, keepdim = True)[0]
    return m.squeeze(axis) + torch.log(torch.sum(torch.exp(x - m), dim = axis))
def normalize_infnorm(x, eps=1e-8):
    '''Scale every column of `x` by its largest absolute value

    Args:
        x: numpy ndarray.
        eps: constant added to each column maximum so an all-zero column
            does not divide by zero (default 1e-8).

    Return:
        numpy ndarray of the same shape as `x`.
    '''
    assert type(x) == np.ndarray
    return x / (abs(x).max(axis = 0) + eps)

class LinearWeightNorm(torch.nn.Module):
    '''Linear layer whose weight rows are rescaled to a fixed norm

    In `forward` every row of `weight` is divided by its L2 norm and
    multiplied by `weight_scale` before the linear map is applied.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        bias: if True add a learnable bias initialised to zero.
        weight_scale: None for a constant scale of 1, or an int used as the
            initial value of a learnable per-output scale.
        weight_init_stdv: standard deviation of the normal weight initialisation.
    '''
    def __init__(self, in_features, out_features, bias=True, weight_scale=None, weight_init_stdv=0.1):
        super(LinearWeightNorm, self).__init__()
        self.in_features, self.out_features = in_features, out_features
        initial_weight = torch.randn(out_features, in_features) * weight_init_stdv
        self.weight = Parameter(initial_weight)
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.zeros(out_features))
        if weight_scale is None:
            # plain python constant: not a parameter, never trained
            self.weight_scale = 1 
        else:
            assert type(weight_scale) == int
            self.weight_scale = Parameter(torch.ones(out_features, 1) * weight_scale)

    def forward(self, x):
        # L2 norm of every output row, kept as a column for broadcasting
        row_norm = self.weight.pow(2).sum(dim = 1, keepdim = True).sqrt()
        scaled_weight = self.weight * self.weight_scale / row_norm
        return F.linear(x, scaled_weight, self.bias)

    def __repr__(self):
        pieces = [self.__class__.__name__, '(',
                  'in_features=', str(self.in_features),
                  ', out_features=', str(self.out_features), ')']
        return ''.join(pieces)

def pull_away_term(x):
    '''pull-away loss

    Mean squared cosine similarity over all ordered pairs of distinct rows.

    Args:
        x: type=> torch Tensor or Variable, size=>[batch_size * feature_dim], generated samples

    Return:
        scalar Loss
    '''
    unit_rows = F.normalize(x)
    squared_cosine = unit_rows.matmul(unit_rows.t()) ** 2
    batch = len(unit_rows)
    # drop the diagonal: each row's similarity with itself
    off_diagonal_total = squared_cosine.sum() - squared_cosine.diag().sum()
    return off_diagonal_total / (batch * (batch - 1))

   


# Model

In [4]:
import torch
from torch.nn.parameter import Parameter
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import pdb
class Discriminator(nn.Module):
    '''Stack of five weight-normalised ELU layers followed by a linear output layer

    While the module is in training mode, gaussian noise is added to the
    input and to the output of every hidden layer.

    Args:
        input_dim: size of one flattened input sample.
        output_dim: number of outputs of the final layer.
    '''
    def __init__(self, input_dim = 28 ** 2, output_dim = 10):
        super(Discriminator, self).__init__()
        self.input_dim = input_dim
        hidden_shapes = [(input_dim, 500), (500, 500), (500, 250), (250, 250), (250, 250)]
        self.layers = torch.nn.ModuleList(
            [LinearWeightNorm(n_in, n_out) for n_in, n_out in hidden_shapes]
        )
        self.final = LinearWeightNorm(250, output_dim, weight_scale=1)

    def _noise(self, size, stdv, cuda):
        '''Gaussian noise of the given size in training mode, a single zero otherwise'''
        if self.training:
            noise = torch.randn(size) * stdv
        else:
            noise = torch.Tensor([0])
        if cuda:
            noise = noise.cuda()
        return Variable(noise, requires_grad = False)

    def forward(self, x, feature = False, cuda = False, first = False):
        '''Run the discriminator

        Args:
            x: input, reshaped to [-1, input_dim].
            feature: if True also return the last hidden activation (before its noise).
            cuda: if True the noise tensors are moved to the GPU.
            first: if True return the raw output of the first layer only.

        Return:
            final-layer output, or (last hidden activation, final-layer output) when `feature` is True.
        '''
        x = x.view(-1, self.input_dim)
        x = x + self._noise(x.size(), 0.05, cuda)
        if first:
            return self.layers[0](x)
        for layer in self.layers:
            x_f = F.elu(layer(x))
            x = (x_f + self._noise(x_f.size(), 0.5, cuda))
        logits = self.final(x)
        if feature:
            return x_f, logits
        return logits


class Generator(nn.Module):
    '''Maps uniform random noise to samples of size `output_dim`

    Two bias-free linear layers, each followed by a non-affine batch norm,
    a learnable bias and ELU, then a weight-normalised layer with tanh.
    '''
    def __init__(self, z_dim, output_dim = 28 ** 2):
        '''
        Args:
            z_dim: size of the noise vector drawn in `forward`.
            output_dim: size of one generated sample.
        '''
        super(Generator, self).__init__()
        self.z_dim = z_dim
        self.fc1 = nn.Linear(z_dim, 500, bias = False)
        self.bn1 = nn.BatchNorm1d(500, affine = False, eps=1e-6, momentum = 0.5)
        self.fc2 = nn.Linear(500, 500, bias = False)
        self.bn2 = nn.BatchNorm1d(500, affine = False, eps=1e-6, momentum = 0.5)
        self.fc3 = LinearWeightNorm(500, output_dim, weight_scale = 1)
        # learnable shifts added after the (non-affine) batch norms in forward
        self.bn1_b = Parameter(torch.zeros(500))
        self.bn2_b = Parameter(torch.zeros(500))
        nn.init.xavier_uniform(self.fc1.weight)
        nn.init.xavier_uniform(self.fc2.weight)
    def forward(self, batch_size, cuda = False, seed = -1):
        '''Generate `batch_size` samples

        Args:
            batch_size: number of samples to generate.
            cuda: if True the noise is moved to the GPU before the first layer.
            seed: not used by this method.

        Return:
            tensor of size [batch_size, output_dim]; tanh keeps values in (-1, 1).
        '''
        # noise is uniform on [0, 1) (torch.rand), marked volatile outside training mode
        x = Variable(torch.rand(batch_size, self.z_dim), requires_grad = False, volatile = not self.training)
        if cuda:
            x = x.cuda()
        x = F.elu(self.bn1(self.fc1(x)) + self.bn1_b)
        x = F.elu(self.bn2(self.fc2(x)) + self.bn2_b)
        x = F.tanh(self.fc3(x))
        return x



In [9]:
# -*- coding:utf-8 -*-
from __future__ import print_function 
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader,TensorDataset
import sys
from torch.nn.parameter import Parameter
import argparse
import tensorboardX
import os
import random
import pickle as pkl
class GraphSGAN(object):
    def __init__(self, G, D, dataset, args):
        if os.path.exists(args.savedir):
            print('Loading model from ' + args.savedir)
            self.G = torch.load(os.path.join(args.savedir, 'G.pkl'))
            self.D = torch.load(os.path.join(args.savedir, 'D.pkl'))
            self.embedding_layer = torch.load(os.path.join(args.savedir, 'embedding.pkl'))
        else:
            os.makedirs(args.savedir)
            self.G = G
            self.D = D
            self.embedding_layer = nn.Embedding(dataset.n, dataset.d)
            self.embedding_layer.weight = Parameter(torch.Tensor(dataset.embbedings))
            torch.save(self.G, os.path.join(args.savedir, 'G.pkl'))
            torch.save(self.D, os.path.join(args.savedir, 'D.pkl'))
            torch.save(self.embedding_layer, os.path.join(args.savedir, 'embedding.pkl'))

        self.writer = tensorboardX.SummaryWriter(log_dir=args.logdir)
        if args.cuda:
            self.G.cuda()
            self.D.cuda() # self.embedding_layer is on CPU
        self.dataset = dataset
        self.Doptim = optim.Adam(self.D.parameters(), lr=args.lr, betas= (args.momentum, 0.999))
        self.Goptim = optim.Adam(self.G.parameters(), lr=args.lr, betas = (args.momentum,0.999))
        self.args = args

    def trainD(self, idf_label, y, idf_unlabel):
        x_label, x_unlabel, y = self.make_input(*idf_label), self.make_input(*idf_unlabel), Variable(y, requires_grad = False)
        if self.args.cuda:
            x_label, x_unlabel, y = x_label.cuda(), x_unlabel.cuda(), y.cuda()
        output_label, (mom_un, output_unlabel), output_fake = self.D(x_label, cuda=self.args.cuda), self.D(x_unlabel, cuda=self.args.cuda, feature = True), self.D(self.G(x_unlabel.size()[0], cuda = self.args.cuda).view(x_unlabel.size()).detach(), cuda=self.args.cuda)
        logz_label, logz_unlabel, logz_fake = log_sum_exp(output_label), log_sum_exp(output_unlabel), log_sum_exp(output_fake) # log ∑e^x_i
        prob_label = torch.gather(output_label, 1, y.unsqueeze(1)) # log e^x_label = x_label 
        loss_supervised = -torch.mean(prob_label) + torch.mean(logz_label)
        loss_unsupervised = 0.5 * (-torch.mean(logz_unlabel) + torch.mean(F.softplus(logz_unlabel))  + # real_data: log Z/(1+Z)
                            torch.mean(F.softplus(logz_fake)) ) # fake_data: log 1/(1+Z)
        entropy = -torch.mean(F.softmax(output_unlabel, dim = 1) * F.log_softmax(output_unlabel, dim = 1))
        pt = pull_away_term(mom_un)
        loss = loss_supervised + self.args.unlabel_weight * loss_unsupervised + entropy + pt
        acc = torch.mean((output_label.max(1)[1] == y).float())
        self.Doptim.zero_grad()
        loss.backward()
        self.Doptim.step()
        return loss_supervised.data.cpu().numpy(), loss_unsupervised.data.cpu().numpy(), acc
    
    def trainG(self, idf_unlabel):
        x_unlabel = self.make_input(*idf_unlabel)
        if self.args.cuda:
            x_unlabel = x_unlabel.cuda()
        fake = self.G(x_unlabel.size()[0], cuda = self.args.cuda).view(x_unlabel.size())
        mom_gen, output_fake = self.D(fake, feature=True, cuda=self.args.cuda)
        mom_unlabel, output_unlabel = self.D(x_unlabel, feature=True, cuda=self.args.cuda)
        loss_pt = pull_away_term(mom_gen)
        mom_gen = torch.mean(mom_gen, dim = 0)
        mom_unlabel = torch.mean(mom_unlabel, dim = 0) 
        loss_fm = torch.mean(torch.abs(mom_gen - mom_unlabel))
        loss = loss_fm + loss_pt 
        self.Goptim.zero_grad()
        self.Doptim.zero_grad()
        loss.backward()
        self.Goptim.step()
        return loss.data.cpu().numpy()

    def make_input(self, ids, feature, volatile = False):
        '''Concatenate feature and embeddings

        Args:
            feature: Size=>[batch_size, dataset.k], Type=>FloatTensor
            ids: Size=>[batch_size], Type=>LongTensor
        '''
        embedding = self.embedding_layer(Variable(ids, volatile = volatile)).detach() # detach temporarily
        return torch.cat((Variable(feature), embedding), dim = 1)
    def train(self):
        gn = 0
        NUM_BATCH = 100
        for epoch in range(self.args.epochs):
            self.G.train()
            self.D.train()
            self.D.turn = epoch
            loss_supervised = loss_unsupervised = loss_gen = accuracy = 0.
            for batch_num in range(NUM_BATCH):
                # extract batch from dataset
                idf_unlabel1 = self.dataset.unlabel_batch(self.args.batch_size)
                idf_unlabel2 = self.dataset.unlabel_batch(self.args.batch_size)
                id0, xf, y = self.dataset.label_batch(self.args.batch_size)

                # train D
                ll, lu, acc = self.trainD((id0, xf), y, idf_unlabel1)
                loss_supervised += ll
                loss_unsupervised += lu
                accuracy += acc

                # train G on unlabeled data
                lg = self.trainG(idf_unlabel2)
                loss_gen += lg
                # print and record logs 
                if (batch_num + 1) % self.args.log_interval == 0:
                    print('Training: %d / %d' % (batch_num + 1, NUM_BATCH))
                    gn += 1
                    self.writer.add_scalars('loss', {'loss_supervised':ll, 'loss_unsupervised':lu, 'loss_gen':lg}, gn)
                    self.writer.add_histogram('real_feature', self.D(self.make_input(id0, xf, volatile = True).cuda(), cuda=self.args.cuda, feature = True)[0], gn)
                    self.writer.add_histogram('fake_feature', self.D(self.G(self.args.batch_size, cuda = self.args.cuda), cuda=self.args.cuda, feature = True)[0], gn)
            # calculate average loss at the end of an epoch
            batch_num += 1
            loss_supervised /= batch_num
            loss_unsupervised /= batch_num
            loss_gen /= batch_num
            accuracy /= batch_num
            print("Iteration %d, loss_supervised = %.4f, loss_unsupervised = %.4f, loss_gen = %.4f train acc = %.4f" % (epoch, loss_supervised, loss_unsupervised, loss_gen, accuracy))
            sys.stdout.flush()

            # eval
            tmp = self.eval()
            print("Eval: correct %d / %d, Acc: %.2f"  % (tmp, self.dataset.test_num, tmp * 100. / self.dataset.test_num))
            torch.save(self.G, os.path.join(self.args.savedir, 'G.pkl'))
            torch.save(self.D, os.path.join(self.args.savedir, 'D.pkl'))


    def predict(self, x):
        '''predict label in volatile mode

        Args:
            x: Size=>[batch_size, self.dataset.k + self.dataset.d], Type=>Variable(FloatTensor), volatile
        '''
        return torch.max(self.D(x, cuda=self.args.cuda), 1)[1].data

    def eval(self):
        self.G.eval()
        self.D.eval()
        ids, f, y = self.dataset.test_batch()
        x = self.make_input(ids, f, volatile = True)
        if self.args.cuda:
            x, y = x.cuda(), y.cuda()
        pred1 = self.predict(x)

        return torch.sum(pred1 == y)

    def draw(self, batch_size):
        self.G.eval()
        return self.G(batch_size, cuda=self.args.cuda)

if __name__ == '__main__':
    # Entry point: parse the hyper-parameters, seed every RNG, load the
    # pickled cora dataset and train GraphSGAN on it.
    parser = argparse.ArgumentParser(description='PyTorch GraphS GAN')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train (default: 20)')
    parser.add_argument('--lr', type=float, default=0.003, metavar='LR',
                        help='learning rate (default: 0.003)')
    # the value is passed to optim.Adam as the first beta, not to SGD
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='Adam beta1 momentum coefficient (default: 0.5)')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='CUDA training')
    parser.add_argument('--seed', type=int, default=2, metavar='S',
                        help='random seed (default: 2)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--eval-interval', type=int, default=1, metavar='N',
                        help='how many batches to wait before evaling training status')
    parser.add_argument('--unlabel-weight', type=float, default=0.5, metavar='N',
                        help='scale factor between labeled and unlabeled data')
    parser.add_argument('--logdir', type=str, default='./logfile', metavar='LOG_PATH', help='logfile path, tensorboard format')
    parser.add_argument('--savedir', type=str, default='./models', metavar='SAVE_PATH', help = 'saving path, pickle format')
    # An explicit argument list is parsed instead of sys.argv, so the options
    # are fixed here: CUDA is requested, everything else keeps its default.
    args = parser.parse_args(["--cuda"])
    # fall back to the CPU when no GPU is available
    args.cuda = args.cuda and torch.cuda.is_available()
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # That is how you usually build the dataset 
    #dataset = CoraDataset(feature_file = './data/cora.features', 
    #     edge_file = './data/cora_edgelist', label_file = './data/cora_label')
    #dataset.read_embbedings('./embedding/embedding_line_cora')
    #dataset.setting(20, 1000)
    

    # but we load the example of cora
    # SECURITY: pickle.load can execute arbitrary code; only open dataset
    # files that come from a trusted source.
    with open('cora.dataset/cora.dataset', 'rb') as fdata:
        dataset = pkl.load(fdata,encoding='iso-8859-1')
    generator = Generator(200, dataset.k + dataset.d)
    discriminator = Discriminator(dataset.k + dataset.d, dataset.m)
    gan = GraphSGAN(generator, discriminator, dataset, args)
    gan.train() 
    


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Training: 100 / 100
Iteration 0, loss_supervised = 0.4530, loss_unsupervised = 0.5101, loss_gen = 1.0487 train acc = 0.8317
Eval: correct 764 / 1000, Acc: 76.40




Training: 100 / 100
Iteration 1, loss_supervised = 0.0400, loss_unsupervised = 0.2581, loss_gen = 1.3300 train acc = 0.9873
Eval: correct 772 / 1000, Acc: 77.20
Training: 100 / 100
Iteration 2, loss_supervised = 0.0184, loss_unsupervised = 0.2929, loss_gen = 1.0697 train acc = 0.9945
Eval: correct 787 / 1000, Acc: 78.70
Training: 100 / 100
Iteration 3, loss_supervised = 0.0181, loss_unsupervised = 0.3869, loss_gen = 0.5057 train acc = 0.9937
Eval: correct 806 / 1000, Acc: 80.60
Training: 100 / 100
Iteration 4, loss_supervised = 0.0154, loss_unsupervised = 0.4276, loss_gen = 0.3555 train acc = 0.9956
Eval: correct 829 / 1000, Acc: 82.90
Training: 100 / 100
Iteration 5, loss_supervised = 0.0142, loss_unsupervised = 0.4441, loss_gen = 0.3177 train acc = 0.9952
Eval: correct 819 / 1000, Acc: 81.90
Training: 100 / 100
Iteration 6, loss_supervised = 0.0121, loss_unsupervised = 0.4460, loss_gen = 0.3101 train acc = 0.9958
Eval: correct 816 / 1000, Acc: 81.60
Training: 100 / 100
Iteration 7, l