In [0]:
# Mount Google Drive into the Colab runtime at /content/gdrive so files stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Move into the Drive "Datasets" folder, unpack the Cora archive there, then switch into the
# extracted "cora" directory so the following cells can open the data files by relative name.
%cd "/content/gdrive/My Drive/Datasets"
!tar -xvf "/content/gdrive/My Drive/Datasets/cora.tgz"
%cd "/content/gdrive/My Drive/Datasets/cora"

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from layers import GraphConvolution

# Some Exploration

In [0]:
# Load the citation file as raw strings just to look at it.
a=np.genfromtxt("cora.cites",dtype=np.dtype(str))
a 
# Each row is one edge made of two paper ids: the paper in the second column cites the paper in the first.
# This file therefore holds the edges of the citation graph.

In [0]:
# Load the content file as raw strings just to look at it.
b=np.genfromtxt("cora.content",dtype=np.dtype(str))
b
# Each row describes one paper: its id first, then the feature columns, then the class label last.

In [0]:
b[:,1:-1] # every column except the first and the last: the "word attributes" values, used below as the input features

In [0]:
b[:,-1] # last column of every row: the label of each paper

# Data Loading

In [0]:
# Parse the two Cora files: citation pairs as 32-bit integers, paper rows as strings.
edge_list=np.genfromtxt("cora.cites",dtype=np.int32)
ids_features_labels=np.genfromtxt("cora.content",dtype=str)
# Split every paper row into its id (first column), numeric features (middle columns)
# and string label (last column).
ids=ids_features_labels[:,0].astype(np.int32)
features=ids_features_labels[:,1:-1].astype(np.float32)
labels_strings=ids_features_labels[:,-1]

In [0]:
# Copy the label column into its own string array (until now it was a slice of the content table).
labels_strings=np.array(labels_strings,dtype=np.dtype(str))
# (The features need no further cast: they were converted to float32 when they were loaded.)

In [0]:
from sklearn.preprocessing import LabelEncoder

def one_hot_encoder(labels_strings):
  """Turn an array of label strings into a one-hot float matrix.

  The strings are first mapped to integer class ids with ``LabelEncoder``;
  row ``r`` of the result then has a single 1 in the column given by the
  class id of ``labels_strings[r]`` and 0 everywhere else.

  Args:
    labels_strings: 1-D array of class labels, one entry per sample.

  Returns:
    ``numpy.ndarray`` with one row per sample and ``max class id + 1`` columns.
  """
  class_ids=LabelEncoder().fit_transform(labels_strings)
  num_classes=class_ids.max()+1
  # Picking rows of the identity matrix yields exactly one 1 per sample.
  return np.eye(num_classes)[class_ids]

In [0]:
# Replace the label strings by their one-hot matrix (one row per paper) and display it.
labels=one_hot_encoder(labels_strings)
labels

In [0]:
# Map every paper id to the row position it occupies in the content table ...
id_map=dict(zip(ids,range(len(ids))))
# ... and rewrite both columns of the citation list in terms of those row positions.
edges=np.array([id_map.get(paper_id) for paper_id in edge_list.flatten()]).reshape(edge_list.shape)
edges

In [0]:
# Dense square adjacency matrix with one row and one column per paper.
adj=np.zeros((labels.shape[0],)*2)
# Set a 1 at (first column index, second column index) for every edge, all at once.
adj[edges[:,0],edges[:,1]]=1

In [0]:
# Symmetrise the directed citation matrix. An element-wise maximum keeps every entry at 0 or 1;
# the previous adj+adj.T gave weight 2 to pairs of papers that cite each other and doubled any
# self-citation on the diagonal.
adj_symm=np.maximum(adj,adj.T)
# Add the identity matrix, i.e. one self-loop per node.
# NOTE: this cell rebinds `adj`; rebuild the raw adjacency matrix before executing it a second time.
adj=adj_symm+np.eye(adj_symm.shape[0])

In [0]:
def normalise(X):
  """Row-normalise a 2-D array: divide every row by its own sum.

  Rows whose sum is zero are returned as all-zero rows instead of being
  turned into NaN/inf, and integer input is promoted to float so the
  reciprocal can be taken. Floating-point input keeps its dtype. The
  input array is not modified.

  Args:
    X: 2-D array-like of numbers (e.g. a feature matrix or an adjacency matrix).

  Returns:
    ``numpy.ndarray`` of the same shape as ``X`` in which every row with a
    non-zero sum has been scaled to sum to 1.
  """
  X=np.asarray(X)
  if not np.issubdtype(X.dtype,np.floating):
    X=X.astype(np.float64)
  rowsum=X.sum(axis=1) # for an adjacency matrix the row sum is the degree of the node
  inv=np.zeros_like(rowsum)
  nonzero=rowsum!=0
  inv[nonzero]=np.power(rowsum[nonzero],-1) # reciprocal only where it is defined
  # Scaling row i by inv[i] equals multiplying by diag(inv) from the left,
  # without materialising an n-by-n diagonal matrix.
  return X*inv[:,np.newaxis]

In [0]:
# Row-normalise the feature and adjacency matrices and hand them to PyTorch as float tensors.
features=torch.FloatTensor(np.array(normalise(features)))
adj=torch.FloatTensor(normalise(adj))
# Collapse the one-hot label matrix to class indices: the column positions of its non-zero entries.
labels=torch.LongTensor(np.where(labels))[1]

In [0]:
class GCN(nn.Module):
  """Two-layer graph convolutional network returning per-node log-probabilities.

  Stacks two ``GraphConvolution`` layers (imported from the local ``layers``
  module) with a ReLU and dropout between them, followed by a log-softmax
  over the class dimension.
  """
  def __init__(self,num_features,h,num_classes,dropout=0.5):
    """Build the two layers.

    Args:
      num_features: first argument of the first ``GraphConvolution``
        (presumably the input feature size - confirm against ``layers``).
      h: hidden width; second argument of the first layer and first
        argument of the second layer.
      num_classes: second argument of the second ``GraphConvolution``;
        the log-softmax in ``forward`` runs over this dimension.
      dropout: probability passed to ``F.dropout`` on the hidden
        activations. Defaults to 0.5, the value that used to be hard-coded.
    """
    super(GCN,self).__init__()
    self.gconv1=GraphConvolution(num_features,h)
    self.gconv2=GraphConvolution(h,num_classes)
    self.dropout=dropout

  def forward(self,x,adj):
    """Run both graph convolutions on ``x`` using ``adj``.

    Args:
      x: node feature tensor handed to the first layer.
      adj: adjacency tensor handed unchanged to both layers.

    Returns:
      Tensor of log-probabilities (log-softmax over dimension 1).
    """
    x=F.relu(self.gconv1(x,adj))
    # Dropout is only applied while the module is in training mode.
    x=F.dropout(x,self.dropout,training=self.training)
    x=self.gconv2(x,adj)
    return F.log_softmax(x,dim=1)

In [0]:
# Seed PyTorch's RNG so that any random weight initialisation and the dropout masks repeat
# from run to run (some GPU kernels may still be non-deterministic).
SEED=42
torch.manual_seed(SEED)
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Build the model, move it to the target device, and only then create the optimiser
# over the parameters that will actually be trained.
net=GCN(num_features=features.shape[1],h=16,num_classes=labels.max().item()+1)
net=net.to(device)
opt=optim.Adam(net.parameters(),lr=4e-3)
# Move the full-graph inputs and the targets to the same device as the model.
adj=adj.to(device)
features=features.to(device)
labels=labels.to(device)

In [0]:
# Fixed node-index splits, created as int64 tensors and moved to the compute device:
# nodes 0-139 for training, 200-499 for validation, 500-1499 for testing.
idx_train=torch.arange(0,140,dtype=torch.long).to(device)
idx_val=torch.arange(200,500,dtype=torch.long).to(device)
idx_test=torch.arange(500,1500,dtype=torch.long).to(device)

In [0]:
def accuracy(output,labels):
  """Fraction of rows in ``output`` whose highest-scoring column matches the label.

  Args:
    output: 2-D tensor of per-class scores, one row per sample.
    labels: 1-D tensor of class indices with one entry per row of ``output``.

  Returns:
    0-dim double tensor: number of correct predictions divided by ``len(labels)``.
  """
  # Index of the largest score in every row is the predicted class.
  _,predicted=output.max(dim=1)
  hits=predicted.type_as(labels).eq(labels).double().sum()
  return hits/len(labels)


In [0]:
def train(num_epochs):
  """Train the notebook-level ``net`` full-batch and log progress every epoch.

  Relies on the notebook globals ``net``, ``opt``, ``features``, ``adj``,
  ``labels``, ``idx_train`` and ``idx_val``. Each epoch runs one optimiser
  step on the negative log-likelihood of the training nodes, then measures
  validation accuracy with the updated weights in evaluation mode (dropout
  disabled, no gradient tracking). The model is left in training mode when
  the function returns.

  Args:
    num_epochs: number of optimisation steps to run.
  """
  net.train()
  for epoch in range(1,num_epochs+1):
    opt.zero_grad()
    out=net(features,adj)
    loss=F.nll_loss(out[idx_train],labels[idx_train])
    loss.backward()
    opt.step()
    # Validation must not reuse the training-mode output: that one has dropout
    # applied and predates the weight update.
    net.eval()
    with torch.no_grad():
      val_out=net(features,adj)
      val_accuracy=accuracy(val_out[idx_val],labels[idx_val])
    net.train()
    # .item() logs plain numbers instead of tensor reprs.
    print("{}/{}. loss: {}".format(epoch,num_epochs,loss.item()))
    print("{}/{}. validation accuacy: {}".format(epoch, num_epochs,val_accuracy.item()))
    

In [0]:
# Run 200 training epochs; loss and validation accuracy are printed after each one.
train(num_epochs=200)

In [0]:
def test():
  """Print the accuracy of the notebook-level ``net`` on the test nodes.

  Relies on the notebook globals ``net``, ``features``, ``adj``, ``labels``
  and ``idx_test``. Switches the model to evaluation mode and leaves it there.
  """
  net.eval()
  # Pure inference: skip gradient tracking so no autograd graph is kept in memory.
  with torch.no_grad():
    out=net(features,adj)
    test_accuracy=accuracy(out[idx_test],labels[idx_test])
  print(test_accuracy)

In [0]:
# Report the accuracy on the test split.
test()