In [2]:
from torch_geometric.datasets import Planetoid
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
# from sklearn.metrics import f1_score
# from sklearn.metrics import roc_curve
# from sklearn.metrics import auc
# from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt

In [3]:
# Load the Citeseer citation graph from the Planetoid collection. The raw
# files are stored under the current directory (root='.').
dataset = Planetoid(root='.', name='Citeseer')
data = dataset[0]  # first graph in the dataset

# Summary of the dataset and of the tensors that describe the graph.
print(dataset)
print("number of graphs:\t\t",len(dataset))
print("number of classes:\t\t",dataset.num_classes)
# This line lists the distinct label values (not their count), so it gets its
# own caption instead of repeating "number of classes".
print("class labels:\t\t\t",np.unique(data.y))
print("number of node features:\t",data.num_node_features)
print("number of edge features:\t",data.num_edge_features)
print("X shape: ", data.x.shape)
print("Edge shape: ", data.edge_index.shape)
print("Y shape: ", data.y.shape)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...


Citeseer()
number of graphs:		 1
number of classes:		 6
number of classes:		 [0 1 2 3 4 5]
number of node features:	 3703
number of edge features:	 0
X shape:  torch.Size([3327, 3703])
Edge shape:  torch.Size([2, 9104])
Y shape:  torch.Size([3327])


Done!


In [4]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    All arguments are optional so that ``GCN()`` keeps building the same
    network as before; passing them explicitly removes the dependency on the
    notebook-level ``dataset`` variable.

    Args:
        in_channels: Size of each input node feature vector. When ``None``,
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used.
        hidden_channels: Output width of the first convolution.
        out_channels: Number of output classes. When ``None``,
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
        dropout: Dropout probability applied after the first layer while the
            module is in training mode (0.5 is ``F.dropout``'s own default).
    """

    def __init__(self, in_channels=None, hidden_channels=16,
                 out_channels=None, dropout=0.5):
        super().__init__()
        # Fall back to the global dataset only when sizes are not given.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if out_channels is None:
            out_channels = dataset.num_classes
        self.dropout = dropout
        # Attribute names are kept so previously saved state_dicts still load.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Return per-node log-probabilities over the classes.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``
                (graph connectivity), as read below.

        Returns:
            The ``log_softmax`` (over dim 1) of the second convolution's output.
        """
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is active only in training mode (model.train()).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [5]:

# Seed PyTorch's RNG before the model is built: weight initialisation and
# dropout both draw from it, so without a seed every run gives different numbers.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN().to(device)
data = dataset[0].to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
num_epoch = 200
# Print the model that is actually trained instead of constructing a second,
# throwaway GCN() instance just for display.
print(f'Graph Convolutional Network (GCN): \n{model}')

Graph Convolutional Network (GCN): 
GCN(
  (conv1): GCNConv(3703, 16)
  (conv2): GCNConv(16, 6)
)


In [8]:

# ---- Training: full-batch optimisation on the labelled training nodes ----
# NOTE: `model` and `optimizer` are created in an earlier cell and are not reset
# here, so re-running this cell continues training from the current weights.
# Re-run the setup cell first to start from a fresh model.
model.train()

# Loop-invariant quantities are computed once instead of on every epoch.
train_mask = data.train_mask
train_labels = data.y[train_mask]
num_train = int(train_mask.sum())

for epoch in range(num_epoch):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[train_mask], train_labels)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        # Accuracy is only reported on logging epochs, so it is only computed
        # here; this avoids converting a tensor to a Python int every epoch.
        # It is measured on the same forward pass as the loss (training mode,
        # before the optimizer step).
        correct = (out.argmax(dim=1)[train_mask] == train_labels).sum()
        acc = int(correct) / num_train
        print('Epoch: {}, Loss: {:.4f}, Training Acc: {:.4f}'.format(epoch+1, loss.item(), acc))

# ---- Evaluation on the held-out test nodes ----
model.eval()  # disables dropout in GCN.forward
with torch.no_grad():
    pred1 = model(data)          # per-node log-probabilities
    pred = pred1.argmax(dim=1)   # predicted class per node
    test_mask = data.test_mask
    correct = (pred[test_mask] == data.y[test_mask]).sum()
    acc = int(correct) / int(test_mask.sum())
    # f1 = f1_score(pred[data.test_mask].cpu(), data.y[data.test_mask].cpu(), average='macro')
    print(f'Accuracy: {acc:.4f}') # ,f'F1_score: {f1:.4f}'

# Persist the trained weights (state_dict only) in the working directory.
torch.save(model.state_dict(), "GCNCiteseer.pkl")


Epoch: 10, Loss: 0.0186, Training Acc: 1.0000
Epoch: 20, Loss: 0.0173, Training Acc: 1.0000
Epoch: 30, Loss: 0.0294, Training Acc: 0.9917
Epoch: 40, Loss: 0.0195, Training Acc: 0.9917
Epoch: 50, Loss: 0.0236, Training Acc: 0.9917
Epoch: 60, Loss: 0.0182, Training Acc: 0.9917
Epoch: 70, Loss: 0.0254, Training Acc: 0.9917
Epoch: 80, Loss: 0.0381, Training Acc: 0.9917
Epoch: 90, Loss: 0.0163, Training Acc: 1.0000
Epoch: 100, Loss: 0.0240, Training Acc: 0.9917
Epoch: 110, Loss: 0.0234, Training Acc: 1.0000
Epoch: 120, Loss: 0.0101, Training Acc: 1.0000
Epoch: 130, Loss: 0.0360, Training Acc: 0.9917
Epoch: 140, Loss: 0.0151, Training Acc: 1.0000
Epoch: 150, Loss: 0.0365, Training Acc: 0.9917
Epoch: 160, Loss: 0.0126, Training Acc: 1.0000
Epoch: 170, Loss: 0.0248, Training Acc: 1.0000
Epoch: 180, Loss: 0.0366, Training Acc: 0.9917
Epoch: 190, Loss: 0.0211, Training Acc: 1.0000
Epoch: 200, Loss: 0.0254, Training Acc: 1.0000
Accuracy: 0.6830


In [None]:
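# --- Disabled: per-class / micro- / macro-averaged ROC curves on the test nodes ---
# To re-enable, first uncomment the sklearn imports in the import cell
# (label_binarize, roc_curve, auc). Note that `pred1` holds log-probabilities
# (the model ends in log_softmax), which are used directly as scores below.
# y_test = label_binarize(data.y[data.test_mask].cpu(),classes=[0,1,2,3,4,5]) 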
# y_score = pred1[data.test_mask].cpu().detach().numpy()
# n_classes = y_test.shape[1]
# print(n_classes)
# fpr = dict()
# tpr = dict()
# roc_auc = dict()
# for i in range(n_classes):
#     fpr[i],tpr[i],_ = roc_curve(y_test[:,i],y_score[:,i])
#     roc_auc[i] = auc(fpr[i],tpr[i])

# fpr['micro'],tpr['micro'],_ = roc_curve(y_test.ravel(),y_score.ravel())
# roc_auc['micro'] = auc(fpr['micro'],tpr['micro'])
# all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
# mean_tpr = np.zeros_like(all_fpr)
# for i in range(n_classes):
#     mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
# mean_tpr /= n_classes
# fpr["macro"] = all_fpr
# tpr["macro"] = mean_tpr
# roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
# lw=2
# plt.figure()
# plt.plot(fpr["micro"], tpr["micro"],
#          label='micro-average ROC curve (area = {0:0.2f})'
#                ''.format(roc_auc["micro"]),
#          color='navy', linestyle=':', linewidth=4)

# plt.plot(fpr["macro"], tpr["macro"],
#          label='macro-average ROC curve (area = {0:0.2f})'
#                ''.format(roc_auc["macro"]),
#          color='deeppink', linestyle=':', linewidth=4)

# colors = ['lightcoral', 'pink', 'orange', 'lightgreen', 'lightskyblue', 'gold', 'thistle']
# for i, color in zip(range(n_classes), colors):
#     plt.plot(fpr[i], tpr[i], color=color, lw=lw,
#              label='ROC curve of class {0} (area = {1:0.2f})'
#              ''.format(i, roc_auc[i]))

# plt.plot([0, 1], [0, 1], 'k--', lw=lw)
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Some extension of Receiver operating characteristic to multi-class')
# plt.legend(loc="lower right")
# plt.show()

# model's state_dict
#print("Model's state_dict:")
#for param_tensor in model.state_dict():
#    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# optimizer's state_dict
#print("Optimizer's state_dict:")
#for var_name in optimizer.state_dict():
#    print(var_name, "\t", optimizer.state_dict()[var_name])

