In [None]:
!pip install torch_geometric
!pip install torch

In [61]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, TopKPooling, global_mean_pool
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

from torch_geometric.datasets import Planetoid

In [83]:
embdding_size = 64
# data
data = Planetoid(root="testdata", name="Cora")


print("Data Info: ")
print("Number of graphs: ", len(data))
print("Number of features: ", data.num_features)
print("Number of edge features: ", data.num_edge_features)
print("Number of classes: ", data.num_classes)
print("Number of node features: ", data.num_node_features )

data[0].y

Data Info: 
Number of graphs:  1
Number of features:  1433
Number of edge features:  0
Number of classes:  7


tensor([3, 4, 4,  ..., 3, 3, 3])

In [64]:
class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        torch.manual_seed(42)

        # Layers
        self.initial_conv = GCNConv(data.num_features, embdding_size)
        self.conv1 = GCNConv(embdding_size, embdding_size)
        self.conv2 = GCNConv(embdding_size, embdding_size)
        self.conv3 = GCNConv(embdding_size, embdding_size)

        # Output Layer
        self.out = Linear(embdding_size*2, data.num_classes)


    def forward(self, x, edge_index, batch_index):
        # First Conv Layer - activation = feature vectors
        # we will use tanh as activation

        hidden = self.initial_conv(x, edge_index)
        hidden = F.tanh(hidden) 

        # Other Conv Layers
        hidden = self.conv1(hidden, edge_index)
        hidden = F.tanh(hidden) 

        hidden = self.conv2(hidden, edge_index)
        hidden = F.tanh(hidden) 

        hidden = self.conv3(hidden, edge_index)
        hidden = F.tanh(hidden)


        # Global Pooling Layer (stack aggregations)
        # Creates embedding of whole graph
        hidden = torch.cat([gmp(hidden, batch_index), gap(hidden, batch_index)], dim=1)

        # Final classifier
        out = self.out(hidden) # refers to linear classifier

        return out, hidden

In [65]:
model = GCN()
print(model)
print(torch.cuda.is_available())

GCN(
  (initial_conv): GCNConv(1433, 64)
  (conv1): GCNConv(64, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (out): Linear(in_features=128, out_features=7, bias=True)
)
False


In [78]:
# Train the GCN

from torch_geometric.data import DataLoader
import warnings 
warnings.filterwarnings("ignore")

# Root mean squared error (regression ??)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0007) # lr = learning rate

device = torch.device("cpu") # switch to gpu if cuda is available
model = model.to(device)

data_size = len(data)
print()
NUM_GRAPHS_PER_BATCH = 64

print(data[:int(data_size * 0.8)])
print(data)
loader = DataLoader(data[:int(data_size * 0.8)], 
                    batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)

test_loader = DataLoader(data[int(data_size*0.8):],
                         batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True)

def train(data):
    for batch in loader:
        batch.to(device)

        # reset gradients
        optimizer.zero_grad()
        pred, embedding = model(batch.x.float(), batch.edge_index, batch.batch)

        loss = torch.sqrt(loss_fn(pred, batch.y))
        loss.backward()

        # update using gradients
        optimizer.step()

    return loss, embedding

Cora()
Cora()


ValueError: num_samples should be a positive integer value, but got num_samples=0

In [53]:
print("Starting training ...")
losses = []
for epoch in range(20):
    loss, h = train(data)
    losses.append(loss)
    # if epoch % 100 == 0:
    print(f"Epoch {epoch} | Train Loss {loss}")
        

Starting training ...


NameError: name 'loader' is not defined