In [1]:
from torch_geometric.datasets import UPFD
# Node-feature types selectable through `feature`:
#   content (profile + spacy; dim: 310), profile (dim: 10), spacy (dim: 300),
#   bert (dim: 768) -- the one loaded here
# splits: train, test, val
# name: politifact, gossipcop
dataset = UPFD('data/upfd', name="politifact", feature='bert', split="train")

In [2]:
# Inspect the first graph of the training split loaded above.
graph = dataset[0]
print(f"Graph at index 0: {graph}")
print(f"Node features shape: {graph.x.shape}")
# NOTE: despite the wording of the message, the recorded output shows y=[1]
# for this 72-node graph, i.e. `y` holds one label for the whole graph.
print(f"Node labels shape: {graph.y.shape}")

# edge_index has shape [2, num_edges]; presumably row 0 holds source nodes and
# row 1 target nodes (PyG convention) -- get_edge_type relies on that reading.
print(f"Edge index shape: {graph.edge_index.shape}")
print(f"Edge index: {graph.edge_index}")

Graph at index 0: Data(x=[72, 768], edge_index=[2, 71], y=[1])
Node features shape: torch.Size([72, 768])
Node labels shape: torch.Size([1])
Edge index shape: torch.Size([2, 71])
Edge index: tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  8,  8,  8, 16, 16, 16, 16, 16, 16,
         24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
         24, 24, 24, 24, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 60],
        [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
         37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
         55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]])


In [25]:
import torch
def get_edge_type(edge_index, source_indices=(0,)):
    """Assign a relation id to every edge based on where the edge starts.

    Args:
        edge_index (Tensor): Integer edge list of shape [2, num_edges]; row 0
            is read as the source node of each edge.
        source_indices (sequence of int or Tensor): Node indices treated as
            "source" nodes. Defaults to node 0. For a mini-batch the notebook
            passes ``batch.ptr[:-1]``.

    Returns:
        Tensor: int64 tensor of shape [num_edges], on the same device as
        ``edge_index``. An entry is 0 when the edge's source node is listed in
        ``source_indices`` and 1 otherwise.
    """
    # Bring the source list onto edge_index's dtype/device so the membership
    # test is a single vectorised op (no per-edge Python loop, no device sync).
    sources = torch.as_tensor(
        source_indices, dtype=edge_index.dtype, device=edge_index.device
    )
    starts_at_source = torch.isin(edge_index[0], sources)
    # Relation 0 = edge leaves a listed source node, relation 1 = any other edge.
    # Going through a bool mask keeps the dtype int64 even when there are no edges.
    return (~starts_at_source).long()

In [13]:
from torch_geometric.nn import RGCNConv
import torch

# Smoke test: push the sample graph through a single relational convolution
# configured for two relation types and a 16-dimensional output.
rgcn_layer = RGCNConv(
    in_channels=graph.num_node_features,
    out_channels=16,
    num_relations=2,
)

out = rgcn_layer(
    x=graph.x,
    edge_index=graph.edge_index,
    edge_type=get_edge_type(graph.edge_index),
)
print(out)

tensor([[ 1.2719e+00,  9.2688e-01, -7.3374e-01,  ..., -1.6260e+00,
         -1.7678e+00, -1.6121e+00],
        [ 2.7278e+00, -6.6306e-01, -2.6498e-03,  ..., -1.7341e+00,
         -4.4361e+00,  4.3912e-01],
        [ 2.7572e+00, -8.0807e-01, -1.3381e-01,  ..., -1.3875e+00,
         -4.4156e+00,  3.4158e-01],
        ...,
        [ 1.3388e+00,  1.2458e+00,  7.4155e-01,  ..., -9.1565e-01,
         -2.2600e+00, -2.4664e+00],
        [ 1.3460e+00,  1.3476e+00,  7.2434e-01,  ..., -8.8550e-01,
         -2.3370e+00, -2.4440e+00],
        [ 1.3878e+00,  1.3763e+00,  7.5385e-01,  ..., -8.5009e-01,
         -2.4101e+00, -2.4975e+00]], grad_fn=<AddBackward0>)


In [42]:
import torch
import torch.nn.functional as F
from torch.nn import Linear, ModuleList
from torch_geometric.nn import RGCNConv, global_mean_pool
from torch_geometric.datasets import UPFD

class RGCNForGraphClassification(torch.nn.Module):
    """Graph-level classifier built from two RGCN layers.

    Node features go through two ``RGCNConv`` layers (each followed by ReLU),
    are mean-pooled per graph, and the pooled vector is fed to a linear layer.
    Dropout is applied after the first convolution and before the classifier.

    Args:
        in_channels (int): Size of each input node feature vector.
        hidden_channels (int): Output size of both RGCN layers.
        out_channels (int): Number of values produced per graph.
        num_relations (int): Number of edge relation types.
        num_bases (int, optional): Forwarded unchanged to ``RGCNConv``.
        dropout (float): Dropout probability used at both dropout sites.
            Defaults to 0.5, the value that was previously hard-coded.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_relations,
                 num_bases=None, dropout=0.5):
        super().__init__()

        # Single knob for both dropout sites in forward().
        self.dropout = dropout

        # First RGCN convolution layer
        self.conv1 = RGCNConv(
            in_channels=in_channels,
            out_channels=hidden_channels,
            num_relations=num_relations,
            num_bases=num_bases
        )

        # Second RGCN convolution layer
        self.conv2 = RGCNConv(
            in_channels=hidden_channels,
            out_channels=hidden_channels,
            num_relations=num_relations,
            num_bases=num_bases
        )

        # Output layer
        self.classifier = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_type, batch):
        """
        Forward pass for graph classification

        Args:
            x (Tensor): Node feature matrix [num_nodes, in_channels]
            edge_index (Tensor): Edge indices [2, num_edges]
            edge_type (Tensor): Edge type/relation indices [num_edges]
            batch (Tensor): Batch vector [num_nodes] mapping each node to its graph

        Returns:
            Tensor: Graph classification predictions [batch_size, out_channels]
        """
        # First layer with ReLU activation
        x = self.conv1(x, edge_index, edge_type)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)

        # Second layer
        x = self.conv2(x, edge_index, edge_type)
        x = F.relu(x)

        # Global pooling (from node-level to graph-level representation)
        x = global_mean_pool(x, batch)

        # Apply final classifier
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.classifier(x)

        return x

In [43]:
# Build the graph classifier: input width taken from the sample graph,
# 16 hidden units, 2 output classes (assuming binary classification),
# 2 edge relation types, and no basis decomposition (num_bases stays None).
rgcn_model = RGCNForGraphClassification(
    graph.num_node_features,
    16,
    2,
    num_relations=2,
)


In [44]:
from torch_geometric.loader import DataLoader

# Load the three politifact splits with the same root folder and feature type.
train_dataset, val_dataset, test_dataset = (
    UPFD('data/upfd', name='politifact', feature='bert', split=split_name)
    for split_name in ('train', 'val', 'test')
)

# Only the training loader reshuffles; evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [41]:
# Smoke test: run the model on one shuffled mini-batch of three graphs.
test_train_loader = DataLoader(train_dataset, shuffle=True, batch_size=3)
for batch in test_train_loader:
    # batch.ptr[:-1] is handed to get_edge_type as the set of source nodes.
    rgcn_model(
        x=batch.x,
        edge_index=batch.edge_index,
        edge_type=get_edge_type(batch.edge_index, source_indices=batch.ptr[:-1]),
        batch=batch.batch,
    )
    # Only the first batch is needed.
    break

before global_mean_pool: torch.Size([387, 16])
after global_mean_pool: torch.Size([3, 16])


In [54]:
import torch.nn.functional as F
# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fresh model for this training run, moved to the target device once.
rgcn_model = RGCNForGraphClassification(
    in_channels=graph.num_node_features,
    hidden_channels=16,
    out_channels=2,  # Assuming binary classification
    num_relations=2,
    num_bases=None
).to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(rgcn_model.parameters(), lr=0.005)
num_epochs = 50


def _forward_batch(model, batch):
    """Run `model` on one mini-batch and return its raw output.

    Edge types come from get_edge_type with `batch.ptr[:-1]` as the source
    nodes (per PyG these are the offsets of each graph's first node -- confirm
    this matches the intended source node of every graph).
    """
    edge_type = get_edge_type(batch.edge_index, source_indices=batch.ptr[:-1])
    # Keep the relation ids on the same device as the edges they describe.
    edge_type = edge_type.to(batch.edge_index.device)
    return model(batch.x, batch.edge_index, edge_type, batch.batch)


def _accuracy(model, loader, device):
    """Return the fraction of graphs in `loader.dataset` that `model` labels correctly."""
    model.eval()
    correct = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            pred = _forward_batch(model, batch).argmax(dim=1)
            correct += (pred == batch.y).sum().item()
    return correct / len(loader.dataset)


for epoch in range(num_epochs):
    # Switch back to training mode once per epoch (evaluation below sets eval mode).
    rgcn_model.train()
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = _forward_batch(rgcn_model, batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
        print(f"Loss: {loss.item()}")

    # val
    val_acc = _accuracy(rgcn_model, val_loader, device)
    print(f"Validation Accuracy: {val_acc:.4f}")

# test
test_acc = _accuracy(rgcn_model, test_loader, device)
print(f"Test Accuracy: {test_acc:.4f}")

Loss: 0.7775267958641052
Loss: 0.8011779189109802
Validation Accuracy: 0.4194
Loss: 0.7628626823425293
Loss: 0.7330371141433716
Validation Accuracy: 0.5161
Loss: 0.7312332391738892
Loss: 0.6923891305923462
Validation Accuracy: 0.5484
Loss: 0.6680896878242493
Loss: 0.6541942358016968
Validation Accuracy: 0.4516
Loss: 0.6258772611618042
Loss: 0.6679251194000244
Validation Accuracy: 0.4839
Loss: 0.6304818391799927
Loss: 0.6146295070648193
Validation Accuracy: 0.4839
Loss: 0.6503036022186279
Loss: 0.6663673520088196
Validation Accuracy: 0.5161
Loss: 0.6597305536270142
Loss: 0.6629555821418762
Validation Accuracy: 0.5161
Loss: 0.6551036238670349
Loss: 0.6218137741088867
Validation Accuracy: 0.6129
Loss: 0.6047449707984924
Loss: 0.5984771847724915
Validation Accuracy: 0.7419
Loss: 0.568616509437561
Loss: 0.5753270983695984
Validation Accuracy: 0.6774
Loss: 0.5822259783744812
Loss: 0.5692937970161438
Validation Accuracy: 0.6774
Loss: 0.6012411117553711
Loss: 0.4637071490287781
Validation Accu