### Library Installation

In [None]:
# PyG installation
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-${TORCH}+${CUDA}.html
!pip install -q git+https://github.com/rusty1s/pytorch_geometric.git

In [None]:
import torch
# Record and print the installed PyTorch version string for reference.
vers = torch.__version__
print("Torch vers: ", vers)

### Dataset Setup

In [None]:
from torch_geometric.datasets import UPFD

DATA_ROOT = "/content/FakeNewsNet/dataset"

# Both splits use the same "gossipcop" / "content" configuration and the same
# root directory; only the `split` argument differs.
train_data, test_data = (
    UPFD(root=DATA_ROOT, name="gossipcop", feature="content", split=split_name)
    for split_name in ("train", "test")
)

In [None]:
# Report how many examples each split contains, one line per split.
print(
    "# Training Examples: {}".format(len(train_data)),
    "# Test Examples: {}".format(len(test_data)),
    sep="\n",
)

In [None]:
# Display the first training example to see which fields a single item carries.
train_data[0]

### Graph Visualization

In [None]:
import networkx as nx
def to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=False,
                remove_self_loops=False):
    """Convert a PyG-style graph object into a networkx graph.

    Args:
        data: Graph object exposing ``num_nodes``, an ``edge_index`` tensor with
            one column per edge, and a call interface ``data(*keys)`` that
            yields ``(key, value)`` pairs for the requested attribute names.
        node_attrs: Names of per-node attributes to copy onto the nodes.
        edge_attrs: Names of per-edge attributes to copy onto the edges.
        to_undirected: Build an ``nx.Graph`` instead of an ``nx.DiGraph``. When
            both ``(u, v)`` and ``(v, u)`` are present, the entry with
            ``u >= v`` supplies the edge and its attributes; an edge that is
            present in one direction only is kept as well.
        remove_self_loops: Skip edges whose two endpoints are the same node.

    Returns:
        The populated ``nx.Graph`` / ``nx.DiGraph`` with nodes
        ``0 .. data.num_nodes - 1``.
    """
    G = nx.Graph() if to_undirected else nx.DiGraph()
    G.add_nodes_from(range(data.num_nodes))
    node_attrs, edge_attrs = node_attrs or [], edge_attrs or []

    values = {}
    for key, item in data(*(node_attrs + edge_attrs)):
        if torch.is_tensor(item):
            squeezed = item.squeeze()
            # squeeze() also removes the leading per-node / per-edge axis when
            # there is exactly one element; restore it so values[key][i] still
            # selects element i rather than a feature (or fails on a scalar).
            if item.dim() > 0 and item.size(0) == 1:
                squeezed = squeezed.unsqueeze(0)
            values[key] = squeezed.tolist()
        else:
            values[key] = item
            # Non-tensor single-element containers are unwrapped.
            if isinstance(item, (list, tuple)) and len(item) == 1:
                values[key] = item[0]

    edges = data.edge_index.t().tolist()
    # Needed only to tell whether a (u, v) entry has a mirrored (v, u) entry.
    present = set(map(tuple, edges)) if to_undirected else ()
    for i, (u, v) in enumerate(edges):
        # Skip the v > u duplicate only when its mirror will add the edge;
        # otherwise a one-directional edge would vanish from the result.
        if to_undirected and v > u and (v, u) in present:
            continue
        if remove_self_loops and u == v:
            continue
        G.add_edge(u, v)
        for key in edge_attrs:
            G[u][v][key] = values[key][i]

    for key in node_attrs:
        for i, feat_dict in G.nodes(data=True):
            feat_dict.update({key: values[key][i]})
    return G

In [None]:
# Draw the training example at index 1, converted with the helper's defaults
# (directed graph, no node or edge attributes copied).
nx.draw(to_networkx(train_data[1]))

### GNN Implementation

#### Data Loading

In [None]:
from torch_geometric.loader import DataLoader
# Mini-batches of 128 graphs for both splits; shuffling is enabled for training only.
train_loader = DataLoader(dataset=train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=128, shuffle=False)

#### GNN Architecture

In [None]:
from torch_geometric.nn import global_max_pool as gmp
from torch_geometric.nn import GATConv
from torch.nn import Linear


class GNN(torch.nn.Module):
    """Graph classifier: three GAT layers, global max pooling and a root-node branch.

    The pooled graph embedding is concatenated with a linear projection of the
    raw input features of each graph's first node before the final linear layer.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Width of the convolutional and readout layers.
        out_channels: Number of output units per graph.
    """

    def __init__(self,
                 in_channels, hidden_channels, out_channels):

        super().__init__()

        # Graph Convolutions
        self.convolution_1 = GATConv(in_channels, hidden_channels)
        self.convolution_2 = GATConv(hidden_channels, hidden_channels)
        self.convolution_3 = GATConv(hidden_channels, hidden_channels)

        # Readout Layers
        # Projects the raw features of the news (first) node of each graph
        self.lin_news = Linear(in_channels, hidden_channels)

        # Processes the pooled graph features
        self.lin0 = Linear(hidden_channels, hidden_channels)

        # Final layer over [graph embedding, news embedding]; sigmoid is applied in forward
        self.lin1 = Linear(2*hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Score every graph in a mini-batch.

        Args:
            x: Node feature matrix covering all graphs in the batch.
            edge_index: Edge list handed unchanged to the GAT layers.
            batch: Per-node graph index. Assumed to be sorted so that the
                nodes of one graph are contiguous -- TODO confirm for any
                loader other than the PyG ``DataLoader``.

        Returns:
            Sigmoid-activated scores with one row per graph.
        """
        # Graph Convolutions (attribute names must match those registered in __init__)
        h = self.convolution_1(x, edge_index).relu()
        h = self.convolution_2(h, edge_index).relu()
        h = self.convolution_3(h, edge_index).relu()

        # Pooling: one embedding per graph
        h = gmp(h, batch)

        # Readout
        h = self.lin0(h).relu()

        # Following the UPFD paper, we also feed in the raw input features of
        # the news node. A graph boundary is wherever consecutive batch ids
        # differ, so node 0 plus the node after each boundary is the first
        # node of a graph (presumably the news/root node -- verify against the
        # dataset's node ordering).
        root = (batch[1:] - batch[:-1]).nonzero(as_tuple=False).view(-1)
        root = torch.cat([root.new_zeros(1), root + 1], dim=0)

        news = x[root]
        news = self.lin_news(news).relu()

        out = self.lin1(torch.cat([h, news], dim=-1))
        return torch.sigmoid(out)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# One output unit per graph; the model applies a sigmoid, so it is paired with BCELoss.
model = GNN(train_data.num_features, 128, 1)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01, weight_decay=0.01)
loss_fnc = torch.nn.BCELoss()

#### Model Training

In [None]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Uses the notebook-level ``model``, ``optimizer``, ``loss_fnc``,
    ``train_loader`` and ``device``. ``epoch`` is accepted but not used.

    Returns:
        Sum of the batch losses, each weighted by its number of graphs,
        divided by the size of the training dataset.
    """
    model.train()
    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        # Flatten the per-graph outputs so they line up with the label vector.
        scores = model(batch.x, batch.edge_index, batch.batch).reshape(-1)
        batch_loss = loss_fnc(scores, batch.y.float())
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item() * batch.num_graphs
    return running_loss / len(train_loader.dataset)

#### Model Testing

In [None]:
@torch.no_grad()
def test(epoch):
    """Evaluate the model on ``test_loader`` without tracking gradients.

    Uses the notebook-level ``model``, ``loss_fnc``, ``test_loader``,
    ``device`` and ``metrics``. ``epoch`` is accepted but not used.

    Returns:
        ``(mean_loss, accuracy, f1)`` where ``mean_loss`` is the graph-weighted
        sum of batch losses divided by the size of the test dataset.
    """
    model.eval()
    running_loss = 0
    pred_chunks, label_chunks = [], []
    for batch in test_loader:
        batch = batch.to(device)
        # Flatten the per-graph outputs so they line up with the label vector.
        scores = model(batch.x, batch.edge_index, batch.batch).reshape(-1)
        targets = batch.y.float()
        running_loss += loss_fnc(scores, targets).item() * batch.num_graphs
        # NOTE(review): these tensors stay on `device`; `metrics` receives them as-is.
        pred_chunks.append(scores)
        label_chunks.append(targets)

    # Calculate Metrics
    accuracy, f1 = metrics(pred_chunks, label_chunks)

    mean_loss = running_loss / len(test_loader.dataset)
    return mean_loss, accuracy, f1

In [None]:
def metrics(predicted, actuals):
    """Compute accuracy and F1 from per-batch prediction and label tensors.

    Args:
        predicted: Sequence of 1-D tensors of predicted scores, one per batch.
            Scores are rounded with ``torch.round`` to obtain class labels.
        actuals: Sequence of 1-D label tensors aligned with ``predicted``.

    Returns:
        ``(accuracy, f1)`` as returned by scikit-learn's ``accuracy_score``
        and ``f1_score``.
    """
    # scikit-learn needs host-side arrays; tensors collected on a CUDA device
    # cannot be converted implicitly, so detach and copy to the CPU first.
    y_pred = torch.cat(predicted).round().detach().cpu().numpy()
    y_true = torch.cat(actuals).detach().cpu().numpy()
    # scikit-learn's argument order is (y_true, y_pred).
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, f1

#### Training Loop and Loss Visualization

In [None]:
NUM_EPOCHS = 50
train_losses, test_losses = [], []

# Train and then evaluate once per epoch, keeping both loss curves for plotting.
for epoch in range(NUM_EPOCHS):
    train_loss = train(epoch)
    test_loss, test_acc, test_f1 = test(epoch)

    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print(f'Epoch: {epoch:04d}  ==  Training Loss: {train_loss:.4f}  ==  '
          f'TestLoss: {test_loss:.4f}  ==  TestAcc: {test_acc:.4f}  ==  TestF1: {test_f1:.4f}')

In [None]:
import matplotlib.pyplot as plt 
# Overlay the per-epoch training and test loss curves on a shared epoch axis.
plt.plot(range(NUM_EPOCHS), train_losses, color='blue', label='Training Loss')
plt.plot(range(NUM_EPOCHS), test_losses, color='red', label='Test Loss')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()