In [2]:

!pip install torch torchvision
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [3]:
import os.path as osp
import torch
import pandas as pd
from torch_geometric.datasets import Amazon
import torch_geometric.transforms as T
import numpy as np
import random
from itertools import product
import matplotlib.pyplot as plt
import seaborn as sns

torch.manual_seed(1)
torch.cuda.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [4]:
# Carga del dataset
path = osp.join('.', 'data', 'Computers', 'Computers')
dataset = Amazon(path, 'Computers', transform=T.NormalizeFeatures())
data = dataset[0]
print(dataset.data)

Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...


Data(x=[13752, 767], edge_index=[2, 491722], y=[13752])


Done!



# 3 capas en la red
*   Contiene normalización de matriz de adyacencia
*   Incluye Dropout 0,4



In [6]:
import torch
torch.manual_seed(0)
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.datasets import Amazon

# Import dataset from PyTorch Geometric
dataset = Amazon(root=".", name="Computers")
data = dataset[0]

# Define train, validation, and test masks
num_nodes = data.num_nodes
train_mask = torch.arange(int(num_nodes * 0.6))
test_mask = torch.arange(int(num_nodes * 0.6), int(num_nodes * 0.9))
val_mask = torch.arange(int(num_nodes * 0.9), num_nodes)

# Apply the masks to the data
data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

# Normalizing adjacency matrix
def normalize_adj(edge_index, num_nodes):
    # Adding self-loops to the adjacency matrix
    edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)
    row, col = edge_index
    deg = degree(row, num_nodes=num_nodes, dtype=torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    normalized_adj = deg_inv_sqrt[row] * deg_inv_sqrt[col]
    return edge_index, normalized_adj

# Apply normalization
edge_index, normalized_adj = normalize_adj(data.edge_index, data.num_nodes)

# Accuracy function
def accuracy(y_pred, y_true):
    """Calculate accuracy."""
    return torch.sum(y_pred == y_true) / len(y_true)

# Define Sparse Layer with normalization
class SparseLayer(torch.nn.Module):
    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.linear = Linear(dim_in, dim_out, bias=False)

    def forward(self, x, edge_index, edge_weight):
        x = self.linear(x)
        return torch.sparse.mm(torch.sparse_coo_tensor(edge_index, edge_weight, (x.size(0), x.size(0))), x)

# Define GNN model with 3 layers
class GNN(torch.nn.Module):
    def __init__(self, dim_in, dim_h, dim_h2, dim_out, dropout=0.5):
        super().__init__()
        self.gnn1 = SparseLayer(dim_in, dim_h)
        self.gnn2 = SparseLayer(dim_h, dim_h2)
        self.gnn3 = SparseLayer(dim_h2, dim_out)
        self.dropout = dropout

    def forward(self, x, edge_index, edge_weight):
        h = self.gnn1(x, edge_index, edge_weight)
        h = torch.relu(h)
        h = F.dropout(h, p=self.dropout, training=self.training)

        h = self.gnn2(h, edge_index, edge_weight)
        h = torch.relu(h)
        h = F.dropout(h, p=self.dropout, training=self.training)

        h = self.gnn3(h, edge_index, edge_weight)
        return F.log_softmax(h, dim=1)

    def fit(self, data, edge_index, edge_weight, epochs=100):
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=0.005, weight_decay=1e-3)

        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x, edge_index, edge_weight)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()

            if epoch % 20 == 0:
                val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc:'
                      f' {acc*100:>5.2f}% | Val Loss: {val_loss:.2f} | '
                      f'Val Acc: {val_acc*100:.2f}%')

    @torch.no_grad()
    def test(self, data, edge_index, edge_weight):
        self.eval()
        out = self(data.x, edge_index, edge_weight)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc

# Create the GNN model with 3 layers
gnn = GNN(dataset.num_features, dim_h=64, dim_h2=32, dim_out=dataset.num_classes, dropout=0.4)
print(gnn)

# Train the model
gnn.fit(data, edge_index, normalized_adj, epochs=100)

# Test the model
acc = gnn.test(data, edge_index, normalized_adj)
print(f'\nGNN test accuracy: {acc * 100:.2f}%')


Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!


GNN(
  (gnn1): SparseLayer(
    (linear): Linear(in_features=767, out_features=64, bias=False)
  )
  (gnn2): SparseLayer(
    (linear): Linear(in_features=64, out_features=32, bias=False)
  )
  (gnn3): SparseLayer(
    (linear): Linear(in_features=32, out_features=10, bias=False)
  )
)
Epoch   0 | Train Loss: 2.310 | Train Acc:  4.98% | Val Loss: 2.31 | Val Acc: 5.23%
Epoch  20 | Train Loss: 1.495 | Train Acc: 54.48% | Val Loss: 1.52 | Val Acc: 53.12%
Epoch  40 | Train Loss: 0.899 | Train Acc: 68.55% | Val Loss: 0.90 | Val Acc: 68.82%
Epoch  60 | Train Loss: 0.646 | Train Acc: 80.80% | Val Loss: 0.68 | Val Acc: 79.51%
Epoch  80 | Train Loss: 0.547 | Train Acc: 84.30% | Val Loss: 0.59 | Val Acc: 83.21%
Epoch 100 | Train Loss: 0.520 | Train Acc: 84.63% | Val Loss: 0.56 | Val Acc: 84.16%

GNN test accuracy: 83.59%
