<a href="https://colab.research.google.com/github/1zuu/Machine-Learning-With-Graphs/blob/main/1_introduction_GraphSAGE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Dependency Installation**

In [1]:
import torch

def format_pytorch_version(version):
  """Return the release part of a PyTorch version string.

  Everything from the first ``+`` onward (the local build tag, e.g.
  ``+cu113``) is dropped; a string without ``+`` is returned unchanged.
  """
  release, _, _ = version.partition('+')
  return release

# PyTorch release of the running kernel with the build tag stripped
# (e.g. '1.11.0'); interpolated into the wheel index URLs further down.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  """Return the wheel-index tag for a CUDA version string.

  Args:
    version: CUDA version as reported by ``torch.version.cuda`` (e.g.
      ``'11.3'``), or ``None``/empty for a CPU-only PyTorch build.

  Returns:
    ``'cu'`` followed by the version with the dots removed (``'cu113'``),
    or ``'cpu'`` when no CUDA version is available.
  """
  # torch.version.cuda is None on CPU-only builds; calling .replace on it
  # would raise AttributeError, so fall back to the CPU wheel tag instead.
  if not version:
    return 'cpu'
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.11.0+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.11.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 4.2 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.11.0+cu113.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.11.0%2Bcu113/torch_sparse-0.6.13-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 1.1 MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.13
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/

# **Import Dependencies & Mount Gdrive**

In [2]:
import torch, os
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.datasets import Planetoid

# Mount Google Drive at /content/drive; the data paths configured later in
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Data Paths, Parameters & Variables**

In [3]:
# Project folder on the mounted Google Drive.
working_dir = '/content/drive/My Drive/Colab Notebooks/machine-learning-with-graphs/'

# Dataset location relative to the project folder; `data_dir` is the absolute
# path handed to the dataset loader as its root directory.
data_folder = 'data/Planetoid'
data_dir = os.path.join(working_dir, data_folder)

# **Load Dataset**

In [4]:
# Cora citation graph from the Planetoid collection, rooted at `data_dir`.
dataset = Planetoid(root=data_dir, name='Cora')

*dataset* *properties*

In [5]:
# Print the dataset repr followed by one "label : value" line per property.
print(dataset)
property_rows = (
    ("number of graphs           : ", len(dataset)),
    ("number of classes          : ", dataset.num_classes),
    ("number of node features    : ", dataset.num_node_features),
    ("number of edge features    : ", dataset.num_edge_features),
)
for label, value in property_rows:
    print(label, value)

Cora()
number of graphs           :  1
number of classes          :  7
number of node features    :  1433
number of edge features    :  0


Dataset *shapes*

In [6]:
# Cora is a single graph with
#   |V| = N = 2708 nodes
#   node feature dimension D = 1433, i.e. x has shape [N, D]
print(dataset.data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [7]:
# Connectivity tensor: its size first, then an abbreviated view of the values.
print("edge_index : ", dataset.data.edge_index.size())
print(dataset.data.edge_index)

edge_index :  torch.Size([2, 10556])
tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])


In [8]:
# Boolean node mask selecting the training nodes: size, then values.
print("train_mask : ", dataset.data.train_mask.size())
print(dataset.data.train_mask)


train_mask :  torch.Size([2708])
tensor([ True,  True,  True,  ..., False, False, False])


In [9]:
# Node feature matrix (one row per node): size, then values.
print("x : ", dataset.data.x.size())
print(dataset.data.x)


x :  torch.Size([2708, 1433])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [10]:
# Per-node class labels: size, then values.
print("y : ", dataset.data.y.size())
print(dataset.data.y)

y :  torch.Size([2708])
tensor([3, 4, 4,  ..., 3, 3, 3])


In [11]:
# Number of nodes in each split = number of True entries in the split's mask.
print(f"Train Size : {torch.sum(dataset.data.train_mask)}")
print(f"Test  Size : {torch.sum(dataset.data.test_mask)}")
print(f"valid Size : {torch.sum(dataset.data.val_mask)}")

Train Size : 140
Test  Size : 1000
valid Size : 500


In [12]:
# Count of training nodes, shown as the cell's output value.
dataset.data.train_mask.sum()

tensor(140)

# **Graph Neural Network Designing (GraphSAGE)**

In [15]:
class GraphSAGE(torch.nn.Module):
    """Single-layer GraphSAGE node classifier bound to one graph.

    The module keeps a reference to ``data`` and always runs on that graph,
    which is why ``forward()`` takes no arguments.

    Args:
        data: graph object exposing ``x``, ``edge_index`` and ``y``
            (e.g. ``dataset[0]``).
        in_channels: size of the input node features. Defaults to
            ``data.num_node_features``.
        out_channels: number of output classes. Defaults to
            ``int(data.y.max()) + 1``, which assumes integer class labels
            numbered from 0.
        aggr: neighbourhood aggregation handed to ``SAGEConv``
            ("max", "mean", "add", ...).
    """

    def __init__(self, data, in_channels=None, out_channels=None, aggr="max"):
        super().__init__()

        # Size the layer from the graph that was actually passed in instead of
        # a notebook-level global, so the model always matches its own data.
        if in_channels is None:
            in_channels = data.num_node_features
        if out_channels is None:
            out_channels = int(data.y.max()) + 1

        self.conv = SAGEConv(in_channels, out_channels, aggr=aggr)
        self.data = data

    def forward(self):
        """Return per-node log-probabilities over the output classes."""
        x = self.conv(self.data.x, self.data.edge_index)
        return F.log_softmax(x, dim=1)

# **Train GraphSAGE**

In [18]:
class NodeClassification(object):
    """Train and evaluate ``GraphSAGE`` on one graph with train/val/test masks.

    Args:
        data: graph to train on; must expose ``y``, ``train_mask``,
            ``val_mask`` and ``test_mask``. Defaults to ``dataset[0]`` from
            the notebook namespace.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
    """

    def __init__(self, data=None, lr=0.01, weight_decay=5e-4):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        if data is None:
            data = dataset[0]
        # Move the graph to the target device *before* building the model, so
        # the tensors the model holds sit on the same device as its weights.
        self.data = data.to(self.device)
        self.model = GraphSAGE(self.data).to(self.device)
        self.optimizer = torch.optim.Adam(
                            self.model.parameters(),
                            weight_decay=weight_decay,
                            lr=lr
                            )

    def train(self):
        """Run one full-batch optimisation step on the training nodes."""
        self.model.train()
        self.optimizer.zero_grad()

        train_mask = self.data.train_mask
        loss = F.nll_loss(self.model()[train_mask], self.data.y[train_mask])
        loss.backward()

        self.optimizer.step()

    @torch.no_grad()  # evaluation only: do not record an autograd graph
    def test(self):
        """Return ``[train_acc, val_acc, test_acc]`` for the current weights."""
        self.model.eval()
        logits, accs = self.model(), []
        for mask in (self.data.train_mask, self.data.val_mask, self.data.test_mask):
            pred = logits[mask].argmax(dim=1)
            acc = pred.eq(self.data.y[mask]).sum().item() / mask.sum().item()
            accs.append(acc)
        return accs

    def train_loop(self, epochs=100, log_every=10):
        """Train for ``epochs`` epochs, printing progress every ``log_every``.

        The printed numbers are the best validation accuracy seen so far and
        the test accuracy measured at that same epoch (model selection on the
        validation split).
        """
        log = 'Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}'
        best_val_acc = test_acc = 0
        for epoch in range(1, epochs + 1):
            self.train()
            _, val_acc, tmp_test_acc = self.test()
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                test_acc = tmp_test_acc

            if epoch % log_every == 0:
                print(log.format(epoch, best_val_acc, test_acc))

In [19]:
# Build the trainer and run the training loop; a progress line with the best
# validation accuracy (and the matching test accuracy) is printed every 10 epochs.
task = NodeClassification()
task.train_loop()

Epoch: 010, Val: 0.7420, Test: 0.7200
Epoch: 020, Val: 0.7420, Test: 0.7200
Epoch: 030, Val: 0.7420, Test: 0.7200
Epoch: 040, Val: 0.7420, Test: 0.7200
Epoch: 050, Val: 0.7420, Test: 0.7200
Epoch: 060, Val: 0.7420, Test: 0.7200
Epoch: 070, Val: 0.7420, Test: 0.7200
Epoch: 080, Val: 0.7420, Test: 0.7200
Epoch: 090, Val: 0.7420, Test: 0.7200
