# GNN Basic

- [Pytorch Geometric tutorial: Introduction to Pytorch geometric - YouTube](https://www.youtube.com/watch?v=JtDgmmQ60x8)

Adjacency matrices:
- adding nodes (graphs of different sizes) changes the size of the adjacency matrix
- node ordering doesn't matter
- every node has its own computation graph

In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric
from torch_geometric.datasets import Planetoid

os.chdir('..')

from utils.utils import *

In [2]:
def count_unique(tensor):
    """Print each distinct value found in ``tensor`` with its frequency.

    One ``Value: <v>, Count: <c>`` line is written to stdout per distinct
    value, in the order returned by ``torch.unique``, followed by a single
    blank line. Nothing is returned.
    """
    distinct, frequencies = torch.unique(tensor, return_counts=True)

    # Assemble the whole report up front; every entry carries its own
    # newline so the trailing print() newline becomes the blank separator.
    report = "".join(
        f"Value: {value}, Count: {count}\n"
        for value, count in zip(distinct.tolist(), frequencies.tolist())
    )
    print(report)

In [3]:
# Build the Cora dataset through PyG's Planetoid loader. `root` is a relative
# path, so it is resolved against the working directory that the first cell
# changed with os.chdir('..').
dataset = Planetoid(root="../Data/", name="Cora")

In [4]:
# Show the concrete dataset class. cprint is not defined in this notebook
# (presumably it comes from the `utils.utils` star-import - confirm); per the
# output below it echoes the expression text and then its value.
cprint(type(dataset))

[93mtype(dataset)[0m: 
<class 'torch_geometric.datasets.planetoid.Planetoid'>



In [5]:
# Inspect the underlying graph object: the output lists the tensor shapes of
# the node features, edge index, labels and the three split masks.
cprint(dataset.data)

[93mdataset.data[0m: 
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])





In [6]:
# Dataset-level summary. len(dataset) is the number of graphs it holds; the
# output below reports 1, i.e. the whole dataset is a single graph.
print("number of graphs:\t\t",len(dataset))
print("number of classes:\t\t",dataset.num_classes)
print("number of node features:\t",dataset.num_node_features)
print("number of edge features:\t",dataset.num_edge_features)

number of graphs:		 1
number of classes:		 7
number of node features:	 1433
number of edge features:	 0


In [7]:
# Edge list of the graph: the output shows a [2, 10556] integer tensor, so
# each column holds the two node indices of one edge.
cprint(dataset.data.edge_index.shape)
cprint(dataset.data.edge_index)

[93mdataset.data.edge_index.shape[0m: 
torch.Size([2, 10556])

[93mdataset.data.edge_index[0m: 
tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])



In [8]:
# Boolean mask with one entry per node marking the training split;
# count_unique reports how many nodes are inside (True) and outside (False).
cprint(dataset.data.train_mask.shape)
cprint(dataset.data.train_mask)
count_unique(dataset.data.train_mask)

[93mdataset.data.train_mask.shape[0m: 
torch.Size([2708])

[93mdataset.data.train_mask[0m: 
tensor([ True,  True,  True,  ..., False, False, False])

Value: False, Count: 2568
Value: True, Count: 140



In [9]:
# Node feature matrix: one row per node, one column per feature.
cprint(dataset.data.x.shape)
cprint(dataset.data.x)

# Node labels and the number of nodes in each class.
cprint(dataset.data.y.shape)
cprint(dataset.data.y)
count_unique(dataset.data.y)

# Indexing with the boolean mask keeps only the rows of the training nodes.
cprint(dataset.data.x[dataset.data.train_mask].shape)

[93mdataset.data.x.shape[0m: 
torch.Size([2708, 1433])

[93mdataset.data.x[0m: 
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

[93mdataset.data.y.shape[0m: 
torch.Size([2708])

[93mdataset.data.y[0m: 
tensor([3, 4, 4,  ..., 3, 3, 3])

Value: 0, Count: 351
Value: 1, Count: 217
Value: 2, Count: 418
Value: 3, Count: 818
Value: 4, Count: 426
Value: 5, Count: 298
Value: 6, Count: 180

[93mdataset.data.x[dataset.data.train_mask].shape[0m: 
torch.Size([140, 1433])



In [10]:
# List the attribute names of the dataset object; per the output they are
# grouped into magic / private / public names. print_methods is not defined
# in this notebook (presumably provided by `utils.utils` - confirm).
print_methods(dataset)


[93mMagic Methods:[0m
    __abstractmethods__
    __add__
    __annotations__
    __class__
    __class_getitem__
    __delattr__
    __dict__
    __dir__
    __doc__
    __eq__
    __format__
    __ge__
    __getattr__
    __getattribute__
    __getitem__
    __getstate__
    __gt__
    __hash__
    __init__
    __init_subclass__
    __le__
    __len__
    __lt__
    __module__
    __ne__
    __new__
    __orig_bases__
    __parameters__
    __reduce__
    __reduce_ex__
    __repr__
    __setattr__
    __sizeof__
    __slots__
    __str__
    __subclasshook__
    __weakref__

[93mPrivate Methods:[0m
    _abc_impl
    _data
    _data_list
    _download
    _indices
    _infer_num_classes
    _is_protocol
    _process

[93mPublic Methods:[0m
    collate
    copy
    data
    download
    geom_gcn_url
    get
    get_summary
    has_download
    has_process
    index_select
    indices
    len
    log
    name
    num_classes
    num_edge_features
    num_features
    num_node_fea

# Basic NN and Training

In [11]:
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

In [12]:
# The dataset reports a single graph (len(dataset) == 1 above), so index 0
# is the graph used by everything below.
data = dataset[0]

class Net(torch.nn.Module):
    """One-layer GraphSAGE node classifier.

    The layer maps ``dataset.num_features`` input channels directly to
    ``dataset.num_classes`` output channels; both sizes are read from the
    module-level ``dataset`` at construction time.
    """

    def __init__(self):
        super().__init__()

        self.conv = SAGEConv(dataset.num_features,
                             dataset.num_classes,
                             aggr="max")  # other aggregations: mean, add, ...

    def forward(self, x=None, edge_index=None):
        """Return the log-softmax (over dim 1) of the SAGEConv output.

        Args:
            x: Node feature tensor. When omitted, the module-level
                ``data.x`` is used, so ``model()`` keeps working as before.
            edge_index: Edge index tensor. When omitted, the module-level
                ``data.edge_index`` is used.

        Returns:
            Tensor of log-probabilities, normalised along dimension 1.
        """
        # Fall back to the notebook's global graph only for arguments the
        # caller did not supply; this lets the model be run on other graphs.
        if x is None:
            x = data.x
        if edge_index is None:
            edge_index = data.edge_index

        out = self.conv(x, edge_index)
        return F.log_softmax(out, dim=1)
    

# Use the GPU when one is available and fall back to the CPU otherwise, so
# the notebook also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Model parameters and graph tensors must live on the same device.
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [13]:
def train():
    """Run one optimisation step using the loss on the training nodes.

    Operates on the module-level ``model``, ``optimizer`` and ``data``;
    returns nothing.
    """
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole graph, then score only the training nodes.
    log_probs = model()
    train_nodes = data.train_mask
    loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    loss.backward()
    optimizer.step()


def test():
    """Evaluate the module-level ``model`` on each split mask of ``data``.

    Returns:
        A list with one accuracy (fraction of correctly predicted nodes) per
        mask yielded by ``data('train_mask', 'val_mask', 'test_mask')``.
        NOTE(review): the commented-out training loop unpacks the result as
        train / val / test - confirm the yield order for the installed PyG
        version.
    """
    model.eval()

    # Evaluation needs no gradients; skipping autograd bookkeeping saves
    # memory and time without changing the predictions.
    with torch.no_grad():
        logits = model()

    accs = []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        # Predicted class = index of the largest log-probability per node.
        pred = logits[mask].argmax(dim=1)
        correct = pred.eq(data.y[mask]).sum().item()
        accs.append(correct / mask.sum().item())
    return accs

In [None]:
# Training loop, currently commented out. It keeps the test accuracy measured
# at the epoch with the best validation accuracy and logs every 10th epoch.
# best_val_acc = test_acc = 0
# for epoch in range(1,100):
#     train()
#     _, val_acc, tmp_test_acc = test()
#     if val_acc > best_val_acc:
#         best_val_acc = val_acc
#         test_acc = tmp_test_acc
#     log = 'Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}'

#     if epoch % 10 == 0:
#         print(log.format(epoch, best_val_acc, test_acc))


# GAT

In [None]:
class GATLayer(nn.Module):
    """Single-head graph attention layer operating on a dense adjacency matrix.

    Args:
        in_features: Size of each input node feature vector.
        out_features: Size of each output node feature vector.
        dropout: Drop probability applied to the attention coefficients
            (only active while the module is in training mode).
        alpha: Negative-input slope of the LeakyReLU applied to the raw
            attention scores.
        concat: When True the output is passed through ELU; when False the
            aggregated features are returned without a non-linearity.
    """

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super().__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        # Linear projection, Xavier-initialised.
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)

        # Attention vector: the first `out_features` rows score the source
        # node, the remaining rows score the neighbour node.
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, input, adj):
        """Compute attention-weighted node features.

        Args:
            input: 2-D tensor of node features, ``(N, in_features)``.
            adj: Tensor broadcastable to ``(N, N)``; entry ``[i, j] > 0``
                means node ``i`` may attend to node ``j``.

        Returns:
            Tensor of shape ``(N, out_features)``.
        """
        # Linear transformation of every node's features.
        h = torch.mm(input, self.W)

        # Raw score e[i, j] = LeakyReLU(a^T [h_i || h_j]). Because the dot
        # product with a concatenation splits into two dot products, it equals
        # a_src . h_i + a_dst . h_j; broadcasting an (N, 1) column against a
        # (1, N) row yields the same (N, N) matrix without materialising the
        # (N, N, 2 * out_features) tensor of all concatenated pairs.
        src_score = torch.matmul(h, self.a[:self.out_features])
        dst_score = torch.matmul(h, self.a[self.out_features:])
        e = self.leakyrelu(src_score + dst_score.t())

        # Masked attention: non-edges get a very large negative score so that
        # they receive (numerically) zero weight after the softmax. A finite
        # value keeps rows without any neighbour free of NaNs.
        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)

        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        return h_prime