This work is inspired by and based on the following resources:

https://github.com/pyg-team/pytorch_geometric/blob/master/examples/ogbn_products_sage.py

https://github.com/PacktPublishing/Hands-On-Graph-Neural-Networks-Using-Python/blob/main/Chapter08/chapter8.ipynb 

https://github.com/pyg-team/pytorch_geometric/blob/master/examples/graph_sage_unsup.py

https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.SAGEConv.html

https://medium.com/@juyi.lin/neighborloader-introduction-ccb870cc7294

# Imports

In [24]:
import torch
from torch_geometric.datasets import Reddit
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SAGEConv
import torch.nn.functional as F
from torch_geometric.loader import NeighborLoader
import time
from sklearn.metrics import f1_score 
import numpy as np

# Local 
import graph_handler
import graph_sage_node_classification
from k_cross_validation import k_fold

In [20]:
# Toggle for PyTorch's MPS backend. It is switched off, so the notebook runs
# on the CPU; set it to True to use MPS when the backend is available.
USE_MPS = False

# The flag is checked first so the availability probe is skipped when MPS is
# disabled.
if USE_MPS and torch.backends.mps.is_available():
    device = torch.device("mps")
    # Smoke test: allocate a small tensor on the MPS device and show it.
    x = torch.ones(1, device=device)
    print (x)
else:
    device = torch.device('cpu')

# Read in Data: CORA - small version 

In [4]:
# Load Cora through the Planetoid dataset class, using /tmp/Cora as its root
# directory.
dataset = Planetoid(name='Cora', root='/tmp/Cora')

# Take the first entry of the dataset (the graph) and move it to the selected
# device in one step.
data = dataset[0].to(device)

## Visualize Information from graph

In [18]:
# Print a summary of the dataset and of its graph via the local graph_handler
# helper (counts, split sizes and structural flags, as shown in the output below).
graph_handler.visualize_information_graph(dataset)

Dataset: Cora()
-------------------
Number of graphs: 1
Number of nodes: 2708
Number of features: 1433
Number of classes: 7

Graph:
------
Training nodes: 140
Evaluation nodes: 500
Test nodes: 1000
Edges are directed: False
Graph has isolated nodes: False
Graph has loops: False


# Hyperparameters

In [6]:
# --- Experiment knobs: the values that are varied between runs ---
learning_rate = 0.01  # 0.0001 is noted as an alternative value to try
aggregator = 'mean'   # handed to the model as its `aggr` argument

# --- Settings held fixed across experiments ---
epochs = 10
batch = 512                                # batch_size of the NeighborLoader
neighborhood_1, neighborhood_2 = 25, 10    # entries of the loader's num_neighbors list
embedding_dimension = 128                  # handed to the model as `hidden_channels`
k = 5                                      # number of folds for k-fold cross-validation

# Model options forwarded unchanged to the GraphSAGE_local constructor.
dropout_rate = 0.4
normalization = True
activation_function = True
bias = True

# Training model and obtaining results 

In [8]:
# Per-fold metric accumulators: each list receives one value per fold.
micro_f1_scores = []
macro_f1_scores = []
accuracy_scores = []


In [27]:
# Start from empty accumulators every time this cell runs. Without this reset,
# re-running the cell appends to lists left over from an earlier execution and
# the means computed below silently mix results from several runs.
micro_f1_scores, macro_f1_scores, accuracy_scores = [], [], []

# NOTE(review): the unpacking order (train, test, val) is kept exactly as it
# was; confirm it against the return order of k_fold. `val_idx` is not used
# anywhere in this cell.
for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*k_fold(data, k))):
    print('Fold = ', fold)
    # Turn the fold's index lists into boolean node masks over the whole graph.
    mask_train = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask_train[train_idx] = True
    mask_test = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask_test[test_idx] = True

    # Create a NeighborLoader which samples nodes for the current fold's training data
    train_loader = NeighborLoader(data, 
                                  input_nodes = mask_train, 
                                  batch_size=batch,
                                  num_neighbors=[neighborhood_1, neighborhood_2],
                                  shuffle=True)

    # A fresh model per fold, so no weights carry over from the previous fold.
    model = graph_sage_node_classification.GraphSAGE_local(in_channels = data.num_node_features,
                      hidden_channels= embedding_dimension,
                      out_channels = dataset.num_classes,
                      dropout= dropout_rate,
                      aggr = aggregator,
                      normalization = normalization,
                      activation_function = activation_function,
                      bias = bias)
    model.to(device)

    # Train the model for the current fold
    model.fit(train_loader, epochs, learning_rate, device)

    # Evaluate on the nodes selected by this fold's test mask.
    acc, f1_macro, f1_micro = model.test_kfold(data, mask_test)

    # Record this fold's metrics.
    micro_f1_scores.append(f1_micro)
    macro_f1_scores.append(f1_macro)
    accuracy_scores.append(acc)
    
# Average each metric over the folds of this run.
mean_micro_f1 = np.mean(micro_f1_scores)
mean_macro_f1 = np.mean(macro_f1_scores)
mean_accuracy = np.mean(accuracy_scores)

Fold =  0
Epoch   0 | Train Loss: 2.045 | Train Acc:  19.02% | Val Loss: 2.00 | Val Acc: 17.20%
Epoch   1 | Train Loss: 1.777 | Train Acc:  30.51% | Val Loss: 1.91 | Val Acc: 18.05%
Epoch   2 | Train Loss: 1.477 | Train Acc:  41.80% | Val Loss: 1.83 | Val Acc: 22.40%
Epoch   3 | Train Loss: 1.258 | Train Acc:  49.85% | Val Loss: 1.74 | Val Acc: 28.58%
Epoch   4 | Train Loss: 1.099 | Train Acc:  53.70% | Val Loss: 1.61 | Val Acc: 35.87%
Epoch   5 | Train Loss: 0.999 | Train Acc:  58.94% | Val Loss: 1.60 | Val Acc: 38.21%
Epoch   6 | Train Loss: 1.065 | Train Acc:  54.03% | Val Loss: 1.63 | Val Acc: 35.77%
Epoch   7 | Train Loss: 0.928 | Train Acc:  58.88% | Val Loss: 1.61 | Val Acc: 36.14%
Epoch   8 | Train Loss: 0.979 | Train Acc:  55.52% | Val Loss: 1.61 | Val Acc: 38.64%
Epoch   9 | Train Loss: 1.038 | Train Acc:  54.95% | Val Loss: 1.62 | Val Acc: 39.04%
Epoch  10 | Train Loss: 0.943 | Train Acc:  58.32% | Val Loss: 1.66 | Val Acc: 38.43%
Median time per epoch: 0.1868s
Fold =  1
Epo

In [28]:
# Show the cross-validated means as percentages, one per line, in the order
# micro-F1, macro-F1, accuracy.
print(mean_micro_f1*100, mean_macro_f1*100, mean_accuracy*100, sep="\n")

59.17680119499901
56.745100747539944
59.17680159211158
