This work is inspired/based in the following work: 

https://github.com/pyg-team/pytorch_geometric/blob/master/examples/ogbn_products_sage.py

https://github.com/PacktPublishing/Hands-On-Graph-Neural-Networks-Using-Python/blob/main/Chapter08/chapter8.ipynb 

https://github.com/pyg-team/pytorch_geometric/blob/master/examples/graph_sage_unsup.py

https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.SAGEConv.html

https://medium.com/@juyi.lin/neighborloader-introduction-ccb870cc7294

TODO: add k-fold cross validation 

# Imports

In [3]:
import torch
from torch_geometric.datasets import Reddit
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SAGEConv
import torch.nn.functional as F
from torch_geometric.loader import NeighborLoader
import time
from sklearn.metrics import f1_score 
import graph_handler
import graph_sage_node_classification

In [4]:
if torch.backends.mps.is_available() and False:
    device = torch.device("mps")
    x = torch.ones(1, device=device)
    print (x)
else:
    device = torch.device('cpu')

# Read in Data: CORA - small version 

In [5]:
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]
data = data.to(device)

## Visualize Information from graph

In [6]:
graph_handler.visualize_information_graph(dataset)

Dataset: Cora()
-------------------
Number of graphs: 1
Number of nodes: 2708
Number of features: 1433
Number of classes: 7

Graph:
------
Training nodes: 140
Evaluation nodes: 500
Test nodes: 1000
Edges are directed: False
Graph has isolated nodes: False
Graph has loops: False


# Hyperparameters

In [7]:
# VARIABLES TO CHANGE FOR EXPERIMENTS 
learning_rate = 0.01 # variable to change/play around with for experiments --> 0.0001
aggregator = 'mean' # variable to change/play around with for experiments

# FIXED PARAMETERS
epochs = 10
dropout_rate = 0.4
normalization = True
activation_function = True
bias = True
batch =  512
neighborhood_1 = 25
neighborhood_2 = 10
embedding_dimension = 128

# NeighborLoader = Sampling from graph 

In [8]:
train_loader = NeighborLoader(data, 
                            input_nodes=data.train_mask, # ensure that the sampling only happens in the training set 
                            batch_size = batch,
                            num_neighbors=[neighborhood_1, neighborhood_2], 
                            shuffle=True)

# Training model and obtaining results 

# 1. Create model for node classification


In [9]:
labels_all_nodes = data.y
number_classes  = labels_all_nodes.unique().size(0)

model = graph_sage_node_classification.GraphSAGE_local(in_channels = data.num_node_features,
                  hidden_channels= embedding_dimension,
                  out_channels = number_classes,
                  dropout= dropout_rate,
                  aggr = aggregator,
                  normalization = normalization,
                  activation_function = activation_function,
                  bias = bias)
model.to(device)
print(model)

GraphSAGE_local(
  (conv1): SAGEConv(1433, 128, aggr=mean)
  (conv2): SAGEConv(128, 7, aggr=mean)
)


# 2. Training mdel for certain number of epochs and testing it 

In [10]:
model.fit(train_loader, epochs, learning_rate, device)


Epoch   0 | Train Loss: 1.956 | Train Acc:  11.43% | Val Loss: 1.95 | Val Acc: 13.48%
Epoch   1 | Train Loss: 1.912 | Train Acc:  17.86% | Val Loss: 1.94 | Val Acc: 23.21%
Epoch   2 | Train Loss: 2.819 | Train Acc:  14.29% | Val Loss: 2.26 | Val Acc: 20.78%
Epoch   3 | Train Loss: 1.883 | Train Acc:  21.43% | Val Loss: 1.92 | Val Acc: 26.84%
Epoch   4 | Train Loss: 1.891 | Train Acc:  26.43% | Val Loss: 1.94 | Val Acc: 22.94%
Epoch   5 | Train Loss: 1.861 | Train Acc:  22.14% | Val Loss: 1.93 | Val Acc: 26.07%
Epoch   6 | Train Loss: 1.764 | Train Acc:  28.57% | Val Loss: 1.92 | Val Acc: 25.00%
Epoch   7 | Train Loss: 1.721 | Train Acc:  28.57% | Val Loss: 1.90 | Val Acc: 27.16%
Epoch   8 | Train Loss: 1.605 | Train Acc:  36.43% | Val Loss: 1.86 | Val Acc: 33.76%
Epoch   9 | Train Loss: 1.502 | Train Acc:  40.00% | Val Loss: 1.81 | Val Acc: 37.50%
Epoch  10 | Train Loss: 1.473 | Train Acc:  37.86% | Val Loss: 1.77 | Val Acc: 34.19%
Median time per epoch: 0.0350s


## 3. Calculate accuracy and F-1 score on test data


In [11]:
acc, f1_macro, f1_micro = model.test(data)
print(f'Model accuarcy: {acc*100:.2f}%' )
print(f'Model f1 Macro: {f1_macro:.2f}%' )
print(f'Model f1 Micro: {f1_micro:.2f}%' )

Model accuarcy: 52.50%
Model f1 Macro: 0.35%
Model f1 Micro: 0.53%
