# Graph Neural Network
## Notebook 3

In this notebook, we will define, train, and test a Graph Neural Network to predict sale prices of NFTs.

## Connect to TigerGraph Database

The code block below connects to a TigerGraph database. Make sure to change the authentication details in order for you to connect to the instance successfully.

In [1]:
import pyTigerGraph as tg

conn = tg.TigerGraphConnection("http://3.22.188.182", graphname="KDD_2022_NFT")

## Create Graph Features

In [2]:
f = conn.gds.featurizer()

f.installAlgorithm("tg_fastRP")

'tg_fastRP'

In [3]:
params = {"v_type": ["Category", "NFT_Collection", "NFT"], 
          "e_type": ["COLLECTION_HAS_NFT", "CATEGORY_HAS_NFT", "NFT_IN_CATEGORY", "NFT_IN_COLLECTION"], 
          "weights": "1,2,4", 
          "beta": -0.1,
          "k": 3,
          "reduced_dim": 64, 
          "sampling_constant": 3,
          "random_seed": 42,
          "print_accum": False,
          "result_attr": "fastrp_embedding"}

f.runAlgorithm("tg_fastRP", params)

[]

In [4]:
ExprFunctions=""  # For enterprise users, please use the link you received.
ExprUtil=""  # For enterprise users, please use the link you received.
#conn.installUDF(ExprFunctions, ExprUtil)

In [5]:
conn.gds.configureKafka(kafka_address="kaf.kdd.tigergraphlabs.com:19092")

## Define Data Loader

In [6]:
train_loader = conn.gds.neighborLoader(
    v_in_feats={"Transaction": ["seller_k_size", "buyer_k_size"], 
                "NFT_User": ["pagerank", "kcore_size"], 
                "NFT": ["fastrp_embedding"], 
                "NFT_Collection": ["fastrp_embedding"], 
                "Category": ["fastrp_embedding"]},
    v_out_labels={"Transaction": ["usd_price"]},
    v_extra_feats={"Transaction":  ["train"]},
    filter_by={"Transaction": "train"},
    shuffle=True,
    batch_size=2048,
    buffer_size=4,
    add_self_loop=True,
    reverse_edge=True
)

In [7]:
for batch in train_loader:
    print(batch.metadata())
    break

  from .autonotebook import tqdm as notebook_tqdm


(['Transaction', 'NFT_User', 'NFT', 'NFT_Collection', 'Category'], [('Transaction', 'NFT_SOLD_BY', 'NFT_User'), ('Transaction', 'NFT_BOUGHT_BY', 'NFT_User'), ('Transaction', 'FOR_SALE_OF', 'NFT'), ('NFT', 'HAD_TRANSACTION', 'Transaction'), ('NFT', 'NFT_IN_COLLECTION', 'NFT_Collection'), ('NFT', 'NFT_IN_CATEGORY', 'Category'), ('NFT_User', 'USER_SOLD_NFT', 'Transaction'), ('NFT_User', 'USER_SOLD_TO', 'NFT_User'), ('NFT_User', 'USER_BOUGHT_FROM', 'NFT_User'), ('NFT_User', 'USER_BOUGHT_NFT', 'Transaction')])


In [8]:
train_loader.num_batches

24

## Define Graph Attention Network

In [9]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, to_hetero


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create a normal (homogeneous) GAT model
class GAT(torch.nn.Module):
    def __init__(
        self, num_layers, out_dim, dropout, hidden_dim, num_heads
    ):
        super().__init__()
        self.dropout = dropout
        self.layers = torch.nn.ModuleList()
        for i in range(num_layers):
            in_units = (-1, -1) if i == 0 else hidden_dim * num_heads
            out_units = out_dim if i == (num_layers - 1) else hidden_dim
            heads = 1 if i == (num_layers - 1) else num_heads
            self.layers.append(
                GATConv(in_units, out_units, heads=heads, dropout=dropout)
            )
        self.double()

    def reset_parameters(self):
        for layer in self.layers:
            layer.reset_parameters()

    def forward(self, x, edge_index):
        x = x.float()
        for layer in self.layers[:-1]:
            x = layer(x, edge_index)
            x = F.elu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.layers[-1](x, edge_index)
        return x
    
model = GAT(
    num_layers=2,
    out_dim=1,
    dropout=0.8,
    hidden_dim=8,
    num_heads=4,
)

# Convert it to a heterogeneous model. See https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#torch_geometric.nn.to_hetero_transformer.to_hetero for details.
model = to_hetero(model, batch.metadata(), aggr='mul').to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

mae = torch.nn.L1Loss()

## Train GNN

In [10]:
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/gnn_training'+str(datetime.now()))

  if not hasattr(tensorboard, '__version__') or LooseVersion(tensorboard.__version__) < LooseVersion('1.15'):
  if not hasattr(tensorboard, '__version__') or LooseVersion(tensorboard.__version__) < LooseVersion('1.15'):


In [11]:
for i in range(20):
    epochLoss = 0
    epochMae = 0

    j = 0
    for batch in train_loader:
        model.train()
        optimizer.zero_grad()
        out = model(batch.x_dict, batch.edge_index_dict)
        mask = batch["Transaction"].train
        loss = F.smooth_l1_loss(out["Transaction"][mask].flatten(), batch["Transaction"].y[mask])
        loss.backward()
        optimizer.step()
        epochLoss += loss.item()
        batchMae = mae(out["Transaction"][mask].flatten(), batch["Transaction"].y[mask])
        epochMae += batchMae.item()
        #print("Batch:", j, "Loss:", loss.item(), "MAE:", batchMae.item())

                # ...log the running loss
        writer.add_scalar('training loss',
                        loss.item(),
                        i * train_loader.num_batches + j)
        writer.add_scalar('training mae',
                          batchMae.item(),
                          i * train_loader.num_batches + j)

        j += 1
    print("EPOCH:", i, "LOSS:", epochLoss / train_loader.num_batches, "MAE:", epochMae / train_loader.num_batches)

EPOCH: 0 LOSS: 84.1789609004902 MAE: 84.53867048515747
EPOCH: 1 LOSS: 84.07659451301298 MAE: 84.43636703472662
EPOCH: 2 LOSS: 83.41138895810124 MAE: 83.77127642621342
EPOCH: 3 LOSS: 83.96139434334498 MAE: 84.32050446430317
EPOCH: 4 LOSS: 82.09328605267515 MAE: 82.45219914196134
EPOCH: 5 LOSS: 81.06711912585138 MAE: 81.42538688487127
EPOCH: 6 LOSS: 82.25617711826577 MAE: 82.61477320558305
EPOCH: 7 LOSS: 82.2723330178559 MAE: 82.63043146365384
EPOCH: 8 LOSS: 83.31271458326462 MAE: 83.66995472163757
EPOCH: 9 LOSS: 81.5533396259144 MAE: 81.90969251901369
EPOCH: 10 LOSS: 82.6070436670344 MAE: 82.9611001658151
EPOCH: 11 LOSS: 82.88955156722217 MAE: 83.24226798438262
EPOCH: 12 LOSS: 82.67016822636941 MAE: 83.0194979082486
EPOCH: 13 LOSS: 83.79682331829092 MAE: 84.14384375165265
EPOCH: 14 LOSS: 80.4344199616139 MAE: 80.77845245076306
EPOCH: 15 LOSS: 83.48354590714946 MAE: 83.82721595189226
EPOCH: 16 LOSS: 82.61014725372442 MAE: 82.95431615053143
EPOCH: 17 LOSS: 82.661893742361 MAE: 83.00846400

## Test GNN

In [12]:
test_loader = conn.gds.neighborLoader(
    v_in_feats={"Transaction": ["seller_k_size", "buyer_k_size"], 
                "NFT_User": ["pagerank", "kcore_size"], 
                "NFT": ["fastrp_embedding"], 
                "NFT_Collection": ["fastrp_embedding"], 
                "Category": ["fastrp_embedding"]},
    v_out_labels={"Transaction": ["usd_price"]},
    v_extra_feats={"Transaction":  ["test"]},
    filter_by={"Transaction": "test"},
    shuffle=False,
    batch_size=2048,
    add_self_loop=True,
    reverse_edge=True
)



In [13]:
totLoss = 0
totMAE = 0
for batch in test_loader:
    model.eval()
    with torch.no_grad():
        out = model(batch.x_dict, batch.edge_index_dict)
        mask = batch["Transaction"].test
        loss = F.smooth_l1_loss(out["Transaction"][mask].flatten(), batch["Transaction"].y[mask])
    totMAE += mae(out["Transaction"][mask].flatten(), batch["Transaction"].y[mask]).item()
    totLoss += loss.item()
print("LOSS:", totLoss / test_loader.num_batches, "MAE:", totMAE / test_loader.num_batches)

LOSS: 93.36157755388801 MAE: 93.75959714282766
