In [1]:
import pyTigerGraph as tg

# Connect to the TigerGraph server that hosts the NFT graph used in this notebook.
# No credentials are supplied here and the host is addressed over plain HTTP.
TG_HOST = "http://3.22.188.182"
TG_GRAPH_NAME = "KDD_2022_NFT"
conn = tg.TigerGraphConnection(TG_HOST, graphname=TG_GRAPH_NAME)

In [2]:
# Featurizer handle used to install and run graph algorithms; `f` is reused
# by the next cell to run the algorithm installed here.
FASTRP_ALGORITHM = "tg_fastRP"
f = conn.gds.featurizer()

# Kept as the last expression so the cell still displays the call's return value.
f.installAlgorithm(FASTRP_ALGORITHM)

'tg_fastRP'

In [3]:
# Arguments handed to the `tg_fastRP` query. The resulting embedding is written
# to the `fastrp_embedding` attribute, which the data loaders read back later.
fastrp_vertex_types = ["Category", "NFT_Collection", "NFT"]
fastrp_edge_types = [
    "COLLECTION_HAS_NFT",
    "CATEGORY_HAS_NFT",
    "NFT_IN_CATEGORY",
    "NFT_IN_COLLECTION",
]
params = dict(
    v_type=fastrp_vertex_types,
    e_type=fastrp_edge_types,
    weights="1,2,4",  # three comma-separated weights, matching k=3 below
    beta=-0.1,
    k=3,
    reduced_dim=64,
    sampling_constant=3,
    random_seed=42,
    print_accum=False,
    result_attr="fastrp_embedding",
)

f.runAlgorithm("tg_fastRP", params)

[]

In [4]:
# Locations of the two UDF source files that `conn.installUDF` (disabled below)
# takes as arguments. Both are left empty in this notebook.
ExprFunctions=""  # For enterprise users, please use the link you received.
ExprUtil=""  # For enterprise users, please use the link you received.
# Disabled on purpose: with the empty placeholders above there is nothing to install.
# Uncomment once both values have been filled in.
#conn.installUDF(ExprFunctions, ExprUtil)

In [5]:
# Register the Kafka broker address with the GDS module before any data
# loader is created.
KAFKA_ADDRESS = "kaf.kdd.tigergraphlabs.com:19092"
conn.gds.configureKafka(kafka_address=KAFKA_ADDRESS)

In [6]:
# Neighbor loader for the training split.
# Input features are listed per vertex type; the regression target is the
# `usd_price` of each Transaction. The `train` flag is both the loader filter
# and an extra attribute, so the training loop can use it as a mask.
train_input_features = {
    "Transaction": ["seller_k_size", "buyer_k_size"],
    "NFT_User": ["pagerank", "kcore_size"],
    "NFT": ["fastrp_embedding"],
    "NFT_Collection": ["fastrp_embedding"],
    "Category": ["fastrp_embedding"],
}
train_loader_options = dict(
    v_in_feats=train_input_features,
    v_out_labels={"Transaction": ["usd_price"]},
    v_extra_feats={"Transaction": ["train"]},
    filter_by={"Transaction": "train"},
    shuffle=True,
    batch_size=2048,
    buffer_size=4,
    add_self_loop=True,
    reverse_edge=True,
)
train_loader = conn.gds.neighborLoader(**train_loader_options)

In [7]:
# Pull a single batch and print its metadata: the list of node types and the
# list of (source, relation, destination) edge types it carries.
# NOTE: the loop variable `batch` stays defined after the `break`; a later cell
# relies on it (`batch.metadata()` is passed to `to_hetero`), so this cell must
# run before the model is built.
for batch in train_loader:
    print(batch.metadata())
    break

  from .autonotebook import tqdm as notebook_tqdm


(['Transaction', 'NFT', 'NFT_User', 'NFT_Collection', 'Category'], [('Transaction', 'NFT_SOLD_BY', 'NFT_User'), ('Transaction', 'NFT_BOUGHT_BY', 'NFT_User'), ('Transaction', 'FOR_SALE_OF', 'NFT'), ('NFT', 'HAD_TRANSACTION', 'Transaction'), ('NFT', 'NFT_IN_COLLECTION', 'NFT_Collection'), ('NFT', 'NFT_IN_CATEGORY', 'Category'), ('NFT_User', 'USER_SOLD_NFT', 'Transaction'), ('NFT_User', 'USER_SOLD_TO', 'NFT_User'), ('NFT_User', 'USER_BOUGHT_FROM', 'NFT_User'), ('NFT_User', 'USER_BOUGHT_NFT', 'Transaction')])


In [8]:
# Number of batches reported by the training loader; the training loop later
# uses the same value to average per-epoch metrics and to build the global step.
train_loader.num_batches

24

In [9]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, to_hetero


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The model is moved to this device after the `to_hetero` conversion.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create a normal (homogeneous) GAT model; it is converted to a heterogeneous
# model with `to_hetero` after instantiation.
class GAT(torch.nn.Module):
    """Stack of `GATConv` layers written for a homogeneous graph.

    Every layer except the last is followed by ELU and dropout; the last
    layer uses a single attention head and emits `out_dim` channels.
    """

    def __init__(
        self, num_layers: int, out_dim: int, dropout: float, hidden_dim: int, num_heads: int
    ):
        """Build the layer stack.

        Args:
            num_layers: Number of `GATConv` layers to stack.
            out_dim: Output channels of the final layer.
            dropout: Dropout probability, used both inside each `GATConv`
                and between layers in `forward`.
            hidden_dim: Per-head output channels of every non-final layer.
            num_heads: Attention heads of every non-final layer.
        """
        super().__init__()
        self.dropout = dropout
        self.layers = torch.nn.ModuleList()
        for i in range(num_layers):
            # First layer: (-1, -1) leaves the input sizes to be inferred lazily.
            # Later layers receive hidden_dim * num_heads channels (presumably
            # because GATConv concatenates its heads by default -- verify).
            in_units = (-1, -1) if i == 0 else hidden_dim * num_heads
            out_units = out_dim if i == (num_layers - 1) else hidden_dim
            # The final layer uses one head so its output width is exactly out_dim.
            heads = 1 if i == (num_layers - 1) else num_heads
            self.layers.append(
                GATConv(in_units, out_units, heads=heads, dropout=dropout)
            )
        # NOTE(review): parameters are cast to float64 here, yet `forward` casts
        # the input to float32 -- confirm which dtype is actually intended.
        self.double()

    def reset_parameters(self):
        """Re-initialise the parameters of every layer."""
        for layer in self.layers:
            layer.reset_parameters()

    def forward(self, x, edge_index):
        """Run the input through all layers and return the last layer's output.

        Args:
            x: Node features; cast to float32 before the first layer.
            edge_index: Graph connectivity passed unchanged to every layer.
        """
        x = x.float()
        # All layers but the last: convolution -> ELU -> dropout.
        for layer in self.layers[:-1]:
            x = layer(x, edge_index)
            x = F.elu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        # Final layer: no activation or dropout, raw outputs are returned.
        x = self.layers[-1](x, edge_index)
        return x
    
# Hyperparameters of the homogeneous GAT defined above.
gat_hyperparameters = dict(
    num_layers=2,
    out_dim=1,
    dropout=0.8,
    hidden_dim=8,
    num_heads=4,
)
model = GAT(**gat_hyperparameters)

# Convert it to a heterogeneous model. See https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#torch_geometric.nn.to_hetero_transformer.to_hetero for details.
# The metadata comes from the `batch` left over by the earlier inspection cell.
graph_metadata = batch.metadata()
model = to_hetero(model, graph_metadata, aggr='mul')
model = model.to(device)

# Adam over all parameters of the converted model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Mean absolute error, reported next to the training loss.
mae = torch.nn.L1Loss()

In [10]:
def r2_loss(output, target):
    """Return the coefficient of determination (R^2) of `output` w.r.t. `target`.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of `target` from
    its mean. Despite the name, this is a score (1.0 is a perfect fit), not a
    quantity to minimise.
    """
    residuals = target - output
    deviations = target - target.mean()
    ss_res = residuals.pow(2).sum()
    ss_tot = deviations.pow(2).sum()
    return 1 - ss_res / ss_tot

In [11]:
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
# default `log_dir` is "runs" - we'll be more specific here.
# Each run gets its own directory. The timestamp is formatted explicitly so the
# name has no spaces or colons (which `str(datetime.now())` would introduce and
# which are not valid in Windows paths), and it is separated from the prefix.
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(f"runs/gnn_training_{run_stamp}")

  if not hasattr(tensorboard, '__version__') or LooseVersion(tensorboard.__version__) < LooseVersion('1.15'):


In [12]:
# Train for 20 epochs under the PyTorch profiler, logging per-batch loss, MAE and
# R2 to TensorBoard and printing the per-epoch averages.
# NOTE(review): the schedule has no `repeat`, so per the torch.profiler docs the
# wait/warmup/active cycle keeps repeating for the whole run -- confirm intended.
with torch.profiler.profile(
        schedule=torch.profiler.schedule(wait=1, warmup=1, active=5),
        on_trace_ready=torch.profiler.tensorboard_trace_handler('./runs/gnn_training/profiler'),
        record_shapes=True
) as prof:
    for i in range(20):
        epochLoss = 0
        epochMae = 0
        epochR2 = 0
        # Nothing inside the epoch switches the model to eval mode, so setting
        # training mode once per epoch is sufficient.
        model.train()
        for j, batch in enumerate(train_loader):
            # The model lives on `device`; the batch must be moved there as well,
            # otherwise the forward pass fails whenever CUDA is in use.
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch.x_dict, batch.edge_index_dict)
            # Only Transaction vertices flagged `train` contribute to the loss.
            mask = batch["Transaction"].train
            pred = out["Transaction"][mask].flatten()
            target = batch["Transaction"].y[mask]
            loss = F.smooth_l1_loss(pred, target)
            loss.backward()
            optimizer.step()
            epochLoss += loss.item()
            # Metrics are for reporting only, so keep them out of the autograd graph.
            with torch.no_grad():
                batchR2 = r2_loss(pred, target).item()
                batchMae = mae(pred, target)
            epochR2 += batchR2
            epochMae += batchMae.item()

            # ...log the running loss
            global_step = i * train_loader.num_batches + j
            writer.add_scalar('training loss', loss.item(), global_step)
            writer.add_scalar('training mae', batchMae.item(), global_step)
            writer.add_scalar('training R2', batchR2, global_step)

            # Advance the profiler schedule by one training step.
            prof.step()
        print("EPOCH:", i, "LOSS:", epochLoss / train_loader.num_batches, "MAE:", epochMae / train_loader.num_batches, "R2:", epochR2 / train_loader.num_batches)

EPOCH: 0 LOSS: 78.77748285543676 MAE: 79.13949146116086 R2: -0.004093586434539757
EPOCH: 1 LOSS: 78.02543941923834 MAE: 78.38746651635485 R2: -0.0038777763290915868
EPOCH: 2 LOSS: 78.29817783478056 MAE: 78.65913308409934 R2: -0.0035152458391657
EPOCH: 3 LOSS: 76.12090217557524 MAE: 76.48212772647099 R2: -0.004335519889594991
EPOCH: 4 LOSS: 77.37672537726861 MAE: 77.73714511261376 R2: -0.003326445372492428
EPOCH: 5 LOSS: 77.74104996748152 MAE: 78.10271661141279 R2: -0.0036487811634404546
EPOCH: 6 LOSS: 78.37719092397855 MAE: 78.74018209571047 R2: -0.003818260796842915
EPOCH: 7 LOSS: 79.44608394166376 MAE: 79.80940368010646 R2: -0.003334035389868012
EPOCH: 8 LOSS: 77.56743382232611 MAE: 77.93013261272456 R2: -0.0039340941368103495
EPOCH: 9 LOSS: 78.91554735492657 MAE: 79.27778628659708 R2: -0.004264730599281045
EPOCH: 10 LOSS: 78.26720706900942 MAE: 78.62839417482651 R2: -0.0038934333382166044
EPOCH: 11 LOSS: 77.59721180030512 MAE: 77.95783193299086 R2: -0.004212503220781094
EPOCH: 12 LO

In [13]:
# Neighbor loader for the held-out split. It reads the same input features and
# label as the training loader, but filters on (and fetches) the `test` flag
# and does not shuffle.
test_input_features = {
    "Transaction": ["seller_k_size", "buyer_k_size"],
    "NFT_User": ["pagerank", "kcore_size"],
    "NFT": ["fastrp_embedding"],
    "NFT_Collection": ["fastrp_embedding"],
    "Category": ["fastrp_embedding"],
}
test_loader_options = dict(
    v_in_feats=test_input_features,
    v_out_labels={"Transaction": ["usd_price"]},
    v_extra_feats={"Transaction": ["test"]},
    filter_by={"Transaction": "test"},
    shuffle=False,
    batch_size=2048,
    add_self_loop=True,
    reverse_edge=True,
)
test_loader = conn.gds.neighborLoader(**test_loader_options)



Installing and optimizing queries. It might take a minute if this is the first time you use this loader.
Query installation finished.


In [14]:
# Evaluate the trained model on the test loader and report the per-batch
# averages of the smooth-L1 loss, MAE and R2.
totLoss = 0
totMAE = 0
totR2 = 0
# Evaluation mode disables dropout; it only needs to be set once.
model.eval()
for batch in test_loader:
    # The model lives on `device`; the batch must be moved there as well,
    # otherwise the forward pass fails whenever CUDA is in use.
    batch = batch.to(device)
    with torch.no_grad():
        out = model(batch.x_dict, batch.edge_index_dict)
        # Only Transaction vertices flagged `test` are scored.
        mask = batch["Transaction"].test
        pred = out["Transaction"][mask].flatten()
        target = batch["Transaction"].y[mask]
        loss = F.smooth_l1_loss(pred, target)
        totMAE += mae(pred, target).item()
        totLoss += loss.item()
        totR2 += r2_loss(pred, target).item()
print("LOSS:", totLoss / test_loader.num_batches, "MAE:", totMAE / test_loader.num_batches, "R2:", totR2 / test_loader.num_batches)

LOSS: 107.91221425399716 MAE: 108.29418413933134 R2: -0.006296718814673345
