In [68]:
# imports
import pyTigerGraph as tg
import os
import json
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphSAGE, GCN
from pyTigerGraph.gds.metrics import Accumulator, Accuracy, BinaryPrecision, BinaryRecall

# import TigerGraph instance config
os.chdir('../config/')
with open('tigergraph.json', 'r') as f:
    config = json.load(f)

# Connection parameters
hostName = config['host']
secret = config['secret']

conn = tg.TigerGraphConnection(host=hostName, gsqlSecret=secret, graphname="Ethereum")
conn.getToken(secret)

('4qh9m0b1ibjk7asj5ul6fmo9j0cd5mie', 1678066822, '2023-03-06 01:40:22')

In [83]:
# split nodes into train/test/validation sets
split = conn.gds.vertexSplitter(is_train=0.6, is_test=0.2, is_valid=0.2)
split.run()

Splitting vertices...
Vertex split finished successfully.


In [81]:
# load train/test/validation data from TigerGraph
graph_loader = conn.gds.graphLoader(
    num_batches=1,
    v_in_feats=["in_degree","out_degree","total_sent","send_min","recv_amount","recv_min","pagerank"],
    v_out_labels=['label'],
    v_extra_feats=['is_train','is_test','is_valid'],
    output_format = "PyG",
    shuffle=True
)
data = graph_loader.data

In [84]:
# define hyperparameters for model training
hp = {"hidden_dim": 128,
    "num_layers": 2,
    "dropout": 0.05,
    "lr": 0.0075,
    "l2_penalty": 5e-5}

In [85]:
# define model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GCN(
    in_channels=7,
    hidden_channels=hp["hidden_dim"],
    num_layers=hp["num_layers"],
    out_channels=2,
    dropout=hp["dropout"],
).to(device)

optimizer = torch.optim.Adam(
    model.parameters(), lr=hp["lr"], weight_decay=hp["l2_penalty"]
)

In [86]:
logs = {}
data = data.to(device)
for epoch in range(20):
    # Train
    model.train()
    acc = Accuracy()
    # Forward pass
    out = model(data.x.float(), data.edge_index)
    # Calculate loss
    loss = F.cross_entropy(out[data.is_train].float(), data.y[data.is_train].long())
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Evaluate
    val_acc = Accuracy()
    with torch.no_grad():
        pred = out.argmax(dim=1)
        acc.update(pred[data.is_train], data.y[data.is_train])
        valid_loss = F.cross_entropy(out[data.is_valid].float(), data.y[data.is_valid].long())
        val_acc.update(pred[data.is_valid], data.y[data.is_valid])
    # Logging
    logs["loss"] = loss.item()
    logs["val_loss"] = valid_loss.item()
    logs["acc"] = acc.value
    logs["val_acc"] = val_acc.value
    print(
        "Epoch: {:02d}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Train Accuracy: {:.4f}, Valid Accuracy: {:.4f}".format(
            epoch, logs["loss"], logs["val_loss"], logs["acc"], logs["val_acc"]
        )
    )

Epoch: 00, Train Loss: 15.1281, Valid Loss: 16.4260, Train Accuracy: 0.5401, Valid Accuracy: 0.5422
Epoch: 01, Train Loss: 2.4261, Valid Loss: 2.5215, Train Accuracy: 0.9927, Valid Accuracy: 0.9936
Epoch: 02, Train Loss: 3.1896, Valid Loss: 3.4478, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 03, Train Loss: 3.7610, Valid Loss: 4.1130, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 04, Train Loss: 4.0655, Valid Loss: 4.6156, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 05, Train Loss: 4.2158, Valid Loss: 4.6833, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 06, Train Loss: 4.1204, Valid Loss: 4.7525, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 07, Train Loss: 4.1135, Valid Loss: 4.7026, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 08, Train Loss: 3.8753, Valid Loss: 4.4584, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 09, Train Loss: 3.7055, Valid Loss: 4.4024, Train Accuracy: 0.9947, Valid Accuracy: 0.9954
Epoch: 10, Train L

In [87]:
# compare:
# xgboost/SVM/knn
# GCN
# GraphSAGE
# Node2vec
# GAT?
# Deep Graph Infomax?

# our model is only predicting one class



In [90]:
# model is overfit
# perfectly predicting training data?
model.eval()
acc = Accuracy()
precision = BinaryPrecision()
recall = BinaryRecall()
with torch.no_grad():
    pred = model(data.x.float(), data.edge_index).argmax(dim=1)
    print(pred.unique(return_counts= True))
    acc.update(pred[data.is_test], data.y[data.is_test])
    precision.update(pred[data.is_test], data.y[data.is_test])
    recall.update(pred[data.is_test], data.y[data.is_test])
print("Accuracy: {:.4f}".format(acc.value))
print("Precision: {:.4f}".format(precision.value))
print("Recall: {:.4f}".format(recall.value))

(tensor([0]), tensor([86622]))
Accuracy: 0.9947


TypeError: unsupported format string passed to NoneType.__format__