In [4]:
# imports
import pyTigerGraph as tg
import os
import json
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphSAGE, GCN, Node2Vec, GAT
from pyTigerGraph.gds.metrics import Accumulator, Accuracy, BinaryPrecision, BinaryRecall
import warnings
warnings.filterwarnings("ignore")

# import TigerGraph instance config
os.chdir('../config/')
with open('tigergraph.json', 'r') as f:
    config = json.load(f)

# Connection parameters
hostName = config['host']
secret = '5kn71rhu7612ma7msfcvtm6d9de173gk'

conn = tg.TigerGraphConnection(host=hostName, gsqlSecret=secret, graphname="Ethereum2")
conn.getToken(secret)

('m0gnonnjv9447i2hps7s9u7ceir7laro', 1678401197, '2023-03-09 22:33:17')

In [5]:
# split nodes into train/test/validation sets
# split = conn.gds.vertexSplitter(is_train=0.6, is_test=0.2, is_valid=0.2)
# split.run()
print(
    "Number of vertices in training set:",
    conn.getVertexCount("Wallet", where="is_train!=0"),
)
print(
    "Number of vertices in test set:",
    conn.getVertexCount("Wallet", where="is_test!=0"),
)
conn.getVertexCount('*'),conn.getEdgeCount('*'),

Number of vertices in training set: 623
Number of vertices in test set: 267


({'Wallet': 86622}, {'sent_eth': 106083, 'reverse_sent_eth': 106083})

In [6]:
# load train/test/validation data from TigerGraph
graph_loader = conn.gds.graphLoader(
    num_batches=1,
    v_in_feats=["in_degree","out_degree","total_sent","send_min","recv_amount","recv_min","pagerank"],
    v_out_labels=['label'],
    v_extra_feats=['is_train','is_test'],
    output_format = "PyG"
)
data = graph_loader.data

In [7]:
data

Data(edge_index=[2, 106083], x=[86622, 7], y=[86622], is_train=[86622], is_test=[86622])

In [20]:
# define hyperparameters for model training
hp = {"hidden_dim": 512,
    "num_layers": 2,
    "dropout": 0.05,
    "lr": 0.0075,
    "l2_penalty": 5e-5}

In [21]:
# define model
torch.manual_seed(123)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model = GCN(
    in_channels=7,
    hidden_channels=hp["hidden_dim"],
    num_layers=hp["num_layers"],
    out_channels=7,
    dropout=hp["dropout"],
    heads=8
).to(device)

optimizer = torch.optim.Adam(
    model.parameters(), lr=hp["lr"], weight_decay=hp["l2_penalty"]
)

In [22]:
logs = {}
data = data.to(device)
for epoch in range(30):
    # Train
    model.train()
    acc = Accuracy()
    # Forward pass
    out = model(data.x.float(), data.edge_index)
    # Calculate loss
    loss = F.cross_entropy(out[data.is_train].float(), data.y[data.is_train].long())
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Evaluate
    val_acc = Accuracy()
    with torch.no_grad():
        pred = out.argmax(dim=1)
        acc.update(pred[data.is_train], data.y[data.is_train])
        valid_loss = F.cross_entropy(out[data.is_test].float(), data.y[data.is_test].long())
        val_acc.update(pred[data.is_test], data.y[data.is_test])
    # Logging
    logs["loss"] = loss.item()
    logs["test_loss"] = valid_loss.item()
    logs["acc"] = acc.value
    logs["test_acc"] = val_acc.value
    print(
        "Epoch: {:02d}, Train Loss: {:.4f}, Test Loss: {:.4f}, Train Accuracy: {:.4f}, Test Accuracy: {:.4f}".format(
            epoch, logs["loss"], logs["test_loss"], logs["acc"], logs["test_acc"]
        )
    )

Epoch: 00, Train Loss: 123.2926, Test Loss: 99.5436, Train Accuracy: 0.2360, Test Accuracy: 0.2322
Epoch: 01, Train Loss: 278.1959, Test Loss: 277.4247, Train Accuracy: 0.4912, Test Accuracy: 0.5206
Epoch: 02, Train Loss: 374.3616, Test Loss: 376.3057, Train Accuracy: 0.4912, Test Accuracy: 0.5206
Epoch: 03, Train Loss: 374.9960, Test Loss: 374.8954, Train Accuracy: 0.5024, Test Accuracy: 0.5318
Epoch: 04, Train Loss: 319.2099, Test Loss: 316.0376, Train Accuracy: 0.5185, Test Accuracy: 0.5543
Epoch: 05, Train Loss: 226.1408, Test Loss: 218.4551, Train Accuracy: 0.5425, Test Accuracy: 0.5880
Epoch: 06, Train Loss: 104.3793, Test Loss: 95.1380, Train Accuracy: 0.5698, Test Accuracy: 0.5955
Epoch: 07, Train Loss: 81.8119, Test Loss: 53.5573, Train Accuracy: 0.6180, Test Accuracy: 0.6142
Epoch: 08, Train Loss: 151.3272, Test Loss: 118.6835, Train Accuracy: 0.5425, Test Accuracy: 0.5169
Epoch: 09, Train Loss: 40.7220, Test Loss: 31.0490, Train Accuracy: 0.6324, Test Accuracy: 0.6105
Epoch:

In [24]:
model.eval()
acc = Accuracy()
with torch.no_grad():
    pred = model(data.x.float(), data.edge_index).argmax(dim=1)
    acc.update(pred[data.is_test], data.y[data.is_test])
print("Final Test Accuracy: {:.4f}".format(acc.value))

Final Test Accuracy: 0.7491


In [None]:
# Model Performance:

# Baseline: 

# GCN around 0.7079% with 30 epochs and hp = {"hidden_dim": 128,
    # "num_layers": 2,
    # "dropout": 0.05,
    # "lr": 0.005,
    # "l2_penalty": 5e-5}

# GraphSAGE around 0.7228% with 30 epochs and hp = {"hidden_dim": 128,
    # "num_layers": 2,
    # "dropout": 0.05,
    # "lr": 0.0075,
    # "l2_penalty": 5e-5}

# GAT around 0.7191 with hp = {"hidden_dim": 128,
    # "num_layers": 2,
    # "dropout": 0.05,
    # "lr": 0.0075,
    # "l2_penalty": 5e-5}
    # heads = 8

    # same training loops for GraphSAGE, GAT, GCN

# Node2Vec - 




# TAGCN - around .74%


# Deep Graph Infomax - DGI ROUGLY 73.5% can be tuned

In [None]:
# import os.path as osp

# import torch
# import torch.nn as nn

# from torch_geometric.datasets import Planetoid
# from torch_geometric.nn import DeepGraphInfomax, GCNConv


# class Encoder(nn.Module):
#     def __init__(self, in_channels, hidden_channels):
#         super().__init__()
#         self.conv = GCNConv(in_channels, hidden_channels, cached=True)
#         self.prelu = nn.PReLU(hidden_channels)

#     def forward(self, x, edge_index):
#         x = self.conv(x, edge_index)
#         x = self.prelu(x)
#         return x


# def corruption(x, edge_index):
#     return x[torch.randperm(x.size(0))], edge_index


# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = DeepGraphInfomax(
#     hidden_channels=512, encoder=Encoder(data.num_features, 512),
#     summary=lambda z, *args, **kwargs: torch.sigmoid(z.mean(dim=0)),
#     corruption=corruption).to(device)
# data = data.to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.005)


# def train():
#     model.train()
#     optimizer.zero_grad()
#     pos_z, neg_z, summary = model(data.x.float(), data.edge_index)
#     loss = model.loss(pos_z, neg_z, summary)
#     loss.backward()
#     optimizer.step()
#     return loss.item()


# for epoch in range(1, 30):
#     loss = train()
#     print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

In [None]:
# def test():
#     model.eval()
#     z, _, _ = model(data.x.float(), data.edge_index)
#     acc = model.test(z[data.is_train], data.y[data.is_train],
#                      z[data.is_test], data.y[data.is_test], max_iter=150, C=100000)
#     return acc

In [None]:
# acc = test()
# print(f'Final Test Accuracy: {acc:.4f}')

In [None]:
data2 = data

In [None]:
import argparse
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Flickr
from torch_geometric.loader import GraphSAINTRandomWalkSampler
from torch_geometric.nn import GraphConv
from torch_geometric.utils import degree


data = data2
row, col = data.edge_index
data.edge_weight = 1. / degree(col, data.num_nodes)[col]  # Norm by in-degree.


loader = GraphSAINTRandomWalkSampler(data, batch_size=6000, walk_length=2,
                                     num_steps=5, sample_coverage=100,
                                     num_workers=4)


class Net(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        in_channels = data.num_node_features
        out_channels = 2
        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(3 * hidden_channels, out_channels)

    def set_aggr(self, aggr):
        self.conv1.aggr = aggr
        self.conv2.aggr = aggr
        self.conv3.aggr = aggr

    def forward(self, x0, edge_index, edge_weight=None):
        x1 = F.relu(self.conv1(x0, edge_index, edge_weight))
        x1 = F.dropout(x1, p=0.2, training=self.training)
        x2 = F.relu(self.conv2(x1, edge_index, edge_weight))
        x2 = F.dropout(x2, p=0.2, training=self.training)
        x3 = F.relu(self.conv3(x2, edge_index, edge_weight))
        x3 = F.dropout(x3, p=0.2, training=self.training)
        x = torch.cat([x1, x2, x3], dim=-1)
        x = self.lin(x)
        return x.log_softmax(dim=-1)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(hidden_channels=512).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
use_normalization=False


def train():
    model.train()
    model.set_aggr('add' if use_normalization else 'mean')

    total_loss = total_examples = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()

        if use_normalization:
            edge_weight = data.edge_norm * data.edge_weight
            out = model(data.x.float(), data.edge_index.long(), edge_weight)
            loss = F.nll_loss(out, data.y.long(), reduction='none')
            loss = (loss * data.node_norm)[data.is_train].sum()
        else:
            out = model(data.x.float(), data.edge_index.long())
            loss = F.nll_loss(out[data.is_train], data.y[data.is_train].long())

        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_nodes
        total_examples += data.num_nodes
    return total_loss / total_examples


@torch.no_grad()
def test():
    model.eval()
    model.set_aggr('mean')

    out = model(data.x.to(device).float(), data.edge_index.to(device).long())
    pred = out.argmax(dim=-1)
    correct = pred.eq(data.y.to(device))

    accs = []
    for _, mask in data('is_train', 'is_test'):
        accs.append(correct[mask].sum().item() / mask.sum().item())
    return accs

In [None]:
for epoch in range(1, 51):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, Test: {accs[1]:.4f}')

In [None]:
acc = test()
acc