In [1]:
import pyTigerGraph as tg
import numpy as np
import pandas as pd

# Open a connection to the TigerGraph server, bound to the KDD_2022_NFT graph.
# NOTE(review): the host is a hardcoded IP reached over plain HTTP — consider
# reading it from configuration so the notebook is portable.
conn = tg.TigerGraphConnection("http://3.22.188.182", graphname="KDD_2022_NFT")

In [2]:
# Configure a vertex splitter over Transaction vertices with a 0.8 / 0.2 ratio.
# The keyword names (`train`, `test`) are the same names the loaders below pass
# as `filter_by` — presumably they become boolean vertex attributes; confirm
# against the pyTigerGraph splitter documentation.
splitter = conn.gds.vertexSplitter(v_types=["Transaction"], train=0.8, test=0.2)

KeyboardInterrupt: 

In [None]:
# Execute the split configured above (runs on the database side).
splitter.run()

Splitting vertices...
Vertex split finished successfully.


In [None]:
%%writefile ./seller_pagerank.gsql

CREATE QUERY seller_pagerank(BOOL print_accum = FALSE, STRING result_attr = "") {
    // For every Transaction vertex, sums the `pagerank` attribute of the
    // NFT_User vertices reached over NFT_SOLD_BY edges.
    //   print_accum: when TRUE, the per-transaction sums are printed.
    //   result_attr: when non-empty, name of the Transaction attribute that
    //                receives the sum.
    transactions = {Transaction.*};
    SumAccum<DOUBLE> @seller_pr;


    res = SELECT t FROM transactions:t -(NFT_SOLD_BY)-> NFT_User:u 
          ACCUM
            t.@seller_pr += u.pagerank
          POST-ACCUM
            // Persist the accumulated value only when an attribute name was given.
            IF result_attr != "" THEN
                t.setAttr(result_attr, t.@seller_pr)
            END;
    IF print_accum THEN
      PRINT res[res.@seller_pr];
    END;
}

Overwriting ./seller_pagerank.gsql


In [None]:
# Featurizer handle used below to install and run the custom GSQL feature queries.
featurizer = conn.gds.featurizer()

In [None]:
# Install the query from the file written by the %%writefile cell above.
featurizer.installAlgorithm("seller_pagerank", query_path="./seller_pagerank.gsql")

'seller_pagerank'

In [None]:
# Ask the query to write its result into the `seller_pr` attribute.
params = {"result_attr": "seller_pr"}

try:
    # First attempt also declares the DOUBLE attribute `seller_pr` on Transaction.
    featurizer.runAlgorithm("seller_pagerank", params, feat_name="seller_pr", feat_type="DOUBLE", custom_query=True, schema_name=["Transaction"])
except ConnectionError:
    # Retry without the schema arguments.
    # NOTE(review): presumably the schema change in the first attempt can drop
    # the connection after the attribute has been created — confirm.
    # `custom_query=True` is required here as well: seller_pagerank is a
    # user-defined query, not a built-in algorithm (matches the kcore_size cell).
    featurizer.runAlgorithm("seller_pagerank", params, custom_query=True)

In [None]:
%%writefile ./buyer_pagerank.gsql

CREATE QUERY buyer_pagerank(BOOL print_accum = FALSE, STRING result_attr = "") {
    // For every Transaction vertex, sums the `pagerank` attribute of the
    // NFT_User vertices reached over NFT_BOUGHT_BY edges.
    //   print_accum: when TRUE, the per-transaction sums are printed.
    //   result_attr: when non-empty, name of the Transaction attribute that
    //                receives the sum.
    transactions = {Transaction.*};
    SumAccum<DOUBLE> @buyer_pr;


    res = SELECT t FROM transactions:t -(NFT_BOUGHT_BY)-> NFT_User:u 
          ACCUM
            t.@buyer_pr += u.pagerank
          POST-ACCUM
            // Persist the accumulated value only when an attribute name was given.
            IF result_attr != "" THEN
                t.setAttr(result_attr, t.@buyer_pr)
            END;
    IF print_accum THEN
      PRINT res[res.@buyer_pr];
    END;
}

Overwriting ./buyer_pagerank.gsql


In [None]:
# Install the query from the file written by the %%writefile cell above.
featurizer.installAlgorithm("buyer_pagerank", query_path="./buyer_pagerank.gsql")

'buyer_pagerank'

In [None]:
# Ask the query to write its result into the `buyer_pr` attribute.
params = {"result_attr": "buyer_pr"}

try:
    # First attempt also declares the DOUBLE attribute `buyer_pr` on Transaction.
    featurizer.runAlgorithm("buyer_pagerank", params, feat_name="buyer_pr", feat_type="DOUBLE", custom_query=True, schema_name=["Transaction"])
except ConnectionError:
    # Retry without the schema arguments.
    # NOTE(review): presumably the schema change in the first attempt can drop
    # the connection after the attribute has been created — confirm.
    # `custom_query=True` is required here as well: buyer_pagerank is a
    # user-defined query, not a built-in algorithm (matches the kcore_size cell).
    featurizer.runAlgorithm("buyer_pagerank", params, custom_query=True)

In [None]:
%%writefile ./kcore_size.gsql

CREATE QUERY kcore_size(BOOL print_accum = FALSE, STRING result_attr = "") FOR GRAPH KDD_2022_NFT { 
  // Counts how many Transaction vertices share each `k_core` value and can
  // write, to every Transaction, the count for its own `k_core` value.
  //   print_accum: when TRUE, the k_core -> count map is printed.
  //   result_attr: when non-empty, name of the Transaction attribute that
  //                receives the count.
  MapAccum<INT, SumAccum<INT>> @@kcore_size;
  
  trans = {Transaction.*};
  
  // First pass: build the global k_core -> count map.
  res = SELECT t FROM trans:t POST-ACCUM @@kcore_size += (t.k_core -> 1);
  
  IF print_accum THEN
    PRINT @@kcore_size;
  END;
  
  // Second pass (only when requested): look up each vertex's own group size.
  IF result_attr != "" THEN
    res = SELECT t FROM trans:t POST-ACCUM t.setAttr(result_attr, @@kcore_size.get(t.k_core));
  END;
}

Writing ./kcore_size.gsql


In [None]:
# Install the query from the file written by the %%writefile cell above.
featurizer.installAlgorithm("kcore_size", query_path="./kcore_size.gsql")

Installing and optimizing the queries, it might take a minute


'kcore_size'

In [None]:
# Ask the query to write its result into the `kcore_size` attribute.
params = {"result_attr": "kcore_size"}

try:
    # First attempt also declares the INT attribute `kcore_size` on Transaction.
    featurizer.runAlgorithm("kcore_size", params, feat_name="kcore_size", feat_type="INT", custom_query=True, schema_name=["Transaction"])
except ConnectionError:
    # Retry without the schema arguments, still as a custom query.
    # NOTE(review): presumably the schema change in the first attempt can drop
    # the connection after the attribute has been created — confirm.
    featurizer.runAlgorithm("kcore_size", params, custom_query=True)

In [None]:
# Re-fetch the graph schema with force=True.
# NOTE(review): presumably this refreshes a cached schema so the attributes
# added by the feature queries are visible to the loaders — confirm. The
# returned value (`tmp`) is not used anywhere else in the visible notebook.
tmp = conn.getSchema(force=True)

In [None]:
# Loader over Transaction vertices in batches of 128, fetching the three input
# features plus the regression target `usd_price`.
# `filter_by="train"` refers to the name passed to the vertex splitter above.
train_loader = conn.gds.vertexLoader(
    attributes={"Transaction": ["kcore_size", "usd_price", "seller_pr", "buyer_pr"]},
    filter_by="train",
    batch_size=128
)

Installing and optimizing queries. It might take a minute if this is the first time you use this loader.
Query installation finished.


In [None]:
import torch
from torch.optim import Adam

# Fully connected regressor: 3 input features -> 1000 -> 100 -> 10 -> 1 output,
# with a ReLU after every layer except the last.
layer_widths = [3, 1000, 100, 10, 1]
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(torch.nn.Linear(fan_in, fan_out))
    layers.append(torch.nn.ReLU())
layers.pop()  # drop the activation that would follow the output layer

# NOTE: the model is bound to the name `nn`; other cells refer to it by that name.
nn = torch.nn.Sequential(*layers)

# Adam over all model parameters; SmoothL1 is the training objective and L1
# (mean absolute error) is tracked as a metric.
opt = Adam(nn.parameters(), lr=0.01)
loss = torch.nn.SmoothL1Loss()
mae = torch.nn.L1Loss()

In [None]:
def r2_loss(output, target):
    """Return the coefficient of determination (R^2) of `output` against `target`.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of `target` from
    its mean. Despite the name, a larger value means a better fit (1 is perfect).

    Args:
        output: tensor of predictions.
        target: tensor of ground-truth values.

    Returns:
        A scalar tensor holding the R^2 value.
    """
    residual_ss = ((target - output) ** 2).sum()
    total_ss = ((target - target.mean()) ** 2).sum()
    return 1 - residual_ss / total_ss

In [None]:
import numpy as np

In [None]:
# Input columns for the model. They must be attributes the training loader
# actually fetches, and their count must equal the in_features of the model's
# first Linear layer (3). The previous selection also asked for
# "categoryOneHot" and "collectionOneHot", which the loader does not load.
feature_cols = ["kcore_size", "seller_pr", "buyer_pr"]

for i in range(200):
    # Running sums of per-batch metrics; averaged over batches when printed.
    epoch_loss = 0
    epoch_mae = 0
    epoch_r2 = 0
    for batch in train_loader:
        X = torch.tensor(batch["Transaction"][feature_cols].values.astype(np.float32))
        y = torch.tensor(batch["Transaction"]["usd_price"].values.astype(np.float32))
        # Model emits shape (batch, 1); flatten to match the 1-D target.
        out = nn(X).flatten()
        loss_val = loss(out, y)
        opt.zero_grad()
        loss_val.backward()
        opt.step()
        epoch_loss += loss_val.item()
        # Metrics are computed from the predictions made before this step's update.
        epoch_mae += mae(out, y).item()
        epoch_r2 += r2_loss(out, y).item()
    print("Loss:", epoch_loss/train_loader.num_batches, "MAE:", epoch_mae/train_loader.num_batches, "R2:", epoch_r2/train_loader.num_batches)

Loss: 348.6691375125772 MAE: 349.10870423535454 R2: -418.2236720564552
Loss: 143.34268744743738 MAE: 143.80907596732087 R2: -0.04300043910661476
Loss: 143.3426495986486 MAE: 143.80939821412943 R2: -0.042974666122477655
Loss: 143.34267037641007 MAE: 143.80941925408706 R2: -0.04297420952840635
Loss: 143.34269116702427 MAE: 143.80943305023277 R2: -0.04297374554400174
Loss: 143.3427050248632 MAE: 143.8094446794042 R2: -0.04297329858949563
Loss: 143.34271578775903 MAE: 143.80945503615305 R2: -0.04297293614184439
Loss: 143.34272531422002 MAE: 143.8094629354554 R2: -0.04297266462742479
Loss: 143.34273096943159 MAE: 143.80946885543372 R2: -0.04297245127171519
Loss: 143.34273438312295 MAE: 143.8094738731487 R2: -0.042972284185918516
Loss: 143.34273967074577 MAE: 143.80947605554627 R2: -0.042972177829382556
Loss: 143.3427403879294 MAE: 143.8094779423305 R2: -0.04297208753878858
Loss: 143.34274287879308 MAE: 143.80948037407148 R2: -0.042972031307991626
Loss: 143.34274584520858 MAE: 143.8094813354

KeyboardInterrupt: 

In [None]:
# Same attributes and batch size as the training loader, but restricted with
# `filter_by="test"` (the other name passed to the vertex splitter).
test_loader = conn.gds.vertexLoader(
    attributes={"Transaction": ["kcore_size", "usd_price", "seller_pr", "buyer_pr"]},
    filter_by="test",
    batch_size=128
)

In [None]:
# Evaluate on the test loader: accumulate per-batch MAE and R^2, then report
# their averages over the number of batches.
mae_sum, r2_sum = 0, 0
with torch.no_grad():  # inference only, no gradients needed
    for batch in test_loader:
        transactions = batch["Transaction"]
        X = torch.tensor(transactions[["kcore_size", "seller_pr", "buyer_pr"]].values.astype(np.float32))
        y = torch.tensor(transactions["usd_price"].values.astype(np.float32))
        out = nn(X).flatten()
        mae_sum += mae(out, y).item()
        r2_sum += r2_loss(out, y).item()
print("MAE:", mae_sum/test_loader.num_batches, "R2:", r2_sum/test_loader.num_batches)

MAE: 92.19558295797795 R2: -0.0536285676854722


In [None]:
# Loader over the test-filtered Transaction vertices configured with a single
# batch (num_batches=1), so the data can be read at once via `.data` below.
interpretLoader = conn.gds.vertexLoader(
    attributes={"Transaction": ["kcore_size", "usd_price", "seller_pr", "buyer_pr"]},
    filter_by="test",
    num_batches=1
)

In [None]:
# Feature matrix (float32, columns in the order the model was trained on) and
# target vector for the interpretation step.
# NOTE(review): `.data` is read twice; this assumes both reads return the same
# rows in the same order — confirm the loader caches its result.
X = torch.tensor(interpretLoader.data["Transaction"][["kcore_size", "seller_pr", "buyer_pr"]].values.astype(np.float32))
y = torch.tensor(interpretLoader.data["Transaction"]["usd_price"].values.astype(np.float32))

## Interpret Model with Captum
https://captum.ai/tutorials/House_Prices_Regression_Interpret

In [None]:
# imports from captum library
from captum.attr import LayerConductance, LayerActivation, LayerIntegratedGradients
from captum.attr import IntegratedGradients, DeepLift, GradientShap, NoiseTunnel, FeatureAblation

In [None]:
# Build one Captum attribution object per method, all wrapping the trained model.
ig = IntegratedGradients(nn)
ig_nt = NoiseTunnel(ig)  # noise tunnel wrapped around Integrated Gradients
dl = DeepLift(nn)
#gs = GradientShap(nn)
fa = FeatureAblation(nn)

# Attribute the model output to each input feature for every row of X.
# No explicit baselines are passed, so each method uses its own default.
ig_attr_test = ig.attribute(X, n_steps=50)
ig_nt_attr_test = ig_nt.attribute(X)
dl_attr_test = dl.attribute(X)
# GradientShap is disabled: it would need a training-set tensor (X_train),
# which this notebook does not define.
#gs_attr_test = gs.attribute(X, X_train)
fa_attr_test = fa.attribute(X)

In [None]:
import matplotlib.pyplot as plt

feature_names = ["kcore_size", "seller_pr", "buyer_pr"]

# Font sizes are set before any figure text is created: rc changes only affect
# artists created afterwards, so setting them later would leave the title and
# axis label at the old size.
FONT_SIZE = 16
plt.rc('font', size=FONT_SIZE)            # fontsize of the text sizes
plt.rc('axes', titlesize=FONT_SIZE)       # fontsize of the axes title
plt.rc('axes', labelsize=FONT_SIZE)       # fontsize of the x and y labels
plt.rc('legend', fontsize=FONT_SIZE - 4)  # fontsize of the legend

# prepare attributions for visualization

x_axis_data = np.arange(X.shape[1])
x_axis_data_labels = list(map(lambda idx: feature_names[idx], x_axis_data))

# For each method: sum the attributions over all samples (axis 0), then scale
# the per-feature totals to unit L1 norm so the methods are comparable.
ig_attr_test_sum = ig_attr_test.detach().numpy().sum(0)
ig_attr_test_norm_sum = ig_attr_test_sum / np.linalg.norm(ig_attr_test_sum, ord=1)

ig_nt_attr_test_sum = ig_nt_attr_test.detach().numpy().sum(0)
ig_nt_attr_test_norm_sum = ig_nt_attr_test_sum / np.linalg.norm(ig_nt_attr_test_sum, ord=1)

dl_attr_test_sum = dl_attr_test.detach().numpy().sum(0)
dl_attr_test_norm_sum = dl_attr_test_sum / np.linalg.norm(dl_attr_test_sum, ord=1)

fa_attr_test_sum = fa_attr_test.detach().numpy().sum(0)
fa_attr_test_norm_sum = fa_attr_test_sum / np.linalg.norm(fa_attr_test_sum, ord=1)

# `nn` is a torch.nn.Sequential, which has no `lin1` attribute; its layers are
# reached by index and nn[0] is the first Linear layer. Row 0 of its weight
# matrix holds the input weights of the first hidden unit only (one per feature).
lin_weight = nn[0].weight[0].detach().numpy()
y_axis_lin_weight = lin_weight / np.linalg.norm(lin_weight, ord=1)

width = 0.14
legends = ['Int Grads', 'Int Grads w/SmoothGrad','DeepLift', 'Feature Ablation', 'Weights']

plt.figure(figsize=(20, 10))

ax = plt.subplot()
ax.set_title('Comparing input feature importances across multiple algorithms and learned weights')
ax.set_ylabel('Attributions')

# One bar series per entry in `legends`, in the same order, placed side by side
# in consecutive slots (0..4) so there is no empty slot inside a group.
ax.bar(x_axis_data, ig_attr_test_norm_sum, width, align='center', alpha=0.8, color='#eb5e7c')
ax.bar(x_axis_data + width, ig_nt_attr_test_norm_sum, width, align='center', alpha=0.7, color='#A90000')
ax.bar(x_axis_data + 2 * width, dl_attr_test_norm_sum, width, align='center', alpha=0.6, color='#34b8e0')
ax.bar(x_axis_data + 3 * width, fa_attr_test_norm_sum, width, align='center', alpha=1.0, color='#49ba81')
ax.bar(x_axis_data + 4 * width, y_axis_lin_weight, width, align='center', alpha=1.0, color='grey')
ax.autoscale_view()

# Put each feature label under the middle bar (slot 2) of its five-bar group.
ax.set_xticks(x_axis_data + 2 * width)
ax.set_xticklabels(x_axis_data_labels)

plt.legend(legends, loc=3)
# Lay out after the tick labels and legend exist so they are accounted for.
plt.tight_layout()
plt.show()