In [1]:
import warnings

warnings.filterwarnings('ignore')

In [2]:
import networkx as nx
import networkit as nk
import numpy as np
import polars as pl
import pandas as pd

In [3]:
import torch.nn as nn
import torch

In [4]:
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import igraph as ig
from torch_geometric.loader import NeighborLoader
import tqdm
from multiprocessing import Pool

In [5]:
from sklearn.metrics import average_precision_score

In [6]:
import os
import sys

sys.path.append('../')
sys.path.append('/Users/phamminhlong/Desktop/paper')

In [7]:
ROOT = os.getcwd()
WORK_DIR = os.path.dirname(os.path.dirname(ROOT))

## ***Test code***

In [8]:
FIRST_FEAT_NAME = {
        'column_1': 'transid',
        'column_2': 'time_steps',
}

In [9]:
df_classes = pl.read_csv(os.path.join(WORK_DIR, 'data/elliptic_bitcoin_dataset/elliptic_txs_classes.csv'), new_columns=['transid', 'class'])
df_edgelist = pl.read_csv(os.path.join(WORK_DIR, 'data/elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv'), new_columns=['current_transid', 'next_transid'])
df_features = pl.read_csv(os.path.join(WORK_DIR, 'data/elliptic_bitcoin_dataset/elliptic_txs_features.csv'), has_header=False)

In [10]:
from src.service.data_loader import EllipticLoader
from src.service.graph_model.gat import GAT

In [11]:
e = EllipticLoader(
    path_classes='data/elliptic_bitcoin_dataset/elliptic_txs_classes.csv',
    path_edgelist='data/elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv',
    path_features='data/elliptic_bitcoin_dataset/elliptic_txs_features.csv'   
)

In [12]:
hidden_dim=64
embedding_dim=128
n_layers=3
n_features=166
output_dim=2
dropout_rate=0.5
heads=5
batch_size=128
lr=1e-4
epochs=500

In [13]:
gat = GAT(
    num_features=n_features,
    hidden_dim=hidden_dim,
    embedding_dim=embedding_dim,
    output_dim=output_dim,
    n_layers=n_layers,
    heads=heads,
    dropout_rate=dropout_rate
)

In [14]:
data = e.load()

: 

In [None]:
data.get_network_torch()

Data(x=[203769, 166], edge_index=[2, 234355], y=[203769], train_mask=[203769], val_mask=[203769], test_mask=[203769])

In [None]:
Data(x=[203769, 93], edge_index=[2, 468710], y=[203769], train_mask=[203769], val_mask=[203769], test_mask=[203769])

In [None]:
loader = NeighborLoader(
                data=data.get_network_torch(), 
                num_neighbors=[-1]*gat.n_layers, 
                input_nodes=data.train_mask, 
                batch_size=batch_size, 
                shuffle=True, 
                num_workers=Pool()._processes
            )

In [None]:
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

In [None]:
losses = {}

for epoch in range(epochs):
    
    running_loss = 0
    ap_score = 0
    j = 0
    
    gat.train()
    optimizer = torch.optim.SGD(gat.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    for r, i in enumerate(loader):
        optimizer.zero_grad()
        out, h = gat(i.x, i.edge_index)
        y_hat = out[:i.batch_size]
        y = i.y[:i.batch_size]
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        j = r
        
        
    ap_score = average_precision_score(
                    y_true=y.cpu().detach().numpy(), 
                    y_score=y_hat.cpu().detach().numpy()[:,1]
            )  
    
    print('Epoch: ', epoch)
    print('Loss: ', running_loss / j)

    losses[epoch] = running_loss / j
    # print('AP Score: ', ap_score)

Epoch:  0
Loss:  0.7471282976923637
Epoch:  1
Loss:  0.7125783718905403
Epoch:  2
Loss:  0.6800944562097198
Epoch:  3
Loss:  0.6525072232612128
Epoch:  4
Loss:  0.6274748852530729
Epoch:  5
Loss:  0.6102422536576836
Epoch:  6
Loss:  0.5934375588176319
Epoch:  7
Loss:  0.5800272200292754
Epoch:  8
Loss:  0.5687287776794249
Epoch:  9
Loss:  0.5600175718659336
Epoch:  10
Loss:  0.5515109537874611
Epoch:  11
Loss:  0.5448348857824085
Epoch:  12
Loss:  0.5375347028947571
Epoch:  13
Loss:  0.5325491557132851
Epoch:  14
Loss:  0.5278454815010423
Epoch:  15
Loss:  0.5222186880204284
Epoch:  16
Loss:  0.5185443016799908
Epoch:  17
Loss:  0.514062647535963
Epoch:  18
Loss:  0.5107584369992747
Epoch:  19
Loss:  0.507371376440363
Epoch:  20
Loss:  0.5038025954973351
Epoch:  21
Loss:  0.5016409126878942
Epoch:  22
Loss:  0.4992404878139496
Epoch:  23
Loss:  0.497137123375263
Epoch:  24
Loss:  0.49391878403506234
Epoch:  25
Loss:  0.49189841385605265
Epoch:  26
Loss:  0.490622833226491
Epoch:  27
Lo

In [None]:
torch.save({
            'model_state_dict': gat.state_dict(),
            }, './gat.pt')

In [None]:
checkpoint = torch.load('./gat.pt')
gat.load_state_dict(
    checkpoint['model_state_dict']
)

<All keys matched successfully>

In [None]:
network = data.get_network_torch()

In [None]:
n_random_samples = 1000

In [None]:
from src.utils.utils import resample_testmask
from sklearn.metrics import roc_auc_score

gat.to(device=device)
gat.eval()

ra_list = []
ap_list = []
      
for _ in tqdm.tqdm(range(n_random_samples), colour='green', desc='Testing: '):
    random_test_mark = resample_testmask(data.test_mask)
    
    out, h = gat(network.x.to(device=device), network.edge_index.to(device=device))
        
    y_hat = out[random_test_mark]
    y = network.y[random_test_mark]
    
    ra_score = roc_auc_score(
        y.cpu().detach().numpy(), 
        y_hat.cpu().detach().numpy()[:,1]
    )
    ap_score = average_precision_score(
        y.cpu().detach().numpy(), 
        y_hat.cpu().detach().numpy()[:,1]
    )
    
    ra_list.append(ra_score)
    ap_list.append(ap_score)