In [6]:
import numpy as np
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
from torch_geometric.data import Data
from model.GCN import GCNClassifier

In [2]:
def load_data(path: str) -> Data:
    npz_data = np.load(path, allow_pickle=True)

    x_np            = npz_data['x']                   # shape: (num_nodes, 17)
    y_np            = npz_data['y']                   # node labels (integers)
    edge_index_np   = npz_data['edge_index']          # shape: (num_edges, 2)
    edge_type_np    = npz_data['edge_type']
    edge_time_np    = npz_data['edge_timestamp']

    train_mask_np   = npz_data['train_mask']
    valid_mask_np   = npz_data['valid_mask']
    test_mask_np    = npz_data['test_mask']

    x = torch.tensor(x_np, dtype=torch.float)
    y = torch.tensor(y_np, dtype=torch.long)

    edge_index = torch.tensor(edge_index_np, dtype=torch.long).t().contiguous()

    edge_type = torch.tensor(edge_type_np, dtype=torch.long)
    edge_timestamp = torch.tensor(edge_time_np, dtype=torch.float)

    train_mask = torch.tensor(train_mask_np, dtype=torch.bool)
    valid_mask = torch.tensor(valid_mask_np, dtype=torch.bool)
    test_mask  = torch.tensor(test_mask_np, dtype=torch.bool)

    return Data(
        x=x,
        y=y,
        edge_index=edge_index,
        edge_type=edge_type,
        edge_timestamp=edge_timestamp,
        train_mask=train_mask,
        val_mask=valid_mask,   # note: naming convention may vary (valid_mask vs. val_mask)
        test_mask=test_mask
    )

In [4]:
data = load_data('data/dgraph/dgraphfin.npz')
len(data)

8

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

In [7]:
model = GCNClassifier(data.num_node_features, hidden_dim=64, num_classes=2).to(device)

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [None]:
model.train()
n_epochs = 128
for epoch in tqdm(range(n_epochs), desc="Training"):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    
    # Create a mask to train only on the training set nodes.
    # Optionally, combine with a condition to include only nodes with label 0 (normal) or 1 (fraud).
    train_only_mask = data.train_mask & ((data.y == 0) | (data.y == 1))
    
    loss = F.nll_loss(out[train_only_mask], data.y[train_only_mask])
    loss.backward()
    optimizer.step()

Training:   0%|          | 0/128 [00:00<?, ?it/s]

RuntimeError: The size of tensor a (857899) must match the size of tensor b (3700550) at non-singleton dimension 0