In [7]:
import torch

from torch_frame.data import DataLoader
from torch_frame import stype
from torch_frame.nn import (
    EmbeddingEncoder,
    LinearEncoder,
    TimestampEncoder,
)

from src.datasets import IBMTransactionsAML
from src.nn.models import FTTransformerGINeFused

from icecream import ic
import sys

In [8]:
# --- Reproducibility --------------------------------------------------------
seed = 42
# Apply the seed right away so that shuffled data loading and parameter
# initialisation in later cells are repeatable across kernel restarts.
torch.manual_seed(seed)

# --- Optimisation -----------------------------------------------------------
batch_size = 200  # rows per DataLoader batch
lr = 5e-4
eps = 1e-8        # presumably the optimizer epsilon -- TODO confirm at use site
epochs = 3

# --- Runtime / data splitting -----------------------------------------------
# NOTE(review): this name shadows the builtin `compile`; it is kept unchanged
# because other cells may read it.
compile = False
data_split = [0.6, 0.2, 0.2]  # forwarded to the dataset as `splits`
split_type = 'temporal'

# --- Graph sampling / model width -------------------------------------------
khop_neighbors = [100, 100]  # forwarded to the dataset as `khop_neighbors`
pos_sample_prob = 0.15       # not used in the visible cells -- verify consumer
num_neg_samples = 64         # not used in the visible cells -- verify consumer
channels = 32                # forwarded to the model as `num_channels`

# --- Pre-training task ------------------------------------------------------
pretrain = 'lp'  # forwarded to the dataset as `pretrain`

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Load the IBM AML transactions dataset, materialize it, and split it into
# train / validation / test subsets.
# NOTE(review): `root` is an absolute, machine-specific path; consider moving
# it to a configurable data directory.
dataset = IBMTransactionsAML(
    root='/mnt/data/ibm-transactions-for-anti-money-laundering-aml/HI-Small_Trans-c.csv',
    pretrain=pretrain,
    split_type=split_type,
    splits=data_split,
    khop_neighbors=khop_neighbors,
)
ic(dataset)
dataset.materialize()

train_dataset, val_dataset, test_dataset = dataset.split()
ic(len(train_dataset), len(val_dataset), len(test_dataset))

# Keep the preview as the cell's last expression: a bare expression in the
# middle of a cell is evaluated and discarded, so it was never displayed.
dataset.df.head(5)

In [None]:
# Build the fused FT-Transformer / GINe model and push one training batch
# through it as a smoke test.
#
# `dataset` is already materialized and split by the data-loading cell, so the
# `train_dataset` produced there is reused instead of materializing and
# splitting a second time.
is_classification = dataset.task_type.is_classification

# Draw a single shuffled batch to use as the example input.
train_tensor_frame = train_dataset.tensor_frame
train_loader = DataLoader(train_tensor_frame, batch_size=batch_size, shuffle=True)
example = next(iter(train_loader))

# One encoder per semantic column type handled through the stype dictionary;
# the timestamp encoder is handed to the model separately below.
numerical_encoder = LinearEncoder()
stype_encoder_dict = {
    stype.categorical: EmbeddingEncoder(),
    stype.numerical: numerical_encoder,
}

# Classification needs one output per class; otherwise a single output.
output_channels = dataset.num_classes if is_classification else 1

model = FTTransformerGINeFused(
    num_node_features=dataset.num_node_features,
    num_edge_features=dataset.num_edge_features,
    num_layers=3,
    num_channels=channels,
    num_classes=output_channels,
    stype_encoder_dict=stype_encoder_dict,
    timestamp_encoder=TimestampEncoder(),
)

# Forward pass on the example batch; the model output is indexed at 0 and 1
# below (the meaning of each element is defined by FTTransformerGINeFused).
pred = model(example)
ic(example.y)
ic(pred[0])
ic(pred[1])