In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn.functional as F
from torch_frame import stype
from torch_frame.data import DataLoader
from torch_frame.nn import (
    EmbeddingEncoder,
    LinearEncoder,
    TimestampEncoder,
)
from tqdm import tqdm

import sys
from icecream import ic

In [3]:
seed = 42
batch_size = 512
channels = 256
num_layers = 4

pretrain = True
compile = True
lr = 1e-3
epochs = 10

In [4]:
from torch_frame.datasets import IBMTransactionsAML
dataset = IBMTransactionsAML(root='/mnt/data/ibm-transactions-for-anti-money-laundering-aml/dummy.csv', pretrain=pretrain)
ic(dataset)
dataset.materialize()
num_numerical = len(dataset.tensor_frame.col_names_dict[stype.numerical])
num_categorical = len(dataset.tensor_frame.col_names_dict[stype.categorical])
dataset.df.head(5)

ic| dataset: IBMTransactionsAML()
ic| list(self._col_names_dict[stype.numerical]) + list(self._col_names_dict[stype.categorical]): ['Amount Paid',
                                                                                                  'Amount Received',
                                                                                                  'From Bank',
                                                                                                  'From ID',
                                                                                                  'Payment Currency',
                                                                                                  'Payment Format',
                                                                                                  'Receiving Currency',
                                                                                                  'To Bank',
                                                     

Unnamed: 0,Timestamp,From Bank,From ID,To Bank,To ID,Amount Received,Receiving Currency,Amount Paid,Payment Currency,Payment Format,Is Laundering,MASK,split
0,1200,B_10,8000EBD30,B_10,8000EBD30,,US Dollar,3697.34,US Dollar,Reinvestment,0,"[3697.34, 1]",0
1,1200,B_3208,8000F4580,B_1,8000F5340,0.01,US Dollar,0.01,,Cheque,0,"[0, 4]",0
2,0,B_3209,8000F4670,B_3209,8000F4670,14675.57,US Dollar,14675.57,US Dollar,,0,"[0, 5]",0
3,120,B_12,8000F5030,B_12,8000F5030,2806.97,,2806.97,US Dollar,Reinvestment,0,"[0, 6]",0
4,360,B_10,8000F5200,B_10,8000F5200,36682.97,US Dollar,36682.97,,Reinvestment,0,"[0, 4]",0


In [5]:
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
train_dataset, val_dataset, test_dataset = dataset.split()

In [7]:
train_tensor_frame = train_dataset.tensor_frame
val_tensor_frame = val_dataset.tensor_frame
test_tensor_frame = test_dataset.tensor_frame
train_loader = DataLoader(train_tensor_frame, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_tensor_frame, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_tensor_frame, batch_size=batch_size, shuffle=False)
ic(len(train_loader), len(val_loader), len(test_loader))

ic| len(train_loader): 977, len(val_loader): 1, len(test_loader): 1


(977, 1, 1)

In [8]:
# print an example batch
ic(next(iter(train_loader)).feat_dict)
ic(next(iter(train_loader)).y)

ic| next(iter(train_loader))

.feat_dict: {<stype.numerical: 'numerical'>: tensor([[84261.8672, 84261.8672],
                                                [  206.3000,   206.3000],
                                                [  327.1900,        nan],
                                                ...,
                                                [       nan, 31433.6992],
                                                [ 9969.0400,  9969.0400],
                                                [21614.0293, 21614.0293]]),
                                         <stype.categorical: 'categorical'>: tensor([[    36,  29817,     -1,  ...,      1,     37, 102771],
                                                [     0,      0,      0,  ...,      0,     72,  16904],
                                                [   348,  30276,      1,  ...,      1,    348, 131620],
                                                ...,
                                                [   508,  33812,      0,  ...,      0,    508,

tensor([[0.0000e+00, 5.0000e+00],
        [3.0000e+00, 5.0000e+00],
        [0.0000e+00, 4.0000e+00],
        ...,
        [2.0000e+00, 4.0000e+00],
        [2.3546e+04, 0.0000e+00],
        [2.0000e+00, 4.0000e+00]])

In [9]:
stype_encoder_dict = {
    stype.categorical: EmbeddingEncoder(),
    stype.numerical: LinearEncoder(),
    stype.timestamp: TimestampEncoder(),
}

In [20]:

from models.ft_transformer import FTTransformer 
model = FTTransformer(
    channels=channels,
    out_channels=None,
    num_layers=num_layers,
    col_stats=dataset.col_stats,
    col_names_dict=train_tensor_frame.col_names_dict,
    stype_encoder_dict=stype_encoder_dict,
    pretrain = pretrain
).to(device)

model = torch.compile(model, dynamic=True) if compile else model
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

def calc_loss(pred, y):
    ic(pred[0].shape[1], num_numerical)
    ic(len(pred[1]), num_categorical)
    ic(pred[1][0].shape)
    sys.exit()
    assert len(pred[0].shape[1]) == num_numerical
    assert len(pred[1]) == num_categorical
    ic(pred[0])
    ic(pred[1])

    # ic(y)
    # ic(len(pred[1]))

    accum_n = accum_c = t_n = t_c = 0
    for i, ans in enumerate(y):
        # ans --> [val, idx]
        # pred --> feature_type_num X type_num X batch_size
        if ans[1] > (num_numerical-1):
            t_c += 1
            a = torch.tensor(int(ans[0])).to(device)
            accum_c += F.cross_entropy(pred[1][int(ans[1])-num_numerical][i], a)
            del a
        else:
            t_n += 1
            # ic(i, ans)
            # ic(ans[0], pred[0][i][int(ans[1])])
            # sys.exit()
            accum_n += pred[0][i][int(ans[1])] - ans[0] #l1
    return (accum_n + accum_c) / (t_c + t_n), (accum_c, t_c), (accum_n, t_n) # len(y)

def train(epoc: int) -> float:
    model.train()
    loss_accum = loss_c_accum = loss_n_accum = total_count = t_c = t_n = 0

    with tqdm(train_loader, desc=f'Epoch {epoc}') as t:
        for tf in t:
            tf = tf.to(device)
            pred = model(tf)
            loss, loss_c, loss_n = calc_loss(pred, tf.y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_accum += float(loss) * len(tf.y)
            loss_c_accum += loss_c[0]
            loss_n_accum += loss_n[0]
            total_count += len(tf.y)
            t_c += loss_c[1]
            t_n += loss_n[1]
            t.set_postfix(loss=f'{loss_accum/total_count:.4f}', loss_c = f'{loss_c_accum/t_c:.4f}', loss_n = f'{loss_n_accum/t_n:.4f}')
    return loss_c_accum / t_c
    return loss_accum / total_count

@torch.no_grad()
def test(loader: DataLoader) -> float:
    model.eval()
    accum_acc = accum_l1 = 0
    t_n = t_c = 0
    with tqdm(loader, desc=f'Evaluating') as t:
        for tf in t:
            tf = tf.to(device)
            pred = model(tf)
            
            for i, ans in enumerate(tf.y):
                # ans --> [val, idx]
                # pred --> feature_type_num X type_num X batch_size
                #ic(int(ans[1]), num_numerical)
                if ans[1] > (num_numerical-1):
                    #ic(pred[1][int(ans[1])-num_numerical][i].argmax(), int(ans[0]))
                    accum_acc += (pred[1][int(ans[1])-num_numerical][i].argmax() == int(ans[0]))
                    t_c += 1
                else:
                    accum_l1 += ans[0] - pred[0][i][int(ans[1])] #l1
                    t_n += 1

            t.set_postfix(accuracy=f'{accum_acc/t_c:.4f}', l1=f'{accum_l1/t_n:.4f}')

        accuracy = accum_acc / t_c
        l1 = accum_l1 / t_n
        return [l1, accuracy]

In [21]:
for epoch in range(1, epochs + 1):
    train_loss = train(epoch)
    train_metric = test(train_loader)
    val_metric = test(val_loader)
    test_metric = test(test_loader)
    #ic(train_loss, train_metric, val_metric, test_metric)

Epoch 1:   0%|          | 0/977 [00:00<?, ?it/s]

ic| pred[0].shape[1]: 2, num_numerical: 2
ic| len(pred[1]): 7, num_categorical: 7
ic| pred[1][0].shape: torch.Size([512, 16606])
Epoch 1:   0%|          | 0/977 [00:07<?, ?it/s]


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
