In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn.functional as F
from torch_frame import stype
from torch_frame.data import DataLoader
from torch_frame.nn import (
    EmbeddingEncoder,
    LinearEncoder,
    TimestampEncoder,
)
from tqdm import tqdm

from transformers import get_inverse_sqrt_schedule

import sys
from icecream import ic
import wandb

torch.set_float32_matmul_precision('high')

In [3]:
seed = 42
batch_size = 1024
channels = 256
num_layers = 4

data_split = [0.6, 0.2, 0.2]
split_type = "temporal"

pretrain = 'mask'
compile = True
lr = 5e-4
eps = 1e-8
epochs = 15
args = {
    "testing": False,
    "seed": seed,
    "batch_size": batch_size,
    "channels": channels,
    "num_layers": num_layers,
    "pretrain": pretrain,
    "compile": compile,
    "lr": lr,
    "eps": eps,
    "epochs": epochs,
    "data_split": data_split,
    "split_type": split_type,
}


In [4]:
wandb.login()
run = wandb.init(
    mode="disabled" if args['testing'] else "online",
    project=f"rel-mm", 
    name="model=fttransformer,dataset=IBM-AML_Hi_Sm,objective=MCM,loss=weighted_loss", 
    config=args
)

[34m[1mwandb[0m: Currently logged in as: [33maakyildiz[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
from src.datasets import IBMTransactionsAML
#dataset = IBMTransactionsAML(root='/mnt/data/ibm-transactions-for-anti-money-laundering-aml/dummy.csv', pretrain=pretrain)
dataset = IBMTransactionsAML(root='/mnt/data/ibm-transactions-for-anti-money-laundering-aml/HI-Small_Trans-c.csv', pretrain=pretrain, split_type='temporal', splits=data_split)
ic(dataset)
dataset.materialize()
num_numerical = len(dataset.tensor_frame.col_names_dict[stype.numerical])
num_categorical = len(dataset.tensor_frame.col_names_dict[stype.categorical])
dataset.df.head(5)

ic| dataset: IBMTransactionsAML()
ic| list(self._col_names_dict[stype.numerical]) + list(self._col_names_dict[stype.categorical]): ['Amount Paid',
                                                                                                  'Amount Received',
                                                                                                  'From Bank',
                                                                                                  'From ID',
                                                                                                  'Payment Currency',
                                                                                                  'Payment Format',
                                                                                                  'Receiving Currency',
                                                                                                  'To Bank',
                                                     

Unnamed: 0,Timestamp,From Bank,From ID,To Bank,To ID,Amount Received,Receiving Currency,Amount Paid,Payment Currency,Payment Format,Is Laundering,MASK,split
0,1200,B_10,8000EBD30,B_10,8000EBD30,,US Dollar,0.296848,US Dollar,Reinvestment,0,"[0.2968476112178767, 1]",0
1,1200,B_3208,8000F4580,B_1,8000F5340,0.000359,,0.000359,US Dollar,Cheque,0,"[0, 6]",0
2,0,B_3209,8000F4670,B_3209,8000F4670,,US Dollar,0.346651,US Dollar,Reinvestment,0,"[0.346650841620288, 1]",0
3,120,B_12,8000F5030,B_12,8000F5030,0.286896,US Dollar,0.286896,,Reinvestment,0,"[0, 4]",0
4,360,B_10,8000F5200,B_10,8000F5200,0.379751,US Dollar,,US Dollar,Reinvestment,0,"[0.3797509348152993, 0]",0


In [6]:
num_columns = num_numerical + num_categorical
ic(
    num_numerical,
    num_categorical,
    num_columns,
)

ic| 

num_numerical: 2, num_categorical: 7, num_columns: 9


(2, 7, 9)

In [7]:
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
wandb.log({"device": str(device)})

In [8]:
train_dataset, val_dataset, test_dataset = dataset.split()

In [9]:
train_tensor_frame = train_dataset.tensor_frame
val_tensor_frame = val_dataset.tensor_frame
test_tensor_frame = test_dataset.tensor_frame
train_loader = DataLoader(train_tensor_frame, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_tensor_frame, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_tensor_frame, batch_size=batch_size, shuffle=False)
ic(len(train_loader), len(val_loader), len(test_loader))
wandb.log({
    "train_loader size": len(train_loader), 
    "val_loader size": len(val_loader), 
    "test_loader size": len(test_loader)
})

ic| len(train_loader): 3173
    len(val_loader): 943
    len(test_loader): 844


In [10]:

stype_encoder_dict = {
    stype.categorical: EmbeddingEncoder(),
    stype.numerical: LinearEncoder(),
    stype.timestamp: TimestampEncoder(),
}

from src.nn.models.ft_transformer import FTTransformer 
model = FTTransformer(
    channels=channels,
    out_channels=None,
    num_layers=num_layers,
    col_stats=dataset.col_stats,
    col_names_dict=train_tensor_frame.col_names_dict,
    stype_encoder_dict=stype_encoder_dict,
    pretrain = pretrain
).to(device)

model = torch.compile(model, dynamic=True) if compile else model
learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
ic(learnable_params)
wandb.log({"learnable_params": learnable_params})

# Prepare optimizer and lr scheduler
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=lr, eps=eps)
scheduler = get_inverse_sqrt_schedule(optimizer, num_warmup_steps=0, timescale=1000)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

def calc_loss(pred, y):
    accum_n = accum_c = t_n = t_c = 0
    for i, ans in enumerate(y):
        # ans --> [val, idx]
        # pred --> feature_type_num X type_num X batch_size
        if ans[1] > (num_numerical-1):
            t_c += 1
            a = torch.tensor(int(ans[0])).to(device)
            accum_c += F.cross_entropy(pred[1][int(ans[1])-num_numerical][i], a)
            del a
        else:
            t_n += 1
            accum_n += torch.square(pred[0][i][int(ans[1])] - ans[0]) #mse
    return (accum_n / t_n) + torch.sqrt(accum_c / t_c), (accum_c, t_c), (accum_n, t_n)

def train(epoc: int) -> float:
    model.train()
    loss_accum = loss_c_accum = loss_n_accum = total_count = t_c = t_n = 0

    with tqdm(train_loader, desc=f'Epoch {epoc}') as t:
        for tf in t:
            tf = tf.to(device)
            pred = model(tf)
            loss, loss_c, loss_n = calc_loss(pred, tf.y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_accum += float(loss) * len(tf.y)
            loss_c_accum += loss_c[0]
            loss_n_accum += loss_n[0]
            total_count += len(tf.y)
            t_c += loss_c[1]
            t_n += loss_n[1]
            t.set_postfix(loss=f'{loss_accum/total_count:.4f}', loss_c = f'{loss_c_accum/t_c:.4f}', loss_n = f'{loss_n_accum/t_n:.4f}')
            del pred
            del tf
        wandb.log({"train_loss": loss_accum/total_count, "train_loss_c": loss_c_accum/t_c, "train_loss_n": loss_n_accum/t_n})
    return ((loss_c_accum/t_c) * (num_categorical/num_columns)) + ((loss_n_accum/t_n) * (num_numerical/num_columns))

@torch.no_grad()
def test(loader: DataLoader, dataset_name) -> float:
    model.eval()
    accum_acc = accum_l2 = 0
    loss_c_accum = loss_n_accum = 0
    t_n = t_c = 0
    with tqdm(loader, desc=f'Evaluating') as t:
        for tf in t:
            tf = tf.to(device)
            pred = model(tf)
            _, loss_c, loss_n = calc_loss(pred, tf.y)
            loss_c_accum += loss_c[0]
            loss_n_accum += loss_n[0]
            t_c += loss_c[1]
            t_n += loss_n[1]
            for i, ans in enumerate(tf.y):
                # ans --> [val, idx]
                # pred --> feature_type_num X type_num X batch_size
                if ans[1] > (num_numerical-1):
                    accum_acc += (pred[1][int(ans[1])-num_numerical][i].argmax() == int(ans[0]))
                else:
                    accum_l2 += torch.square(ans[0] - pred[0][i][int(ans[1])]) #rmse
            
            t.set_postfix(accuracy=f'{accum_acc/t_c:.4f}', rmse=f'{torch.sqrt(accum_l2/t_n):.4f}', loss=f'{(loss_c_accum/t_c) + (loss_n_accum/t_n):.4f}', loss_c = f'{loss_c_accum/t_c:.4f}', loss_n = f'{loss_n_accum/t_n:.4f}')
        wandb.log({f"{dataset_name}_accuracy": accum_acc/t_c, f"{dataset_name}_rmse": torch.sqrt(accum_l2/t_n), f"{dataset_name}_loss": ((loss_c_accum/t_c) * (num_categorical/num_columns)) + ((loss_n_accum/t_n) * (num_numerical/num_columns)), f"{dataset_name}_loss_c": loss_c_accum/t_c, f"{dataset_name}_loss_n": loss_n_accum/t_n})
        del tf
        del pred
        accuracy = accum_acc / t_c
        rmse = torch.sqrt(accum_l2 / t_n)
        return [rmse, accuracy]

ic| learnable_params: 496110191


In [11]:
train_metric = test(train_loader, "train")
val_metric = test(val_loader, "val")
test_metric = test(test_loader, "test")
ic( 
    train_metric, 
    val_metric, 
    test_metric
)
for epoch in range(1, epochs + 1):
    train_loss = train(epoch)
    train_metric = test(train_loader, "train")
    val_metric = test(val_loader, "val")
    test_metric = test(test_loader, "test")
    ic(
        train_loss, 
        train_metric, 
        val_metric, 
        test_metric
    )

Evaluating:   0%|                                                                                | 0/3173 [00:00<?, ?it/s]

Evaluating: 100%|█| 3173/3173 [04:16<00:00, 12.39it/s, accuracy=0.0403, loss=2.7663, loss_c=2.5849, loss_n=0.1814, rmse=0.
Evaluating: 100%|█| 943/943 [01:25<00:00, 10.98it/s, accuracy=0.0401, loss=2.7538, loss_c=2.5791, loss_n=0.1748, rmse=0.41
Evaluating: 100%|█| 844/844 [01:16<00:00, 10.98it/s, accuracy=0.0397, loss=2.7554, loss_c=2.5816, loss_n=0.1738, rmse=0.41
Epoch 1: 100%|█████████████████████████████| 3173/3173 [11:22<00:00,  4.65it/s, loss=0.7505, loss_c=0.5498, loss_n=0.0142]
Evaluating: 100%|█| 3173/3173 [05:40<00:00,  9.31it/s, accuracy=0.8106, loss=0.4770, loss_c=0.4655, loss_n=0.0115, rmse=0.
Evaluating: 100%|█| 943/943 [01:15<00:00, 12.44it/s, accuracy=0.7959, loss=0.4967, loss_c=0.4877, loss_n=0.0090, rmse=0.09
Evaluating: 100%|█| 844/844 [01:07<00:00, 12.47it/s, accuracy=0.7908, loss=0.5071, loss_c=0.4982, loss_n=0.0090, rmse=0.09
ic| train_loss: tensor(0.4308, device='cuda:0', grad_fn=<AddBackward0>)
    train_metric: [tensor(0.1072, device='cuda:0'), tensor(0.8106, 

Process ForkProcess-29:
Process ForkProcess-4:
Process ForkProcess-19:
Process ForkProcess-5:
Process ForkProcess-16:
Process ForkProcess-28:
Process ForkProcess-15:
Process ForkProcess-32:
Process ForkProcess-25:
Process ForkProcess-31:
Process ForkProcess-6:
Process ForkProcess-23:
Process ForkProcess-27:
Process ForkProcess-18:
Process ForkProcess-7:
Process ForkProcess-30:
Process ForkProcess-21:
Process ForkProcess-14:
Process ForkProcess-11:
Process ForkProcess-20:
Process ForkProcess-26:
Process ForkProcess-10:
Process ForkProcess-9:
Process ForkProcess-8:
Process ForkProcess-2:
Process ForkProcess-12:
Process ForkProcess-24:
Process ForkProcess-17:
Process ForkProcess-1:
Process ForkProcess-22:
Process ForkProcess-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most rece

KeyboardInterrupt: 

In [12]:
wandb.finish()

[autoreload of src.datasets.ibm_transactions_for_aml failed: Traceback (most recent call last):
  File "/opt/miniconda3/envs/rel-mm/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/opt/miniconda3/envs/rel-mm/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 475, in superreload
    module = reload(module)
  File "/opt/miniconda3/envs/rel-mm/lib/python3.10/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/proj/models-for-relational-multimodal-data/src/datasets/ibm_transactions_for_aml.py", line 10, in <module>
    class IBMTransactionsAML(torch_frame.data.Dataset):
AttributeError: module 'torch_frame' has no attribute 'data'
]


VBox(children=(Label(value='0.421 MB of 0.421 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
learnable_params,▁
test_accuracy,▁████████████████
test_loader size,▁
test_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss_c,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss_n,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_rmse,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▇▇▇▇▇▇██████████
train_loader size,▁
train_loss,█▃▂▃▂▃▂▃▂▃▂▃▁▃▁▃▁▃▁▂▁▂▁▂▁▂▁▂▁▂▁▂▁▂

0,1
device,cuda
learnable_params,496110191
test_accuracy,0.81308
test_loader size,844
test_loss,0.42932
test_loss_c,0.54992
test_loss_n,0.00722
test_rmse,0.08495
train_accuracy,0.91487
train_loader size,3173
