In [1]:
import os
from os import makedirs
from os.path import join, basename
import numpy as np
import torch
import random
from args import define_new_main_parser
import json

from transformers import Trainer, TrainingArguments, EarlyStoppingCallback

from dataset.aave import AaveDataset
from dataset.aave_time_static import AaveWithTimePosAndStaticSplitDataset
from dataset.aave_time_pos import AaveWithTimePosDataset
from dataset.aave_static import AaveWithStaticSplitDataset
from models.modules import TabFormerBertLM, TabFormerBertForClassification, TabFormerBertModel, TabStaticFormerBert, \
    TabStaticFormerBertLM, TabStaticFormerBertClassification
from misc.utils import ordered_split_dataset, compute_cls_metrics
from dataset.datacollator import *
from main_aave import main

import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_logging(output_dir="output_aave", log_file_name='output.log'):
    """Configure the root logger to log to a file and to the console.

    Creates ``output_dir`` and ``output_dir/logs`` when missing, detaches and
    closes every handler currently attached to the root logger, then attaches
    one ``FileHandler`` (``output_dir/logs/log_file_name``) and one
    ``StreamHandler``, both at DEBUG level with a shared format.

    The function is safe to call repeatedly (e.g. when a notebook cell is
    re-run): handlers from the previous call are closed, so their log-file
    descriptors are released instead of leaking.

    Args:
        output_dir: Directory under which the ``logs`` sub-directory is created.
        log_file_name: File name of the log file inside ``output_dir/logs``.

    Returns:
        The configured root ``logging.Logger``.
    """
    log_dir = join(output_dir, "logs")
    makedirs(output_dir, exist_ok=True)
    makedirs(log_dir, exist_ok=True)
    log_file = join(log_dir, log_file_name)

    logger = logging.getLogger()

    # Detach AND close existing handlers. `logger.handlers.clear()` only drops
    # the references, leaving any previously opened log file open.
    # Iterate over a copy because removeHandler mutates the list.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    fhandler = logging.FileHandler(log_file)
    fhandler.setLevel(logging.DEBUG)

    chandler = logging.StreamHandler()
    chandler.setLevel(logging.DEBUG)

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    chandler.setFormatter(formatter)

    logger.addHandler(fhandler)
    logger.addHandler(chandler)
    logger.setLevel(logging.DEBUG)

    return logger

# Install the file + console handlers on the root logger; the log file is
# created under "output_aave/logs".
_notebook_log_root = "output_aave"
logger = setup_logging(output_dir=_notebook_log_root)

# Emit one record so the handler setup can be checked in the cell output.
logger.info("Logging setup completed.")

2025-01-13 15:34:22,479 - root - INFO - Logging setup completed.


In [3]:
# --- Experiment identity ------------------------------------------------
exp_name = "debug"
dt = "Aave"  # forwarded as --data_type
time_pos_type = "regular_position"  # forwarded as --time_pos_type

# --- Data locations -----------------------------------------------------
data = "/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet"
vocab_dir = f"{data}/vocab"
output_dir = f"{data}/output/{exp_name}"
fname = "transactions_train_user_market_exoLagged"
val_fname = "transactions_val"
test_fname = "transactions_test_user_market_exoLagged"
fextension = False  # falsy -> no --fextension flag is added to the argument string
external_val = False  # True -> adds the --external_val flag
nrows = 10000  # forwarded as --nrows

# --- Model / windowing --------------------------------------------------
field_hs = 64  # hidden state dimension of the transformer (default: 768)
seq_len = 25  # length for transaction sliding window
stride = 10  # stride for transaction sliding window

# --- Training schedule --------------------------------------------------
bs = 32  # used for both --train_batch_size and --eval_batch_size
num_train_epochs = 10
save_steps = 100
eval_steps = 100
checkpoint = None  # not None -> adds "--checkpoint <value>"

In [4]:
# Build the command-line string handed to the argument parser.
# Options are separated by five spaces (and the base string ends with five
# spaces) so the resulting text matches the layout this cell has always
# produced; the parser only sees whitespace-split tokens, so the exact gap
# width matters only for the printed string.
_OPTION_GAP = " " * 5

_base_options = [
    "--do_train",
    "--mlm",
    "--pad_seq_first",
    "--get_rids",
    "--field_ce",
    "--lm_type bert",
    f"--field_hs {field_hs}",
    f"--data_type {dt}",
    f"--seq_len {seq_len}",
    f"--stride {stride}",
    f"--num_train_epochs {num_train_epochs}",
    f"--data_root {data}/",
    f"--train_batch_size {bs}",
    f"--eval_batch_size {bs}",
    f"--save_steps {save_steps}",
    f"--eval_steps {eval_steps}",
    f"--data_fname {fname}",
    f"--data_val_fname {val_fname}",
    f"--data_test_fname {test_fname}",
    f"--output_dir {output_dir}",
    f"--time_pos_type {time_pos_type}",
    f"--vocab_dir {vocab_dir}",
    f"--nrows {nrows}",
    "--vocab_cached",
    "--encoder_cached",
    "--cached",
]
arg_str = _OPTION_GAP.join(_base_options) + _OPTION_GAP

# The external vocab location depends on whether a file extension tag is set.
if fextension:
    _vocab_options = (
        f"--fextension {fextension}"
        + _OPTION_GAP
        + f"--external_vocab_path {data}/vocab_ob_{fextension}"
    )
else:
    _vocab_options = f"--external_vocab_path {data}/vocab/vocab_ob"
arg_str += _vocab_options

# Optional trailing flags (each is preceded by a four-space gap).
if external_val:
    arg_str += "    --external_val"
if checkpoint is not None:
    arg_str += f"    --checkpoint {checkpoint}"

In [5]:
# Data-type names passed to the parser factory as `data_type_choices`.
_aave_data_types = ["Aave", "Aave_time_pos", "Aave_time_static", "Aave_static"]
parser = define_new_main_parser(data_type_choices=_aave_data_types)

# Show the raw argument string before it is parsed.
print(arg_str)

--do_train     --mlm     --pad_seq_first     --get_rids     --field_ce     --lm_type bert     --field_hs 64     --data_type Aave     --seq_len 25     --stride 10     --num_train_epochs 10     --data_root /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/     --train_batch_size 32     --eval_batch_size 32     --save_steps 100     --eval_steps 100     --data_fname transactions_train_user_market_exoLagged     --data_val_fname transactions_val     --data_test_fname transactions_test_user_market_exoLagged     --output_dir /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/output/debug     --time_pos_type regular_position     --vocab_dir /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab     --nrows 10000     --vocab_cached     --encoder_cached     --cached     --external_vocab_path /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob


In [6]:
# Split on whitespace to get argv-style tokens. Note that a value containing
# spaces (e.g. a path) would be broken into several tokens by this split.
_argv_tokens = arg_str.split()
opts = parser.parse_args(_argv_tokens)
print(opts)

Namespace(jid=1, seed=9, lm_type='bert', flatten=False, field_ce=True, mlm=True, cls_task=False, export_task=False, export_last_only=False, mlm_prob=0.15, freeze=False, data_type='Aave', time_pos_type='regular_position', data_root='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/', data_fname='transactions_train_user_market_exoLagged', vocab_file='vocab.nb', user_ids=None, vocab_cached=True, external_encoder_fname='./data/preprocessed/transactionsAave_train.encoder_fit.pkl', external_vocab_fname='./data/vocab_ob', nrows=10000, label_category='last_label', nbatches=None, record_file='experiments', output_dir='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/output/debug', pretrained_dir=None, vocab_dir='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab', checkpoint=0, do_train=True, do_eval=False, do_prediction=False, save_steps=100, eval_steps=100, num_train_epochs=10, train_batch_size=32, eval_batch_size=32, stride=10, seq

In [None]:
# Derived flag: true when the run is a classification task or an export task.
opts.cls_exp_task = opts.cls_task or opts.export_task

# Validate the option combination BEFORE creating directories or starting the
# run, and use explicit exceptions with messages: a bare `assert` gives no
# hint about what is wrong and is skipped entirely under `python -O`.
if opts.data_type in ["Aave_time_pos", "Aave_time_static"]:
    if opts.time_pos_type != 'time_aware_sin_cos_position':
        raise ValueError(
            f"data_type '{opts.data_type}' requires time_pos_type "
            f"'time_aware_sin_cos_position', got '{opts.time_pos_type}'.")
elif opts.data_type in ["Aave", "Aave_static"]:
    if opts.time_pos_type not in ['sin_cos_position', 'regular_position']:
        raise ValueError(
            f"data_type '{opts.data_type}' requires time_pos_type "
            f"'sin_cos_position' or 'regular_position', got '{opts.time_pos_type}'.")

if (not opts.mlm) and (not opts.cls_exp_task) and opts.lm_type == "bert":
    raise ValueError(
        "Error: Bert needs either '--mlm', '--cls_task' or '--export_task' option. Please re-run with this flag "
        "included.")

# Configuration is valid: prepare the output and log directories.
opts.log_dir = join(opts.output_dir, "logs")
makedirs(opts.output_dir, exist_ok=True)
makedirs(opts.log_dir, exist_ok=True)

# Hand the fully prepared options to the project entry point.
main(opts)

2025-01-13 15:34:22,540 - dataset.aave_basic - INFO - cached encoded data is read from transactions_train_user_market_exoLagged.encoded.csv
2025-01-13 15:34:22,885 - dataset.aave_basic - INFO - read data : (10000, 1011)
2025-01-13 15:34:22,898 - dataset.aave_basic - INFO - using cached vocab from /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob
2025-01-13 15:34:22,933 - dataset.aave - INFO - preparing user level data...
100%|██████████| 18/18 [00:00<00:00, 20.19it/s]
2025-01-13 15:34:24,372 - dataset.aave - INFO - creating transaction samples with vocab
100%|██████████| 18/18 [00:05<00:00,  3.26it/s]
2025-01-13 15:34:29,904 - dataset.aave - INFO - ncols: 1008
2025-01-13 15:34:29,905 - dataset.aave - INFO - no of samples 1011
2025-01-13 15:34:29,993 - main_aave - INFO - vocab size: 11173
2025-01-13 15:34:30,007 - main_aave - INFO - dataset size: 1011
2025-01-13 15:34:30,095 - dataset.aave_basic - INFO - cached encoded data is read from transactions_trai