In [1]:
import os
import sys
sys.path.append(os.path.abspath('../'))
from os import makedirs
from os.path import join, basename
import numpy as np
import torch
import random
from arguments import define_new_main_parser
import json

from transformers import Trainer, TrainingArguments, EarlyStoppingCallback

from dataset.aave import AaveDataset
from models.modules import TabFormerBertLM, TabFormerBertForClassification, TabFormerBertModel, TabStaticFormerBert, \
    TabStaticFormerBertLM, TabStaticFormerBertClassification
from misc.utils import ordered_split_dataset, compute_cls_metrics
from dataset.datacollator import *
from main import main

import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_logging(output_dir="logs", log_file_name='output.log'):
    """Configure the root logger to log DEBUG-and-above to a file and a stream.

    The log file is written to ``<output_dir>/logs/<log_file_name>``; missing
    directories are created. Handlers already attached to the root logger are
    detached *and closed* first, so re-running the notebook cell neither
    duplicates every message nor leaks an open log-file descriptor.

    Args:
        output_dir: Base directory; a ``logs`` sub-directory is created in it.
        log_file_name: File name of the log file inside that sub-directory.

    Returns:
        The configured root logger (``logging.getLogger()``).
    """
    log_dir = join(output_dir, "logs")
    # makedirs creates intermediate directories, so output_dir is covered too.
    makedirs(log_dir, exist_ok=True)
    log_file = join(log_dir, log_file_name)

    logger = logging.getLogger()

    # Iterate over a copy: removeHandler mutates logger.handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        # Closing releases the underlying file of a previous FileHandler.
        stale_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    fhandler = logging.FileHandler(log_file)
    chandler = logging.StreamHandler()
    for handler in (fhandler, chandler):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    logger.setLevel(logging.DEBUG)

    return logger

# Install the file and stream handlers on the root logger. With this base
# directory the log file ends up at logs/logs/output.log, because
# setup_logging appends its own "logs" sub-directory.
logger = setup_logging("logs")

logger.info("Logging setup completed.")

2025-01-22 13:07:29,791 - root - INFO - Logging setup completed.


In [3]:
# Switches selecting which feature groups are reflected in the dataset name.
include_user_features = True
include_time_features = True
include_market_features = True
include_exo_features = True

# Suffixes are appended in this fixed order (user, market, time, exo); the
# resulting string is interpolated into the train/val/test file names.
_suffix_by_flag = (
    (include_user_features, "_user"),
    (include_market_features, "_market"),
    (include_time_features, "_time"),
    (include_exo_features, "_exoLagged"),
)
feature_extension = "".join(suffix for enabled, suffix in _suffix_by_flag if enabled)

In [4]:
# --- Locations and experiment identity --------------------------------------
data = "/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet"
dt = "Aave"
exp_name = "debug"
output_dir = f"{data}/output/{exp_name}"
vocab_dir = f"{data}/vocab"
checkpoint = None  # when not None, forwarded as --checkpoint

# --- Dataset files (names depend on the selected feature groups) ------------
fname = f"transactions{feature_extension}_train"
val_fname = f"transactions{feature_extension}_val"
test_fname = f"transactions{feature_extension}_test"
fextension = False  # when truthy, forwarded as --fextension
external_val = False  # when truthy, adds the --external_val flag
nrows = 10000  # forwarded as --nrows

# --- Model / sequence settings ----------------------------------------------
time_pos_type = "regular_position"
field_hs = 64  # hidden state dimension of the transformer (default: 768)
seq_len = 20  # length for transaction sliding window
stride = 5  # stride for transaction sliding window

# --- Training schedule ------------------------------------------------------
bs = 32  # used for both --train_batch_size and --eval_batch_size
num_train_epochs = 10
save_steps = 100
eval_steps = 100

In [5]:
# Assemble the command line handed to the project's argument parser as one
# whitespace-separated string. It is tokenised with str.split() before
# parsing, so no interpolated value may contain whitespace.
_GAP = " " * 5  # same spacing the backslash-continued literal produced

_base_args = [
    "--do_train",
    "--mlm",
    "--pad_seq_first",
    "--get_rids",
    "--field_ce",
    "--lm_type bert",
    f"--field_hs {field_hs}",
    f"--data_type {dt}",
    f"--seq_len {seq_len}",
    f"--stride {stride}",
    f"--num_train_epochs {num_train_epochs}",
    f"--data_root {data}/",
    f"--train_batch_size {bs}",
    f"--eval_batch_size {bs}",
    f"--save_steps {save_steps}",
    f"--eval_steps {eval_steps}",
    f"--data_fname {fname}",
    f"--data_val_fname {val_fname}",
    f"--data_test_fname {test_fname}",
    f"--output_dir {output_dir}",
    f"--time_pos_type {time_pos_type}",
    f"--vocab_dir {vocab_dir}",
    f"--nrows {nrows}",
    "--vocab_cached",
    "--encoder_cached",
    "--cached",
]
# The trailing gap keeps the next appended option separated from --cached.
arg_str = _GAP.join(_base_args) + _GAP

# The external vocabulary path depends on whether a feature extension is set.
if fextension:
    arg_str += f"--fextension {fextension}" + _GAP + f"--external_vocab_path {data}/vocab_ob_{fextension}"
else:
    arg_str += f"--external_vocab_path {data}/vocab/vocab_ob"

# Optional switches; the leading spaces separate them from the previous option.
if external_val:
    arg_str += "    --external_val"
if checkpoint is not None:
    arg_str += f"    --checkpoint {checkpoint}"

In [6]:
# Build the project's argument parser (define_new_main_parser comes from the
# local `arguments` module), passing "Aave" as the only data-type choice.
parser = define_new_main_parser(data_type_choices=["Aave"])
# Echo the assembled argument string so the exact flags of this run are
# recorded in the notebook output.
print(arg_str)

--do_train     --mlm     --pad_seq_first     --get_rids     --field_ce     --lm_type bert     --field_hs 64     --data_type Aave     --seq_len 20     --stride 5     --num_train_epochs 10     --data_root /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/     --train_batch_size 32     --eval_batch_size 32     --save_steps 100     --eval_steps 100     --data_fname transactions_user_market_time_train     --data_val_fname transactions_user_market_time_val     --data_test_fname transactions_user_market_time_test     --output_dir /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/output/debug     --time_pos_type regular_position     --vocab_dir /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab     --nrows 10000     --vocab_cached     --encoder_cached     --cached     --external_vocab_path /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob


In [7]:
# str.split() with no arguments tokenises on any run of whitespace, so the
# multi-space gaps in arg_str are harmless; a value containing a space
# (e.g. a path) would be split into separate tokens.
opts = parser.parse_args(arg_str.split())
# Show the parsed options, including parser defaults that were not passed.
print(opts)

Namespace(jid=1, seed=42, output_dir='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/output/debug', lm_type='bert', flatten=False, field_ce=True, mlm=True, cls_task=False, export_task=False, export_last_only=False, mlm_prob=0.15, freeze=False, data_type='Aave', time_pos_type='regular_position', data_root='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/', data_fname='transactions_user_market_time_train', vocab_file='vocab.nb', user_ids=None, vocab_cached=True, external_encoder_fname='./data/preprocessed/transactionsAave_train.encoder_fit.pkl', external_vocab_fname='./data/vocab_ob', nrows=10000, label_category='last_label', nbatches=None, record_file='experiments', pretrained_dir=None, vocab_dir='/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab', checkpoint=0, do_train=True, do_eval=False, do_prediction=False, save_steps=100, eval_steps=100, num_train_epochs=10, train_batch_size=32, eval_batch_size=32, stride=5, seq_len=

In [8]:
# Derive the log directory from the output directory and make sure both exist.
opts.log_dir = join(opts.output_dir, "logs")
for _run_dir in (opts.output_dir, opts.log_dir):
    makedirs(_run_dir, exist_ok=True)

# Single switch that is truthy when either the classification or the export
# task was requested.
opts.cls_exp_task = opts.cls_task or opts.export_task

# Aave data only accepts these two time-position encodings.
if opts.data_type == "Aave":
    assert opts.time_pos_type in ('sin_cos_position', 'regular_position')

# A BERT run needs at least one objective: MLM, classification or export.
_bert_has_objective = opts.mlm or opts.cls_exp_task
if opts.lm_type == "bert" and not _bert_has_objective:
    raise Exception(
        "Error: Bert needs either '--mlm', '--cls_task' or '--export_task' option. Please re-run with this flag "
        "included.")

# Hand the validated options to the project's entry point.
main(opts)

2025-01-22 13:07:29,874 - dataset.aave_basic - INFO - cached encoded data is read from transactions_user_market_time_train.encoded.csv
2025-01-22 13:07:30,054 - dataset.aave_basic - INFO - read data : (10000, 128)
2025-01-22 13:07:30,057 - dataset.aave_basic - INFO - using cached vocab from /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob
2025-01-22 13:07:30,070 - dataset.aave - INFO - preparing user level data...
100%|██████████| 18/18 [00:00<00:00, 40.83it/s]
2025-01-22 13:07:30,553 - dataset.aave - INFO - creating transaction samples with vocab
100%|██████████| 18/18 [00:00<00:00, 32.58it/s]
2025-01-22 13:07:31,109 - dataset.aave - INFO - ncols: 125
2025-01-22 13:07:31,110 - dataset.aave - INFO - no of samples 2008
2025-01-22 13:07:31,120 - main - INFO - vocab size: 1732
2025-01-22 13:07:31,121 - main - INFO - dataset size: 2008
2025-01-22 13:07:31,147 - dataset.aave_basic - INFO - cached encoded data is read from transactions_user_market_time_train

KeyboardInterrupt: 