In [1]:
import os
import sys
sys.path.append(os.path.abspath('../'))
from os import makedirs
from os.path import join, basename
import logging
import numpy as np
import torch
import random
from arguments import define_new_main_parser
import json

from transformers import Trainer, TrainingArguments, EarlyStoppingCallback

from dataset.aave import AaveDataset
from models.modules import TabFormerBertLM, TabFormerBertForClassification, TabFormerBertModel, TabStaticFormerBert, \
    TabStaticFormerBertLM, TabStaticFormerBertClassification
from misc.utils import ordered_split_dataset, compute_cls_metrics
from dataset.datacollator import *
from main import main

# Root logging configuration for the notebook.
# NOTE(review): the run output recorded further down prints timestamps as
# "2025-01-22 12:47:11" rather than in the "%m/%d/%Y %H:%M:%S" form requested
# here, which suggests the root logger was already configured by one of the
# imports above and this basicConfig() call had no effect -- confirm.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
LOG_DATE_FORMAT = "%m/%d/%Y %H:%M:%S"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT)

# Notebook-level logger; `log` is kept as a second name for the same object.
log = logger = logging.getLogger(__name__)

# Turn off Weights & Biases reporting for this session (`os` is imported at the
# top of this cell).
os.environ["WANDB_DISABLED"] = "true"

# Directory that holds the Aave V2 mainnet transaction files.
data_path = "/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet"

# Feature groups selected for this run.
include_user_features = True
include_time_features = True
include_market_features = True
include_exo_features = False

# Each enabled group contributes its suffix to the file-name stem. The suffixes
# are appended in the fixed order user -> market -> time -> exoLagged.
_suffix_by_flag = (
    (include_user_features, "_user"),
    (include_market_features, "_market"),
    (include_time_features, "_time"),
    (include_exo_features, "_exoLagged"),
)
feature_extension = "".join(
    suffix for enabled, suffix in _suffix_by_flag if enabled
)

# `file_path` names the all-features .rds file regardless of the flags above;
# only the train/test CSV names follow `feature_extension`.
file_path = f"{data_path}/transactions_user_market_time_exoLagged.rds"
train_path = f"{data_path}/transactions{feature_extension}_train.csv"
test_path = f"{data_path}/transactions{feature_extension}_test.csv"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Data location and experiment identity -------------------------------
# NOTE(review): this repeats the literal assigned to `data_path` in the first
# cell; the two must be edited together.
data = "/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet"
dt = "Aave"
exp_name = "debug"
output_dir = f"{data}/output/{exp_name}"
vocab_dir = f"{data}/vocab"

# --- Input file stems (built from the feature suffix chosen earlier) -----
fname = f"transactions{feature_extension}_train"
val_fname = f"transactions{feature_extension}_val"
test_fname = f"transactions{feature_extension}_test"
fextension = False
external_val = False
nrows = 1000

# --- Model and windowing ---------------------------------------------------
time_pos_type = "regular_position"
field_hs = 64  # hidden state dimension of the transformer (default: 768)
seq_len = 10  # length for transaction sliding window
stride = 5  # stride for transaction sliding window

# --- Run schedule ----------------------------------------------------------
bs = 32
num_train_epochs = 10
save_steps = 100
eval_steps = 100
checkpoint = None
resample_method = None

# --- Export-specific arguments ---------------------------------------------
nbatches = 1

In [3]:
# Assemble the command line for the export run.
#
# The arguments are collected as an explicit list of tokens and joined with
# single spaces at the end. Token boundaries therefore no longer depend on the
# trailing/indent whitespace around backslash line continuations inside one long
# string literal. Flag order is unchanged.
#
# `arg_str` stays a plain string because the next cell consumes it with
# `arg_str.split()`; as before, a value containing whitespace would be split
# into several tokens there.
arg_tokens = [
    "--export_task",
    "--mlm",
    "--pad_seq_first",
    "--get_rids",
    "--field_ce",
    "--lm_type", "bert",
    "--field_hs", field_hs,
    "--data_type", dt,
    "--seq_len", seq_len,
    "--stride", stride,
    "--num_train_epochs", num_train_epochs,
    "--data_root", f"{data}/",
    "--train_batch_size", bs,
    "--eval_batch_size", bs,
    "--save_steps", save_steps,
    "--eval_steps", eval_steps,
    "--data_fname", fname,
    "--data_val_fname", val_fname,
    "--data_test_fname", test_fname,
    "--output_dir", output_dir,
    "--time_pos_type", time_pos_type,
    "--vocab_dir", vocab_dir,
    "--nrows", nrows,
    "--vocab_cached",
    "--encoder_cached",
    "--cached",
    "--nbatches", nbatches,
]

# The external vocabulary file name carries the extension when one is set.
if fextension:
    arg_tokens += [
        "--fextension", fextension,
        "--external_vocab_path", f"{data}/vocab/vocab_ob_{fextension}",
    ]
else:
    arg_tokens += ["--external_vocab_path", f"{data}/vocab/vocab_ob"]

# Optional arguments are only emitted when configured.
if resample_method is not None:
    arg_tokens += ["--resample_method", resample_method]
if external_val:
    arg_tokens.append("--external_val")
if checkpoint is not None:
    arg_tokens += ["--checkpoint", checkpoint]

arg_str = " ".join(str(token) for token in arg_tokens)

In [4]:
# Split the assembled argument string on whitespace and parse it with the
# project's argument parser, passing "Aave" as the only data_type choice.
cli_tokens = arg_str.split()
parser = define_new_main_parser(data_type_choices=["Aave"])
opts = parser.parse_args(cli_tokens)

In [5]:
# Create the output and log directories for this run.
opts.log_dir = join(opts.output_dir, "logs")
makedirs(opts.output_dir, exist_ok=True)
makedirs(opts.log_dir, exist_ok=True)

# Mirror log records into <log_dir>/output.log.
# The handler is attached to the ROOT logger: the records of a run are emitted by
# module loggers such as "main" and "dataset.aave" (see the recorded output),
# which propagate to the root logger but never pass through this notebook's own
# `logger`. A handler attached to `logger` would leave the file without any of
# the run's messages.
log_file = join(opts.log_dir, 'output.log')
root_logger = logging.getLogger()

# Re-running this cell must not stack handlers (duplicated lines, leaked file
# handles): drop any earlier file handler that targets the same file.
for owner in (root_logger, logger):
    for stale_handler in list(owner.handlers):
        if (isinstance(stale_handler, logging.FileHandler)
                and stale_handler.baseFilename == os.path.abspath(log_file)):
            owner.removeHandler(stale_handler)
            stale_handler.close()

# Mode 'w' starts a fresh log file on every run of this cell.
file_handler = logging.FileHandler(log_file, 'w', 'utf-8')
root_logger.addHandler(file_handler)

# Truthy when either the classification task or the export task was requested.
opts.cls_exp_task = opts.cls_task or opts.export_task

# Only these two time-position encodings are accepted for the Aave data type.
if opts.data_type in ["Aave"]:
    assert opts.time_pos_type in ['sin_cos_position', 'regular_position']

# A BERT run needs at least one of the MLM / classification / export modes.
if (not opts.mlm) and (not opts.cls_exp_task) and opts.lm_type == "bert":
    raise Exception(
        "Error: Bert needs either '--mlm', '--cls_task' or '--export_task' option. Please re-run with this flag "
        "included.")

main(opts)

2025-01-22 12:47:11 - INFO - dataset.aave_basic - cached encoded data is read from transactions_user_market_time_train.encoded.csv
2025-01-22 12:47:11 - INFO - dataset.aave_basic - read data : (1000, 128)
2025-01-22 12:47:11 - INFO - dataset.aave_basic - using cached vocab from /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob
2025-01-22 12:47:11 - INFO - dataset.aave - preparing user level data...
100%|██████████| 18/18 [00:00<00:00, 414.78it/s]
2025-01-22 12:47:11 - INFO - dataset.aave - creating transaction samples with vocab
100%|██████████| 18/18 [00:00<00:00, 416.58it/s]
2025-01-22 12:47:11 - INFO - dataset.aave - ncols: 125
2025-01-22 12:47:11 - INFO - dataset.aave - no of samples 211
2025-01-22 12:47:11 - INFO - main - vocab size: 1732
2025-01-22 12:47:11 - INFO - main - dataset size: 211
2025-01-22 12:47:11 - INFO - dataset.aave_basic - cached encoded data is read from transactions_user_market_time_train.encoded.csv
2025-01-22 12:47:11 - INFO -

2025-01-22 12:50:19 - INFO - main - row embeds shape: (211, 625, 1732)
2025-01-22 12:50:19 - INFO - main - seq embeds shape: (211, 625, 1732)


In [6]:
import numpy

# Open the archive written by the export run and list the arrays it contains.
# NOTE(review): this path is spelled out in full; it matches `output_dir` from
# the configuration cell only while `exp_name` is "debug".
embeddings_path = '/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/output/debug/all_embeddings.npz'
b = numpy.load(embeddings_path)
print(b.files)

['sids', 'seq_last_rids', 'seq_labels', 'row_embeds', 'seq_embeds']


In [7]:
# Display the 'seq_labels' array stored in the archive.
b["seq_labels"]

array([['0', '0', '0', ..., '0', '0',
        '0x0000000000000000000000000000000000000001'],
       ['0', '0', '0', ..., '0', '0',
        '0x000000000000000000000000000000000000dead'],
       ['0', '0', '0', ..., '0x000000000000000000000000000000000000dead',
        '0x000000000000000000000000000000000000dead',
        '0x000000000000000000000000000000000000dead'],
       ...,
       ['0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14', ...,
        '0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14'],
       ['0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14', ...,
        '0x00000000032962b51589768828ad878876299e14',
        '0x00000000032962b51589768828ad878876299e14',
        '0x000

In [8]:
# Display the 'seq_last_rids' array stored in the archive.
b["seq_last_rids"]

array(['0x3fec3516c8085e089d408562dd3f9ca1bbbc5b0eb4eaf20dc1acca3d5467ff57',
       '0x59a33c72e045d762a069ddfe10540ddc1b521cbd27e11e819a8b0e28b45732f1',
       '0x3f33d1fd27aee4e4ff56ef10ab6280729d187cd159a31cfec0c533174d92415d',
       '0xa0ab51d963a8def6633d42ca8960d2dc71455b017550640fcca2211873df47f1',
       '0x229cad41b44765888cf7d54aaf3f47e998e0a9c8caafb1f984b9825a4e118cde',
       '0x336226751c45747c068f70101e039966c1269d949fcbee610085f058bb7c74e9',
       '0xc305a55fa366754bb8f5cbff86fc23eb7c7cb0289e51d92b64cd9391946d5780',
       '0x0ee78a520958780577a64993f01050cc32e80bd0bad919f1b333318eaa325f0b',
       '0xac021c19420ca2ee4bad908743fc38051391aa625cd4e9667cbd0efa1a45ca36',
       '0xf979138b40124babdee0578126d1f8720932c7b2206cda088164bbd79b5d4a0a',
       '0xc0af2bf22c7db5e8b23919496e4c752545733d92492a8f221fd7c8458cdfa613',
       '0xdc5c9a1872fe7bc4d24601fa8d08701e53ac85ef04f7dcbc7d06381e15b73772',
       '0x050efa37de9d0ceb909c9416de62e8fbd646ee908c1f928f6f853a691c13a4f0',

In [9]:
# Display the first ten entries of the 'row_embeds' array.
b["row_embeds"][:10]

array([[[ 0.        ,  0.3931925 , -2.541839  , ...,  0.17999268,
          2.1453867 ,  0.374165  ],
        [ 0.        ,  1.1048604 ,  1.0777364 , ..., -0.08454469,
          3.7837002 ,  1.0658599 ],
        [ 0.        ,  0.2853219 ,  1.1124626 , ...,  1.8817751 ,
         -2.4286623 , -1.227449  ],
        ...,
        [ 0.        ,  0.4888856 ,  0.74004716, ..., -1.6101717 ,
         -1.2522087 ,  3.1604326 ],
        [ 0.        , -3.6674755 ,  1.215533  , ..., -1.2232258 ,
         -2.7862146 , -0.72214365],
        [ 0.        ,  1.510782  ,  2.4452338 , ..., -1.435621  ,
          1.113825  ,  0.66890085]],

       [[ 0.        ,  0.15062726, -3.0834517 , ..., -0.7009773 ,
          2.0239444 , -0.29506773],
        [ 0.        ,  1.4960024 ,  3.0682466 , ...,  0.53378576,
          4.070704  ,  0.13370812],
        [ 0.        , -1.388824  ,  0.8851739 , ...,  1.0743958 ,
         -2.5841646 , -1.3651909 ],
        ...,
        [ 0.        ,  0.34176645,  1.7626485 , ..., -

In [10]:
# Shape of the first entry of the 'seq_embeds' array.
first_seq_embeds = b["seq_embeds"][0]
first_seq_embeds.shape

(625, 1732)