### Process

In [1]:
from dataset.aave_time_static import AaveWithTimePosAndStaticSplitDataset
from dataset.aave_time_pos import AaveWithTimePosDataset
from dataset.aave_static import AaveWithStaticSplitDataset
from dataset.aave import AaveDataset
from dataset.aave_basic import AaveBasicDataset

import logging


# Configure the root logger once for the notebook: INFO and above, each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger named after the current module, for messages emitted by notebook cells.
log = logging.getLogger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Feature-group switches. Every enabled group contributes one suffix to
# `fextension`, which is then embedded in the transactions file names.
include_user_features = True
include_time_features = True
include_market_features = True
include_exo_features = False

# Suffixes are concatenated in this fixed order: user, market, time, exoLagged.
fextension = "".join(
    suffix
    for enabled, suffix in (
        (include_user_features, "_user"),
        (include_market_features, "_market"),
        (include_time_features, "_time"),
        (include_exo_features, "_exoLagged"),
    )
    if enabled
)

In [3]:
# --- Data locations ---------------------------------------------------------
root = "/data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet"
vocab_dir = f"{root}/vocab"
# The vocab is both saved to and preloaded from the same directory.
save_vocab_dir = preload_vocab_dir = vocab_dir

# --- Input file stems -------------------------------------------------------
# At this point `fextension` still holds the feature suffix built from the
# include_* flags, so it is baked into the train/test file stems here.
fname = f"transactions{fextension}_train"
test_fname = f"transactions{fextension}_test"
val_fname = ""

# GOTCHA: `fextension` is reused. From here on it no longer carries the feature
# suffix; it is the dataset-variant selector checked and dispatched on by the
# following cells. Because it is cleared here, re-running this cell without
# first re-running the feature-flag cell yields file stems with no suffix.
fextension = ""
preload_fextension = ""

# --- Caching ----------------------------------------------------------------
user_level_cached = vocab_cached = False
external_vocab_path = ""

# --- Resampling / validation ------------------------------------------------
resample_method, resample_ratio, resample_seed = None, 10, 100
external_val = False

In [4]:
# Normalise unset values (None or empty) to "" so the membership checks and
# string formatting below behave uniformly.
fextension = fextension or ""
preload_fextension = preload_fextension or ""

if vocab_cached:
    # The previous code concatenated `root` and the file name with no path
    # separator (".../Mainnetvocab_ob"), which can never point at a real file.
    # The run logs show the vocab object being written to
    # "<save_vocab_dir>/vocab_ob", so the cached path is built from that
    # directory with an explicit separator.
    # NOTE(review): the "vocab_ob_<fextension>" name for non-default variants is
    # kept from the original code - confirm it matches what the dataset saves.
    vocab_ob_name = f"vocab_ob_{fextension}" if fextension else "vocab_ob"
    external_vocab_path = f"{save_vocab_dir}/{vocab_ob_name}"

external_vocab_path = external_vocab_path or ""

# Only these dataset variants are known to the class-selection cell.
assert fextension in ("static", "static-test", "time-pos", "time-pos-test",
                      "", "test", "static-only", "static-only-test"), \
    f"unsupported fextension: {fextension!r}"

In [5]:
# Map the dataset-variant selector to the NAME of the dataset class to build.
# The value is kept as a string; later cells resolve it to the actual class.
# Any selector not listed here leaves the name empty.
dataset_class = {
    'static': 'AaveWithTimePosAndStaticSplitDataset',
    'static-test': 'AaveWithTimePosAndStaticSplitDataset',
    'static-only': 'AaveWithStaticSplitDataset',
    'static-only-test': 'AaveWithStaticSplitDataset',
    'time-pos': 'AaveWithTimePosDataset',
    'time-pos-test': 'AaveWithTimePosDataset',
    '': 'AaveDataset',
    'test': 'AaveDataset',
}.get(fextension, '')

In [6]:
# Sanity check: show which dataset class name was selected for this run.
print(dataset_class)

AaveDataset


In [7]:
# Resolve the selected class name through an explicit registry rather than
# eval(): no string is executed as code, and an unexpected name fails with a
# KeyError that names the offending value.
dataset_registry = {
    'AaveWithTimePosAndStaticSplitDataset': AaveWithTimePosAndStaticSplitDataset,
    'AaveWithStaticSplitDataset': AaveWithStaticSplitDataset,
    'AaveWithTimePosDataset': AaveWithTimePosDataset,
    'AaveDataset': AaveDataset,
}

# Build the training dataset from the train file (`fname`). `vocab_cached` is
# still False at this point, and the captured output of this cell shows the
# vocab being saved under `save_vocab_dir`.
dataset = dataset_registry[dataset_class](
    cls_task=True,
    user_ids=None,
    seq_len=10,
    root=root,
    fname=fname,
    user_level_cached=user_level_cached,
    vocab_cached=vocab_cached,
    external_vocab_path=external_vocab_path,
    preload_vocab_dir=preload_vocab_dir,
    save_vocab_dir=save_vocab_dir,
    preload_fextension=preload_fextension,
    fextension=fextension,
    nrows=None,
    flatten=False,
    stride=5,
    return_labels=True,
    label_category='last_label',
    pad_seq_first=False,
    get_rids=True,
    long_and_sort=True,
    resample_method=resample_method,
    resample_ratio=resample_ratio,
    resample_seed=resample_seed,
)

2025-01-15 12:56:44,719 - INFO - cached encoded data is read from transactions_user_market_time_train.encoded.csv
2025-01-15 12:56:55,092 - INFO - read data : (1921610, 128)
2025-01-15 12:56:55,108 - INFO - preparing user level data...
100%|██████████| 113601/113601 [01:20<00:00, 1415.95it/s]
2025-01-15 12:58:26,221 - INFO - creating transaction samples with vocab
100%|██████████| 113601/113601 [02:01<00:00, 935.52it/s] 
2025-01-15 13:00:27,656 - INFO - ncols: 125
2025-01-15 13:00:27,657 - INFO - no of samples 364279
2025-01-15 13:00:29,145 - INFO - saving vocab at /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab.nb
2025-01-15 13:00:29,173 - INFO - saving vocab object at /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob


In [9]:
# Point later dataset builds at the vocab of the training dataset, unless an
# external vocab path was already configured.
external_vocab_path = external_vocab_path or dataset.vocab_path

# From now on datasets are constructed with the cached-vocab flag switched on.
vocab_cached = True
# NOTE(review): `encoder_cached` is not read by any cell visible here - confirm
# it is still needed.
encoder_cached = True

In [10]:
# Resolve the selected class name through an explicit registry rather than
# eval(): no string is executed as code, and an unexpected name fails with a
# KeyError that names the offending value.
test_dataset_registry = {
    'AaveWithTimePosAndStaticSplitDataset': AaveWithTimePosAndStaticSplitDataset,
    'AaveWithStaticSplitDataset': AaveWithStaticSplitDataset,
    'AaveWithTimePosDataset': AaveWithTimePosDataset,
    'AaveDataset': AaveDataset,
}

# Build the test dataset from the test file (`test_fname`) with the same
# settings as the training dataset. By now `vocab_cached` is True and
# `external_vocab_path` points at the training vocab, so the vocab is reused
# rather than rebuilt (see the "using cached vocab" line in the cell output).
test_dataset = test_dataset_registry[dataset_class](
    cls_task=True,
    user_ids=None,
    seq_len=10,
    root=root,
    fname=test_fname,
    user_level_cached=user_level_cached,
    vocab_cached=vocab_cached,
    external_vocab_path=external_vocab_path,
    preload_vocab_dir=preload_vocab_dir,
    save_vocab_dir=save_vocab_dir,
    preload_fextension=preload_fextension,
    fextension=fextension,
    nrows=None,
    flatten=False,
    stride=5,
    return_labels=True,
    label_category='last_label',
    pad_seq_first=False,
    get_rids=True,
    long_and_sort=True,
    resample_method=resample_method,
    resample_ratio=resample_ratio,
    resample_seed=resample_seed,
)

2025-01-15 13:00:46,705 - INFO - cached encoded data is read from transactions_user_market_time_test.encoded.csv
2025-01-15 13:00:47,079 - INFO - read data : (87566, 128)
2025-01-15 13:00:47,080 - INFO - using cached vocab from /data/IDEA_DeFi_Research/LTM/Data/Lending_Protocols/Aave/V2/Mainnet/vocab/vocab_ob
2025-01-15 13:00:47,102 - INFO - preparing user level data...
100%|██████████| 5889/5889 [00:03<00:00, 1591.07it/s]
2025-01-15 13:00:51,397 - INFO - creating transaction samples with vocab
100%|██████████| 5889/5889 [00:04<00:00, 1323.70it/s]
2025-01-15 13:00:55,850 - INFO - ncols: 125
2025-01-15 13:00:55,850 - INFO - no of samples 13554
