# PyTorch Forecasting for DeFi Borrow Rate Prediction
See https://pytorch-forecasting.readthedocs.io/en/stable/index.html for details on the library used.

In [2]:
! pip install pytorch-forecasting
! pip install pytorch-lightning



In [1]:
from fastai.tabular.all import *
import seaborn as sns
from tqdm import tqdm
from pytorch_forecasting import TemporalFusionTransformer

## 1. Load Data

In [2]:
df = pd.read_csv('../../defi_hacking_data/Compound_-_Data_1.csv')
df["Date"] = pd.to_datetime(df["Timestamp"], unit='s', origin='unix')

tokens = df["Token"].unique()
tokens

array(['DAI', 'USDC', 'USDT', 'ETH'], dtype=object)

In [3]:
df = df.drop_duplicates(['Timestamp', 'Token'])
counts = pd.DataFrame(df['Timestamp'].value_counts()).reset_index()
counts.columns = ['Timestamp', 'Counts']
df = df.merge(counts, on='Timestamp')
df = df[df['Counts'] == 4].reset_index(drop=True).drop('Counts', axis=1)

In [4]:
df1 = pd.DataFrame()

In [5]:
for tok in tokens:
    df_tok = df[df['Token']==tok]
    df_tok = df_tok.drop(['Token', 'Date'], axis=1)

    col_names = []
    for col in df_tok.columns:
        if col == 'Timestamp':
            col_names.append(f'{col}')
        else:
            col_names.append(f'{tok}_{col}')
        
    df_tok.columns = col_names
    #df_tok = df_tok.set_index('Timestamp', drop=True)
    
    if df1.empty:
        df1 = df_tok
    else:
        df1 = pd.merge(df1, df_tok, on='Timestamp')

In [6]:
df1.sort_values('Timestamp', inplace=True)
df1["Date"] = pd.to_datetime(df1["Timestamp"], unit='s', origin='unix')
df1['Timediff'] = df1['Timestamp'].shift(-1) - df1['Timestamp'] 

## 2. Train/test split

In [7]:
target_token = 'DAI'
target_column = f'{target_token}_Borrowing Rate'
target_window = 3
def get_target(row):

    try:
        target = df1[df1['Timestamp'] == row['Timestamp'] + 1800.0*target_window][target_column].values[0]
    except:
        target = np.NaN
    
    return target

In [8]:
df1[f'{target_token}_Target'] =  df1.apply(lambda x: get_target(x), axis=1)
df1 = df1.dropna()
df1 = df1.drop(['Timestamp', 'Date', 'Timediff'], axis=1)

In [9]:
#train_test_split
df1['Train'] = None
train_index = int(len(df1)*0.8)
df1.loc[:train_index, 'Train'] = True
df1.loc[train_index:, 'Train'] = False

In [24]:
df1['Train']

0         True
1         True
2         True
3         True
4         True
         ...  
13756    False
13757    False
13758    False
13760    False
13763    False
Name: Train, Length: 11918, dtype: object

## PyTorch Forecasting Model

In [15]:
df1 = df1.reset_index(drop=True)

In [20]:
idxs = [i for i in range(0, 11918)]
df1['time_idx'] = idxs

In [31]:
df1

Unnamed: 0,DAI_Borrowing Rate,DAI_Deposit Rate,DAI_Borrow Volume,DAI_Supply Volume,USDC_Borrowing Rate,USDC_Deposit Rate,USDC_Borrow Volume,USDC_Supply Volume,USDT_Borrowing Rate,USDT_Deposit Rate,USDT_Borrow Volume,USDT_Supply Volume,ETH_Borrowing Rate,ETH_Deposit Rate,ETH_Borrow Volume,ETH_Supply Volume,DAI_Target,Train,time_idx
0,0.073195,0.050982,1.069964e+09,6.196481e+10,0.087046,0.066993,7.285430e+08,4.063042e+10,0.099588,0.077548,6.430536e+07,3.696225e+09,0.022952,0.000489,30553.654354,5.663257e+07,0.073436,True,0
1,0.073101,0.050912,1.069961e+09,6.197050e+10,0.087053,0.066998,7.285469e+08,4.063019e+10,0.094890,0.073569,6.407877e+07,3.700299e+09,0.022952,0.000489,30553.703955,5.663353e+07,0.067829,True,1
2,0.073061,0.050882,1.069972e+09,6.197354e+10,0.087058,0.067003,7.285528e+08,4.063019e+10,0.085767,0.065933,6.399418e+07,3.729213e+09,0.022951,0.000489,30553.830472,5.664534e+07,0.066704,True,2
3,0.073436,0.051161,1.070496e+09,6.197966e+10,0.086921,0.066889,7.285713e+08,4.063660e+10,0.072946,0.055500,6.317162e+07,3.730028e+09,0.022952,0.000489,30553.786270,5.663860e+07,0.066708,True,3
4,0.067829,0.047015,1.070566e+09,6.234580e+10,0.086312,0.066383,7.285755e+08,4.066162e+10,0.057764,0.043239,6.256095e+07,3.752927e+09,0.022952,0.000489,30553.841412,5.663440e+07,0.066790,True,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13756,0.046650,0.031471,3.669882e+09,2.109054e+11,0.040323,0.025672,2.588018e+09,1.666458e+11,0.039179,0.024246,6.062863e+08,4.162129e+10,0.027231,0.000765,53139.698365,7.448813e+07,0.046637,False,11913
13757,0.046646,0.031465,3.669895e+09,2.109253e+11,0.040317,0.025664,2.587664e+09,1.666482e+11,0.039127,0.024181,6.054853e+08,4.162108e+10,0.027231,0.000765,53139.770109,7.448817e+07,0.046636,False,11914
13758,0.046644,0.031464,3.669904e+09,2.109312e+11,0.040301,0.025643,2.587190e+09,1.666826e+11,0.039243,0.024324,6.072485e+08,4.162099e+10,0.027232,0.000765,53143.491452,7.447149e+07,0.046639,False,11915
13760,0.046636,0.031453,3.669920e+09,2.109667e+11,0.040307,0.025651,2.587711e+09,1.666908e+11,0.039234,0.024314,6.072520e+08,4.162990e+10,0.027234,0.000765,53161.994342,7.447244e+07,0.046638,False,11916


In [34]:
train = df1.loc[df1['Train'] == True]

In [71]:
train[['DAI_Borrowing Rate', 'DAI_Deposit Rate', 'DAI_Borrow Volume', 'DAI_Supply Volume', 'DAI_Target', 'time_idx']]

Unnamed: 0,DAI_Borrowing Rate,DAI_Deposit Rate,DAI_Borrow Volume,DAI_Supply Volume,DAI_Target,time_idx
0,0.073195,0.050982,1.069964e+09,6.196481e+10,0.073436,0
1,0.073101,0.050912,1.069961e+09,6.197050e+10,0.067829,1
2,0.073061,0.050882,1.069972e+09,6.197354e+10,0.066704,2
3,0.073436,0.051161,1.070496e+09,6.197966e+10,0.066708,3
4,0.067829,0.047015,1.070566e+09,6.234580e+10,0.066790,4
...,...,...,...,...,...,...
9528,0.041900,0.025430,3.202788e+09,2.066683e+11,0.041900,8960
9529,0.041900,0.025430,3.202798e+09,2.066682e+11,0.041891,8961
9530,0.041900,0.025430,3.202802e+09,2.066681e+11,0.041876,8962
9532,0.041891,0.025419,3.201720e+09,2.066425e+11,0.041875,8963


In [75]:
train_dai = train[['DAI_Borrowing Rate', 'DAI_Deposit Rate', 'DAI_Borrow Volume', 'DAI_Supply Volume', 'time_idx']]

In [82]:
train_dai.index = train_dai['time_idx']

In [86]:
train_dai.index.name = ''

In [89]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer

# load data
train_data = train

# define dataset
max_encoder_length = 168
max_prediction_length = 1
# training_cutoff = 8964  # day for cutoff

training = TimeSeriesDataSet(
    train_dai,
    time_idx= 'time_idx',
    target= 'DAI_Borrowing Rate',
    group_ids=['DAI_Borrowing Rate', 'DAI_Deposit Rate', 'DAI_Borrow Volume', 'DAI_Supply Volume', 'time_idx'],
    min_encoder_length=1,
    max_encoder_length=5,
    min_prediction_length=1,
    max_prediction_length=5,
    time_varying_unknown_reals=['DAI_Borrowing Rate'],
)



AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags

In [52]:
training.get_parameters()

{'time_idx': 'time_idx',
 'target': 'DAI_Borrowing Rate',
 'group_ids': ['DAI_Borrowing Rate',
  'DAI_Deposit Rate',
  'DAI_Borrow Volume',
  'DAI_Supply Volume',
  'USDC_Borrowing Rate',
  'USDC_Deposit Rate',
  'USDC_Borrow Volume',
  'USDC_Supply Volume',
  'USDT_Borrowing Rate',
  'USDT_Deposit Rate',
  'USDT_Borrow Volume',
  'USDT_Supply Volume',
  'ETH_Borrowing Rate',
  'ETH_Deposit Rate',
  'ETH_Borrow Volume',
  'ETH_Supply Volume',
  'Train',
  'time_idx'],
 'weight': None,
 'max_encoder_length': 0,
 'min_encoder_length': 0,
 'min_prediction_idx': 0,
 'min_prediction_length': 1,
 'max_prediction_length': 1,
 'static_categoricals': [],
 'static_reals': [],
 'time_varying_known_categoricals': [],
 'time_varying_known_reals': [],
 'time_varying_unknown_categoricals': [],
 'time_varying_unknown_reals': ['DAI_Borrowing Rate'],
 'variable_groups': {},
 'constant_fill_strategy': {},
 'allow_missing_timesteps': False,
 'lags': {},
 'add_relative_time_idx': False,
 'add_target_scales

In [54]:
# convert the dataset to a dataloader
dataloader = training.to_dataloader(batch_size=4)

# and load the first batch
x, y = next(iter(dataloader))
print("x =", x)
print("\ny =", y)
print("\nsizes of x =")
for key, value in x.items():
    print(f"\t{key} = {value.size()}")

x = {'encoder_cat': tensor([], size=(4, 0, 0), dtype=torch.int64), 'encoder_cont': tensor([], size=(4, 0, 1)), 'encoder_target': tensor([], size=(4, 0)), 'encoder_lengths': tensor([0, 0, 0, 0]), 'decoder_cat': tensor([], size=(4, 1, 0), dtype=torch.int64), 'decoder_cont': tensor([[[-1.0318]],

        [[-0.7013]],

        [[-0.2426]],

        [[ 0.6915]]]), 'decoder_target': tensor([[0.0420],
        [0.0529],
        [0.0682],
        [0.0992]]), 'decoder_lengths': tensor([1, 1, 1, 1]), 'decoder_time_idx': tensor([[8887],
        [4794],
        [ 523],
        [1278]]), 'groups': tensor([[1568, 1568, 8335, 8609, 1455, 1458, 7538, 8885, 2735, 2523, 6021, 6041,
         8320, 7669, 7448, 6172,    0, 8887],
        [3143, 3150, 6946, 7002, 5951, 5983, 7851, 7290, 6777, 6785, 8961, 8404,
          806, 1062, 6606, 8569,    0, 4794],
        [4165, 4198,  628,  676, 4965, 4987,  146,  158, 8873, 8873,  548,  477,
          398,  401,  247,  405,    0,  523],
        [6806, 6819,  291,  

  target_scale = torch.tensor([batch[0]["target_scale"] for batch in batches], dtype=torch.float)


In [65]:
training.get_parameters()

{'time_idx': 'time_idx',
 'target': 'DAI_Borrowing Rate',
 'group_ids': ['DAI_Borrowing Rate',
  'DAI_Deposit Rate',
  'DAI_Borrow Volume',
  'DAI_Supply Volume',
  'USDC_Borrowing Rate',
  'USDC_Deposit Rate',
  'USDC_Borrow Volume',
  'USDC_Supply Volume',
  'USDT_Borrowing Rate',
  'USDT_Deposit Rate',
  'USDT_Borrow Volume',
  'USDT_Supply Volume',
  'ETH_Borrowing Rate',
  'ETH_Deposit Rate',
  'ETH_Borrow Volume',
  'ETH_Supply Volume',
  'Train',
  'time_idx'],
 'weight': None,
 'max_encoder_length': 0,
 'min_encoder_length': 0,
 'min_prediction_idx': 0,
 'min_prediction_length': 1,
 'max_prediction_length': 1,
 'static_categoricals': [],
 'static_reals': [],
 'time_varying_known_categoricals': [],
 'time_varying_known_reals': [],
 'time_varying_unknown_categoricals': [],
 'time_varying_unknown_reals': ['DAI_Borrowing Rate'],
 'variable_groups': {},
 'constant_fill_strategy': {},
 'allow_missing_timesteps': False,
 'lags': {},
 'add_relative_time_idx': False,
 'add_target_scales

In [64]:
validation = TimeSeriesDataSet.from_dataset(training, train, min_prediction_idx=training.index.time.max() + 1, stop_randomization=True)
batch_size = 128
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=2)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [66]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
    max_epochs=100,
    gpus=0,
    gradient_clip_val=0.1,
    limit_train_batches=30,
    callbacks=[lr_logger, early_stop_callback],
)


tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=16,
    output_size=7,
    loss=QuantileLoss(),
    log_interval=2,
    reduce_on_plateau_patience=4
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Number of parameters in network: 56.4k


In [67]:
# find optimal learning rate
res = trainer.lr_find(
    tft, train_dataloader=train_dataloader, early_stop_threshold=1000.0, max_lr=0.3,
)

print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()

trainer.fit(
    tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader,
)

AttributeError: 'Trainer' object has no attribute 'lr_find'

In [25]:
from pytorch_forecasting.data.examples import get_stallion_data
data = get_stallion_data()  # load data as pandas dataframe

In [34]:

df1[df1['Train']==True].index

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            8955, 8956, 8957, 8958, 8959, 8960, 8961, 8962, 8963, 8964],
           dtype='int64', length=8965)