In [1]:
%load_ext autoreload
%autoreload 2

import torch
from utility.attentionAutoEncoder import AttentionAutoEncoder, concate_data, AttentionDataset, get_return_from_batch
from torch.utils.data import DataLoader
import numpy as np

In [2]:
# Seed PyTorch's global RNG before the first random operation so that any
# random weight initialisation (and the torch.randn draw below) is
# reproducible under Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

# NOTE(review): the meaning of the three positional arguments is defined in
# utility.attentionAutoEncoder. They presumably correspond to the (3, 5)
# input layout used below plus a hidden size of 2 — TODO confirm.
model = AttentionAutoEncoder(3, 5, 2)

In [3]:
# Random (3, 5) tensor of standard-normal draws, used as a quick smoke-test
# input for the model.
sample_shape = (3, 5)
input_data = torch.randn(sample_shape)
input_data

tensor([[-0.0981, -0.9377,  0.3664,  1.1251,  0.6085],
        [ 1.1084, -0.6301,  0.2004, -0.1253, -1.1028],
        [-0.4710, -0.6498, -0.5386, -0.4563,  0.5787]])

In [4]:
# Smoke test: run one forward pass on the random sample. The recorded output
# is a single row of 5 values.
model(input_data)

tensor([[ 0.5117,  0.3257,  0.7888, -1.2903,  0.1211]], grad_fn=<MmBackward0>)

In [5]:
import pandas as pd
from utility.data_processing import set_time_data

In [6]:
# Load the 5-year metrics CSV of every ticker and run it through
# set_time_data; the result is keyed by ticker in the order listed here.
TICKERS = ('btc', 'eth', 'doge', 'xrp', 'ada')

dfs = {
    ticker: set_time_data(pd.read_csv(f'../data/{ticker}_metrics_5_years.csv'))
    for ticker in TICKERS
}

In [7]:
# Build the training tensor (`res`) and the test tensor (`test_res`) from the
# per-ticker frames.
# NOTE(review): presumably `day_split` is the boundary date between the two
# parts — confirm against utility.attentionAutoEncoder.concate_data.
res, test_res = concate_data(dfs, day_split='2023-12-31')

In [8]:
# Inspect the training tensor's dimensions (3 x 5 x 1458 in the recorded run).
res.shape

torch.Size([3, 5, 1458])

In [9]:
# Take index 0 along the last axis, leaving a 2-D slice whose shape is the
# first two dimensions of `res`.
X_t = res[:, :, 0]

In [10]:
# Sanity check: forward pass on one slice of the real data (still untrained).
model(X_t)

tensor([[-0.0529,  0.0195,  0.0765,  0.0070, -0.0378]], grad_fn=<MmBackward0>)

In [11]:
# Size of the slice's last axis (5 in the recorded run).
X_t.shape[-1]

5

In [12]:
# Wrap the training and test tensors in Dataset objects.
training_dataset = AttentionDataset(res)
test_dataset = AttentionDataset(test_res)

# Build the loaders. shuffle=False keeps samples in dataset order; training
# uses batches of 50, the test loader yields one sample at a time.
training_dataloader = DataLoader(training_dataset, batch_size=50, shuffle=False, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Peek at the first training batch to confirm its layout.
for batch in training_dataloader:
    print(batch.shape)  # Recorded output: torch.Size([50, 3, 5])
    break  # Only the first batch is needed

torch.Size([50, 3, 5])


In [13]:
# get_return_from_batch(batch)

In [14]:
# model(batch)

In [15]:
import torch.nn as nn
import torch.optim as optim

In [16]:
# Training objective: mean squared error between the model output and the
# target returns, minimised with Adam.
LEARNING_RATE = 1e-3

criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [17]:
num_epochs = 50
batches_per_epoch = len(training_dataloader)

for epoch in range(num_epochs):
    running_loss = 0.0

    for X_t in training_dataloader:
        # Clear gradients accumulated by the previous optimisation step.
        optimizer.zero_grad()

        # Forward pass: compare the model output with the batch's returns.
        target_returns = get_return_from_batch(X_t)
        predicted_returns = model(X_t)
        batch_loss = criterion(predicted_returns, target_returns)

        # Backward pass and parameter update.
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Report the mean per-batch loss of this epoch.
    avg_loss = running_loss / batches_per_epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/50], Loss: 0.6317
Epoch [2/50], Loss: 0.4477
Epoch [3/50], Loss: 0.3541
Epoch [4/50], Loss: 0.2976
Epoch [5/50], Loss: 0.2566
Epoch [6/50], Loss: 0.2239
Epoch [7/50], Loss: 0.1967
Epoch [8/50], Loss: 0.1739
Epoch [9/50], Loss: 0.1545
Epoch [10/50], Loss: 0.1380
Epoch [11/50], Loss: 0.1237
Epoch [12/50], Loss: 0.1115
Epoch [13/50], Loss: 0.1008
Epoch [14/50], Loss: 0.0915
Epoch [15/50], Loss: 0.0833
Epoch [16/50], Loss: 0.0761
Epoch [17/50], Loss: 0.0697
Epoch [18/50], Loss: 0.0641
Epoch [19/50], Loss: 0.0591
Epoch [20/50], Loss: 0.0546
Epoch [21/50], Loss: 0.0505
Epoch [22/50], Loss: 0.0469
Epoch [23/50], Loss: 0.0436
Epoch [24/50], Loss: 0.0406
Epoch [25/50], Loss: 0.0379
Epoch [26/50], Loss: 0.0355
Epoch [27/50], Loss: 0.0332
Epoch [28/50], Loss: 0.0312
Epoch [29/50], Loss: 0.0293
Epoch [30/50], Loss: 0.0276
Epoch [31/50], Loss: 0.0260
Epoch [32/50], Loss: 0.0246
Epoch [33/50], Loss: 0.0232
Epoch [34/50], Loss: 0.0220
Epoch [35/50], Loss: 0.0209
Epoch [36/50], Loss: 0.0198
E

In [18]:
# Collect the signed error (model output - target returns) for every training
# batch. This pass is evaluation only, so it runs under torch.no_grad():
# otherwise each `err` keeps its autograd graph alive through `errors`, and
# everything derived from it (the stacked errors, the quantile thresholds)
# carries a grad_fn it never needs.
errors = []
with torch.no_grad():
    for X_t in training_dataloader:
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)
        # Drop the singleton dim 1 so the batches can be concatenated row-wise.
        err = (R_T_hat - R_T).squeeze(1)
        errors.append(err)

In [19]:
# Concatenate the per-batch errors along dim 0 into a single tensor
# (1458 rows x 5 columns in the recorded run).
stacked_error = torch.cat(errors, dim=0) 
stacked_error.shape

torch.Size([1458, 5])

In [20]:
# Per-column 90th percentile of the training errors. Later cells flag a
# sample as anomalous when its error is >= this threshold.
q90 = torch.quantile(stacked_error, 0.90, dim=0)
q90

tensor([0.0236, 0.0677, 0.2402, 0.0530, 0.0245], grad_fn=<SqueezeBackward4>)

In [21]:
# Walk the test set one sample at a time, flagging every column whose signed
# error reaches the q90 threshold and keeping the matching target returns.
anomalies_list, returns_list = [], []

for X_t in test_dataloader:
    R_T = get_return_from_batch(X_t)
    err = (model(X_t) - R_T).squeeze(1)

    anomalies_list.append(err >= q90)
    returns_list.append(R_T)

In [22]:
# Stack the per-sample results row-wise; the returns additionally lose their
# singleton dim 1 so both tensors end up 2-D.
anomalies_detection = torch.cat(anomalies_list, dim=0)
all_returns = torch.cat(returns_list, dim=0).squeeze(1)

In [23]:
# Number of flagged test samples per column.
anomalies_detection.sum(dim=0)

tensor([26, 40, 48, 24, 20])

In [24]:
# Boolean anomaly mask as a NumPy array (the array written to disk below).
anomalies_detection.numpy()

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False,  True,  True,  True, False],
       ...,
       [False, False,  True, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False]])

In [25]:
# Test-set target returns as a NumPy array (the array written to disk below).
all_returns.numpy()

array([[ 0.04248666,  0.02955466,  0.02979293,  0.02624824,  0.05020779],
       [ 0.02004069,  0.00433265, -0.00832219, -0.00529069, -0.02655358],
       [-0.04943179, -0.0658676 , -0.107687  , -0.07494581, -0.08653914],
       ...,
       [-0.01766928, -0.01487776, -0.03122776, -0.04329219, -0.03598882],
       [-0.0100486 ,  0.00254795, -0.00297066, -0.01699607,  0.00157149],
       [ 0.00932239, -0.00688175,  0.00836564,  0.01134853, -0.01749099]],
      dtype=float32)

In [26]:
# Persist the q90 test-set anomaly mask for later use.
np.save('./numpyarray/anomalies_test.npy', anomalies_detection.numpy())

In [27]:
# Persist the test-set target returns for later use.
np.save('./numpyarray/returns_test.npy', all_returns.numpy())

In [28]:
# Per-column 10th percentile of the training errors (lower tail).
# NOTE(review): no later cell visible in this notebook reads q10; the
# detection cells only compare against upper-tail thresholds.
q10 = torch.quantile(stacked_error, 0.10, dim=0)
q10

tensor([-0.0662, -0.0440, -0.0405, -0.1197, -0.0588],
       grad_fn=<SqueezeBackward4>)

In [29]:
# for X_t in all_returns:
#     R_T = get_return_from_batch(X_t)
#     R_T_hat = model(X_t)

#     err = (R_T_hat - R_T).squeeze(1)
#     anomalies = err >= q90
#     anomalies_list.append(anomalies)
#     returns_list.append(R_T)

In [30]:
# Sweep candidate thresholds (50th..99th percentile of the training errors)
# and save one test-set anomaly mask per percentile.
#
# The test errors do not depend on the threshold, so they are computed once up
# front instead of re-running the model over the whole test set for each of
# the 50 percentiles. torch.no_grad() skips autograd bookkeeping for this
# inference-only pass.
test_error_batches = []
with torch.no_grad():
    for X_t in test_dataloader:
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)
        test_error_batches.append((R_T_hat - R_T).squeeze(1))
test_errors = torch.cat(test_error_batches)

for i in range(50, 100):
    t = 0.01 * i
    th = torch.quantile(stacked_error, t, dim=0)

    # The per-column threshold broadcasts across every test row.
    anomalies_detection = test_errors >= th

    with open(f'./numpyarray/anomalies_q{i}_test.npy', 'wb') as f:
        np.save(f, anomalies_detection.numpy())

In [31]:
# Re-create the training loader with one sample per batch so the training set
# is processed exactly like the test set.
# NOTE: this rebinds `training_dataloader` (previously batch_size=50);
# re-running the training cell after this one would train with batch size 1.
training_dataloader = DataLoader(training_dataset, batch_size=1, shuffle=False, num_workers=0)
anomalies_list = []
returns_list = []

for X_t in training_dataloader:
    R_T = get_return_from_batch(X_t)
    R_T_hat = model(X_t)

    err = (R_T_hat - R_T).squeeze(1)
    anomalies = err >= q90
    anomalies_list.append(anomalies)
    returns_list.append(R_T)

anomalies_detection = torch.concat(anomalies_list)
all_returns = torch.concat(returns_list).squeeze(1)

# Save the q90 anomaly mask and the returns, mirroring the two test-set files
# (anomalies_test.npy / returns_test.npy). The original wrote
# returns_train.npy twice and never persisted the mask it had just computed.
with open('./numpyarray/anomalies_train.npy', 'wb') as f:
    np.save(f, anomalies_detection.numpy())

with open('./numpyarray/returns_train.npy', 'wb') as f:
    np.save(f, all_returns.numpy())

In [33]:
# Sweep candidate thresholds (50th..99th percentile of the training errors),
# remember each threshold in `ths` keyed by its percentile, and save one
# training-set anomaly mask per percentile.
#
# The training errors do not depend on the threshold, so they are computed
# once up front instead of re-running the model over the whole training set
# for each of the 50 percentiles. torch.no_grad() skips autograd bookkeeping
# for this inference-only pass.
train_error_batches = []
with torch.no_grad():
    for X_t in training_dataloader:
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)
        train_error_batches.append((R_T_hat - R_T).squeeze(1))
train_errors = torch.cat(train_error_batches)

ths = {}
for i in range(50, 100):
    t = 0.01 * i
    th = torch.quantile(stacked_error, t, dim=0)
    ths[i] = th

    # The per-column threshold broadcasts across every training row.
    anomalies_detection = train_errors >= th

    with open(f'./numpyarray/anomalies_q{i}_train.npy', 'wb') as f:
        np.save(f, anomalies_detection.numpy())

In [34]:
%load_ext autoreload
%autoreload 2

import numpy as np
from utility.metric import equal_weight_strategy, plot_profit, anomaly_rebalance_strategy2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
# Reload the training-set returns written to disk by the detection part of
# this notebook.
returns = np.load('./numpyarray/returns_train.npy')
returns

array([[ 0.00380051,  0.00186359,  0.01662825, -0.00121154,  0.00846868],
       [ 0.00032222,  0.00692728,  0.04046779,  0.00815522,  0.00361591],
       [ 0.05563713,  0.06459294,  0.01193943,  0.13204011,  0.07664178],
       ...,
       [-0.01587566, -0.02388262, -0.01249238, -0.02094503, -0.02095945],
       [ 0.00489029, -0.00095417, -0.00794847, -0.00056895, -0.00727318],
       [ 0.00030009, -0.00641982, -0.00972579, -0.01312717, -0.01711749]],
      dtype=float32)

In [38]:
# Baseline: equal-weight strategy over the training returns. Its last element
# (res[-1]) is the benchmark each anomaly-based run is compared against below.
# NOTE(review): `res` is also the name of the training tensor returned by
# concate_data earlier in this file; if both parts run in one kernel this
# rebinding clobbers it — consider a distinct name such as `baseline_profit`.
res = equal_weight_strategy(returns)

In [39]:
# Search the saved training masks (percentiles 50..99) for the one whose
# anomaly-rebalance strategy finishes furthest above the equal-weight baseline.
# NOTE(review): if no percentile beats the baseline, idx_max_diff stays 0,
# which is not one of the percentile keys (50..99) used elsewhere.
max_diff = 0
idx_max_diff = 0

for i in range(50, 100):
    train_mask = np.load(f'./numpyarray/anomalies_q{i}_train.npy')
    rebalanced = anomaly_rebalance_strategy2(returns, train_mask)

    final_gap = rebalanced[-1] - res[-1]
    # max_diff starts at 0 and only ever grows, so this single comparison also
    # guarantees the improvement over the baseline is strictly positive.
    if final_gap > max_diff:
        max_diff, idx_max_diff = final_gap, i

In [40]:
# Best final-value improvement over the equal-weight baseline found above.
max_diff

1.7662203845819011

In [41]:
# Percentile (50..99) that produced the best improvement; 0 means none did.
idx_max_diff

96

In [42]:
# Apply the threshold of the best-performing percentile (chosen on the
# training data) to the test set and save the resulting anomaly mask.
th = ths[idx_max_diff]

anomalies_list = []
for X_t in test_dataloader:
    R_T = get_return_from_batch(X_t)
    err = (model(X_t) - R_T).squeeze(1)
    anomalies_list.append(err >= th)

anomalies_detection = torch.concat(anomalies_list)

# The file name (including its 'opimal' spelling) is kept exactly as-is,
# since other code may load the file by this name.
with open(f'./numpyarray/anomalies_q_opimal_test.npy', 'wb') as f:
    np.save(f, anomalies_detection.numpy())

In [43]:
# Per-column threshold selected for the test set (one value per column).
th

tensor([0.0453, 0.1074, 0.4233, 0.0952, 0.0420], grad_fn=<SqueezeBackward4>)