In [1]:
%load_ext autoreload
%autoreload 2

import torch
from utility.attentionAutoEncoder import AttentionAutoEncoder, concate_data, AttentionDataset, get_return_from_batch
from torch.utils.data import DataLoader
import numpy as np

In [2]:
# Fix the RNG seed before any random op so that weight initialisation, the
# random smoke-test input and everything derived from them are reproducible
# under Restart Kernel -> Run All.
torch.manual_seed(0)

# NOTE(review): the arguments (3, 5, 2) presumably mirror the (3, 5) input
# slices used in this notebook plus a latent size of 2 — confirm against
# utility.attentionAutoEncoder.AttentionAutoEncoder.
model = AttentionAutoEncoder(3, 5, 2)

In [3]:
# Random (3, 5) tensor used as a quick smoke-test input for the model.
input_data = torch.randn(3, 5)
input_data

tensor([[-0.2624,  1.0378,  0.0998, -0.6758, -0.0736],
        [ 1.2901,  0.5918,  0.5332,  1.5005,  0.1368],
        [-1.2476, -1.4252, -0.4980, -1.4543, -1.9176]])

In [4]:
# Forward pass on the random input; the displayed result has shape (1, 5).
model(input_data)

tensor([[ 2.4396, -3.3849, -0.7749, -3.5856, -0.2195]], grad_fn=<MmBackward0>)

In [5]:
import pandas as pd
from utility.data_processing import set_time_data

In [6]:
# Load the 5-year metrics CSV of every ticker and run it through
# `set_time_data`; the result is keyed by ticker, in the order listed here.
tickers = ('btc', 'eth', 'doge', 'xrp', 'ada')
dfs = {
    ticker: set_time_data(pd.read_csv(f'../data/{ticker}_metrics_5_years.csv'))
    for ticker in tickers
}

In [7]:
# Combine the per-ticker frames into two tensors.
# NOTE(review): presumably `res` holds the samples up to `day_split` (training)
# and `test_res` the samples after it — confirm in
# utility.attentionAutoEncoder.concate_data.
res, test_res = concate_data(dfs, day_split='2023-12-31')

In [8]:
# Shape check of the training tensor: (3, 5, 1458).
# Presumably (features, tickers, time steps) — TODO confirm the axis meaning.
res.shape

torch.Size([3, 5, 1458])

In [9]:
# Slice at index 0 of the last axis of `res`: a single (3, 5) sample.
X_t = res[:, :, 0]

In [10]:
# Forward pass on one real sample; when the notebook is run top to bottom the
# model has not been trained yet at this point.
model(X_t)

tensor([[ 0.8744, -1.0539, -0.5060, -0.6572, -0.0760]], grad_fn=<MmBackward0>)

In [11]:
# Size of the last axis of the sample (5; presumably one column per ticker).
X_t.shape[-1]

5

In [12]:
# Create DataSet
# Wrap the training and test tensors in the project's Dataset class.
training_dataset = AttentionDataset(res)
test_dataset = AttentionDataset(test_res)

# Create DataLoader
# shuffle=False keeps the samples in dataset order for both loaders.
# Training uses batches of 50; the test loader yields one sample per batch.
training_dataloader = DataLoader(training_dataset, batch_size=50, shuffle=False, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Iterate through DataLoader
# Peek at the first training batch only, to check its shape.
for batch in training_dataloader:
    print(batch.shape)  # Output: torch.Size([50, 3, 5])
    break  # To print only one batch

torch.Size([50, 3, 5])


In [13]:
# get_return_from_batch(batch)

In [14]:
# model(batch)

In [15]:
import torch.nn as nn
import torch.optim as optim

In [16]:
# Loss: mean squared error between the model output and the target returns.
criterion = nn.MSELoss()

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [17]:
num_epochs = 50
n_batches = len(training_dataloader)  # constant across epochs

for epoch in range(num_epochs):
    total_loss = 0
    for X_t in training_dataloader:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Target returns for this batch and the model's estimate of them.
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)
        loss = criterion(R_T_hat, R_T)

        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    avg_loss = total_loss / n_batches
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/50], Loss: 1.1023
Epoch [2/50], Loss: 0.7778
Epoch [3/50], Loss: 0.5659
Epoch [4/50], Loss: 0.4195
Epoch [5/50], Loss: 0.3157
Epoch [6/50], Loss: 0.2407
Epoch [7/50], Loss: 0.1856
Epoch [8/50], Loss: 0.1446
Epoch [9/50], Loss: 0.1139
Epoch [10/50], Loss: 0.0908
Epoch [11/50], Loss: 0.0733
Epoch [12/50], Loss: 0.0600
Epoch [13/50], Loss: 0.0500
Epoch [14/50], Loss: 0.0423
Epoch [15/50], Loss: 0.0364
Epoch [16/50], Loss: 0.0318
Epoch [17/50], Loss: 0.0283
Epoch [18/50], Loss: 0.0255
Epoch [19/50], Loss: 0.0233
Epoch [20/50], Loss: 0.0216
Epoch [21/50], Loss: 0.0201
Epoch [22/50], Loss: 0.0189
Epoch [23/50], Loss: 0.0178
Epoch [24/50], Loss: 0.0169
Epoch [25/50], Loss: 0.0160
Epoch [26/50], Loss: 0.0153
Epoch [27/50], Loss: 0.0146
Epoch [28/50], Loss: 0.0139
Epoch [29/50], Loss: 0.0133
Epoch [30/50], Loss: 0.0127
Epoch [31/50], Loss: 0.0122
Epoch [32/50], Loss: 0.0117
Epoch [33/50], Loss: 0.0112
Epoch [34/50], Loss: 0.0108
Epoch [35/50], Loss: 0.0103
Epoch [36/50], Loss: 0.0099
E

In [18]:
# Collect the signed residual (prediction - target) of every training batch.
# This is pure evaluation, so it runs under torch.no_grad(): without it each
# residual keeps its autograd graph alive, and the thresholds derived from the
# residuals carry a grad_fn they never need.
errors = []
with torch.no_grad():
    for X_t in training_dataloader:
        R_T = get_return_from_batch(X_t)
        # Forward pass
        R_T_hat = model(X_t)
        # Drop the singleton axis 1 so each batch contributes a 2-D tensor.
        err = (R_T_hat - R_T).squeeze(1)
        errors.append(err)

In [19]:
# Join the per-batch residuals along the first (sample) axis; torch.cat
# concatenates along dim 0 by default.
stacked_error = torch.cat(errors)
stacked_error.shape

torch.Size([1458, 5])

In [20]:
# Per-column 90th percentile of the signed training residuals; used below as
# the anomaly threshold.
# NOTE(review): the residual is signed, so this threshold only flags cases
# where the prediction exceeds the target — confirm a one-sided test is intended.
q90 = torch.quantile(stacked_error, 0.90, dim=0)
q90

tensor([0.0829, 0.0155, 0.0595, 0.0357, 0.0467], grad_fn=<SqueezeBackward4>)

In [21]:
# Flag test samples whose residual reaches the 90th-percentile training
# threshold, and keep the matching target returns alongside the flags.
anomalies_list = []
returns_list = []

# Inference only: no_grad() avoids building an autograd graph per test sample.
with torch.no_grad():
    for X_t in test_dataloader:
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)

        # Signed residual with the singleton axis 1 removed.
        err = (R_T_hat - R_T).squeeze(1)
        # Element-wise comparison against the per-column threshold.
        anomalies = err >= q90
        anomalies_list.append(anomalies)
        returns_list.append(R_T)

In [22]:
# Stack the per-sample flags into one boolean tensor.
anomalies_detection = torch.cat(anomalies_list)

# Stack the per-sample returns, then drop the singleton axis 1.
stacked_returns = torch.cat(returns_list)
all_returns = stacked_returns.squeeze(1)

In [23]:
# Number of flagged test samples per column.
# Column order presumably follows the ticker order used to build `dfs` — confirm.
anomalies_detection.sum(dim=0)

tensor([26, 39, 45, 38, 23])

In [24]:
# Boolean anomaly mask as a NumPy array, one row per test sample.
anomalies_detection.numpy()

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False,  True,  True, False,  True],
       ...,
       [False, False, False,  True, False],
       [False, False,  True, False, False],
       [False, False, False, False, False]])

In [25]:
# Target returns of the test samples as a float32 NumPy array.
all_returns.numpy()

array([[ 0.04248666,  0.02955466,  0.02979293,  0.02624824,  0.05020779],
       [ 0.02004069,  0.00433265, -0.00832219, -0.00529069, -0.02655358],
       [-0.04943179, -0.0658676 , -0.107687  , -0.07494581, -0.08653914],
       ...,
       [-0.01766928, -0.01487776, -0.03122776, -0.04329219, -0.03598882],
       [-0.0100486 ,  0.00254795, -0.00297066, -0.01699607,  0.00157149],
       [ 0.00932239, -0.00688175,  0.00836564,  0.01134853, -0.01749099]],
      dtype=float32)

In [26]:
# Persist the q90 anomaly mask; np.save opens and closes the path itself.
np.save('./numpyarray/anomalies_test.npy', anomalies_detection.numpy())

In [27]:
# Persist the test-set target returns; np.save opens and closes the path itself.
np.save('./numpyarray/returns_test.npy', all_returns.numpy())

In [28]:
# Per-column 10th percentile of the signed training residuals.
# NOTE(review): `q10` is not used by any later cell visible in this notebook.
q10 = torch.quantile(stacked_error, 0.10, dim=0)
q10

tensor([-0.0344, -0.1058, -0.0659, -0.1793, -0.0601],
       grad_fn=<SqueezeBackward4>)

In [29]:
# for X_t in all_returns:
#     R_T = get_return_from_batch(X_t)
#     R_T_hat = model(X_t)

#     err = (R_T_hat - R_T).squeeze(1)
#     anomalies = err >= q90
#     anomalies_list.append(anomalies)
#     returns_list.append(R_T)

In [35]:
# Sweep the anomaly threshold over the 50th..99th percentiles of the training
# residuals and save one boolean anomaly mask per percentile.

# The test residuals do not depend on the threshold, so the model is run over
# the test set once here instead of once per percentile. no_grad() skips the
# autograd graph, which evaluation does not need.
# NOTE(review): this assumes model(X_t) is deterministic at this point (e.g. no
# active dropout) — confirm against AttentionAutoEncoder.
test_error_batches = []
with torch.no_grad():
    for X_t in test_dataloader:
        R_T = get_return_from_batch(X_t)
        R_T_hat = model(X_t)
        test_error_batches.append((R_T_hat - R_T).squeeze(1))
test_errors = torch.concat(test_error_batches)

# Detach so the thresholds never carry a grad_fn, even if `stacked_error` was
# computed with autograd enabled.
train_errors = stacked_error.detach()

for i in range(50, 100):
    t = 0.01 * i
    # Per-column threshold at the i-th percentile of the training residuals.
    th = torch.quantile(train_errors, t, dim=0)

    anomalies_detection = test_errors >= th

    with open(f'./numpyarray/anomalies_q{i}_test.npy', 'wb') as f:
        np.save(f, anomalies_detection.numpy())