In [None]:
import nbimporter
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from typing import Tuple, Callable
from skopt import gp_minimize
from skopt.space import Integer, Categorical, Real
from dataloader import load_dataset, get_cryptoquant_data, combineTA_to_OHLCV
from sklearn.metrics import r2_score
import torch.nn as nn
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import numpy as np

In [7]:
data = get_cryptoquant_data()
dataf = combineTA_to_OHLCV(data).process()
print(dataf.head(10))

                        close        volume       ema  rsi  MACD_12_26_9  \
datetime                                                                   
2019-04-14 02:00:00  0.325129  2.390760e+06       NaN  NaN           NaN   
2019-04-14 03:00:00  0.325212  2.035724e+06       NaN  NaN           NaN   
2019-04-14 04:00:00  0.325642  1.560139e+06       NaN  NaN           NaN   
2019-04-14 05:00:00  0.323081  2.632353e+06       NaN  NaN           NaN   
2019-04-14 06:00:00  0.322276  2.753541e+06       NaN  NaN           NaN   
2019-04-14 07:00:00  0.323365  1.710360e+06       NaN  NaN           NaN   
2019-04-14 08:00:00  0.323134  1.605113e+06       NaN  NaN           NaN   
2019-04-14 09:00:00  0.323954  2.177065e+06       NaN  NaN           NaN   
2019-04-14 10:00:00  0.325225  1.280122e+06       NaN  NaN           NaN   
2019-04-14 11:00:00  0.324816  1.051731e+06  0.324183  NaN           NaN   

                     MACDh_12_26_9  MACDs_12_26_9  bollinger_width  
datetime          

In [25]:
X_train, X_test_tensor, y_train_tensor, y_test_tensor, data_min, data_max, scaler = load_dataset()

In [9]:
print(f"X train shape:{X_train_tensor.shape}")
print(f"X test shape:{X_test_tensor.shape}")
print(f"y train shape:{y_train_tensor.shape}")
print(f"y test shape:{y_test_tensor.shape}")

X train shape:(36753, 20, 8)
X test shape:(15752, 20, 8)
y train shape:(36753, 8)
y test shape:(15752, 8)


In [10]:
# Create a function to time our experiments
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
  total_time = end - start
  print(f"Train time on {device}: {total_time:.3f} seconds")
  return total_time

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionLSTM(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_layers: int, dropout_prob: float = 0.2):
        super(AttentionLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_prob
        )

        # Attention mechanism
        self.attn_weight = nn.Linear(hidden_size, 1)

        # Output layer: now outputs 8 features
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, input_size),  # output 8 features
            # nn.Sigmoid()  # Uncomment this ONLY if you want outputs in range [0, 1]
        )

        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        # x: [batch_size, seq_len, input_size]
        batch_size = x.shape[0]

        # Initialize hidden and cell states
        h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        c_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)

        # LSTM output
        lstm_out, _ = self.lstm(x, (h_0, c_0))  # [batch, seq_len, hidden_size]

        # Attention mechanism
        attn_scores = torch.tanh(self.attn_weight(lstm_out))  # [batch, seq_len, 1]
        attn_weights = F.softmax(attn_scores, dim=1)  # [batch, seq_len, 1]
        context_vector = torch.sum(attn_weights * lstm_out, dim=1)  # [batch, hidden_size]

        # Dropout + final output
        out = self.dropout(context_vector)
        out = self.fc(out)  # [batch, 8]
        return out



In [None]:
# Define the objective function for Bayesian Optimization
def objective(params):
    num_layers = int(params[0])
    hidden_size = int(params[1])
    learning_rate = params[2]
    dropout_prob = params[3]
    optimizer_name = params[4]
    num_epochs = params[5]

    # Instantiate the CNN-LSTM model with these hyperparameters
    model = AttentionLSTM(input_size=8, hidden_size=hidden_size, num_layers=num_layers, dropout_prob=dropout_prob)

    # Define the loss function and optimizer
    loss_fn = nn.MSELoss()

    # Define the optimizer
    if optimizer_name == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Training loop 
    model.train()
    num_epochs = num_epochs
    batch_size = 30

    for epoch in range(num_epochs):
        for i in range(0, len(X_train_tensor), batch_size):
            inputs = X_train_tensor[i:i+batch_size]
            labels = y_train_tensor[i:i+batch_size]

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

    # Validation step
    model.eval()
    with torch.inference_mode():
        val_outputs = model(X_test_tensor)
        val_loss = loss_fn(val_outputs, y_test_tensor)

    return val_loss.item()


In [13]:
torch.manual_seed(42)
# Instantiate a sample model for test
model = AttentionLSTM(input_size=8, hidden_size=33, num_layers=3, dropout_prob=0.02)

In [14]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr= 0.0003)

In [15]:
def regression_r2(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
    y_true_np = y_true.cpu().numpy()
    y_pred_np = y_pred.cpu().numpy()

    # Compute R² for each feature separately and average
    r2_scores = [r2_score(y_true_np[:, i], y_pred_np[:, i]) for i in range(y_true_np.shape[1])]
    return torch.tensor(sum(r2_scores) / len(r2_scores))


In [16]:
import torch
from typing import Tuple, Callable
import matplotlib.pyplot as plt


def recursive_forecast(
    model: torch.nn.Module,
    initial_input: torch.Tensor,
    forecast_steps: int,
    lookback: int,
    device: torch.device
) -> torch.Tensor:
    """
    Recursively forecasts future multi-feature values using the trained LSTM model.

    Args:
        model: Trained LSTM model
        initial_input: Tensor of shape [1, lookback, num_features]
        forecast_steps: Number of future steps to predict
        lookback: Length of input window
        device: CPU or CUDA

    Returns:
        forecast: Tensor of shape [forecast_steps, output_dim]
    """
    model.eval()
    forecast = []

    current_input = initial_input.clone().to(device)

    with torch.inference_mode():
        for _ in range(forecast_steps):
            pred = model(current_input)  # [1, output_dim] -> [1, 8]
            forecast.append(pred.squeeze(0))  # [8]

            # Replace last step with predicted features
            new_step = pred.unsqueeze(1)  # [1, 1, 8]
            current_input = torch.cat([current_input[:, 1:], new_step], dim=1)  # slide window

    return torch.stack(forecast)  # [forecast_steps, 8]


def evaluate_forecast(
    forecast: torch.Tensor,
    ground_truth: torch.Tensor,
    loss_fn: torch.nn.Module,
    metrics_fn: Callable[[torch.Tensor, torch.Tensor], float] = None
) -> Tuple[float, float]:
    """
    Compares forecast with ground truth and computes evaluation metrics.

    Args:
        forecast: Tensor of predicted values [forecast_steps, output_dim]
        ground_truth: Tensor of true values [forecast_steps, output_dim]
        loss_fn: Loss function (e.g. MSE)
        metrics_fn: Optional function to compute additional metrics like R^2

    Returns:
        loss_value, metrics_value
    """
    loss_value = loss_fn(forecast, ground_truth).item()
    metrics_value = metrics_fn(forecast, ground_truth).item() if metrics_fn else None
    return loss_value, metrics_value



In [17]:
from sklearn.metrics import r2_score
import torch.nn as nn

def run_training_and_testing(
    model: torch.nn.Module,
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    X_test: torch.Tensor,
    y_test: torch.Tensor,
    lookback: int,
    device: torch.device,
    epochs: int = 100,
    lr: float = 0.001,
):
    
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32)
    
    # Define loss and optimizer
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)


    # Begin training loop
    for epoch in tqdm(range(1, epochs + 1)):
        model.train()

        # Forward pass
        y_pred_train = model(X_train)
        loss = loss_fn(y_pred_train, y_train)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate every 20 epochs
        if epoch % 20 == 0 or epoch == epochs:
            # Forecast using recursive strategy
            initial_input = X_test[0].unsqueeze(0)  # [1, lookback, features]
            forecast_steps = len(y_test)
            y_pred_test = recursive_forecast(model, initial_input, forecast_steps, lookback, device)

            # Evaluate y_pred_test
            test_loss, test_r2 = evaluate_forecast(y_pred_test, y_test[:forecast_steps], loss_fn, regression_r2)

            print(f"Epoch [{epoch}/{epochs}]")
            print(f"  Train Loss: {loss.item():.5f}")
            print(f"  Test Loss:  {test_loss:.5f} | R² Score: {test_r2:.4f}")

    return model


In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Measure time
from timeit import default_timer as timer
train_time_start_model_0 = timer()

trained_model = run_training_and_testing(
    model=AttentionLSTM(input_size=8, hidden_size=64, num_layers=2),
    X_train=X_train_tensor,
    y_train=y_train_tensor,
    X_test=X_test_tensor,
    y_test=y_test_tensor,
    lookback=20,
    device=device,
    epochs=100
)

train_time_end_model_0 = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_model_0,
                                            end=train_time_end_model_0,
                                            device=device)


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch [20/100]
  Train Loss: 0.01016
  Test Loss:  0.02431 | R² Score: -307882575.8647
Epoch [40/100]
  Train Loss: 0.00706
  Test Loss:  0.02293 | R² Score: -36042974.5470
Epoch [60/100]
  Train Loss: 0.00630
  Test Loss:  0.02263 | R² Score: -122572244.8929
Epoch [80/100]
  Train Loss: 0.00598
  Test Loss:  0.02326 | R² Score: -347463083.5751
Epoch [100/100]
  Train Loss: 0.00561
  Test Loss:  0.02317 | R² Score: -144723069.6040
Train time on cpu: 500.556 seconds


In [24]:
scaler1=scaler

In [None]:
def denormalize(data: torch.Tensor, scaler:MinMaxScaler) -> np.ndarray:
    """
    Denormalizes a tensor using the given scaler.
    
    Args:
        data: Tensor of shape [N, features]
        scaler: Fitted sklearn MinMaxScaler
    
    Returns:
        Denormalized NumPy array
    """
    return scaler.inverse_transform(data.cpu().numpy())



NameError: name 'np' is not defined

In [None]:
# Denormalize true and predicted values
y_train_true_denorm = denormalize(y_train_tensor, scaler)
y_train_pred_denorm = denormalize(y_train_pred, scaler)

y_test_true_denorm = denormalize(y_test_tensor, scaler)
y_test_pred_denorm = denormalize(y_test_pred, scaler)


In [None]:
def plot_predictions(true_vals, predicted_vals, title='Prediction vs Actual', feature_idx=0, dataset_type='Train'):
    plt.figure(figsize=(12, 5))
    plt.plot(true_vals[:, feature_idx], label='True', color='blue')
    plt.plot(predicted_vals[:, feature_idx], label='Predicted', color='red', alpha=0.7)
    plt.title(f'{title} - {dataset_type} Set (Feature {feature_idx})')
    plt.xlabel('Time step')
    plt.ylabel('Price')
    plt.legend()
    plt.grid(True)
    plt.show()

# Example: Plot for 'close' price (assuming it's feature 0)
plot_predictions(y_train_true_denorm, y_train_pred_denorm, dataset_type='Train', feature_idx=0)
plot_predictions(y_test_true_denorm, y_test_pred_denorm, dataset_type='Test', feature_idx=0)
