In [56]:
from processing_utils import DataPrepare
from gru_training_utils import TimeSeriesDataset, mape

import polars as pl
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import math


In [41]:
df: pl.DataFrame = pl.read_parquet(r'C:\Users\310\Desktop\Progects_Py\data\microstructure_price_prediction_data\dfs\2024-06-29 20-00-00_2024-07-01 00-00-00_delta_0-00-10_return_5_sec.parquet')
target_var: str = "log_return"
cat_features = ['currency_pair']

In [42]:
data = DataPrepare(df)

In [43]:
df_train, df_test = data.train_test_split()

Train test ratio is 0.8
Train len for DOGEUSDT is 7616
Test len for DOGEUSDT is 1904
Train len for AVAXUSDT is 6586
Test len for AVAXUSDT is 1647


In [45]:
dataset = TimeSeriesDataset(df=df_train, target_var=target_var)

In [62]:
class GRUModel(nn.Module):
    def __init__(self, input_features: int, num_currency_pairs: int,
                 hidden_size: int, num_layers: int, dropout: float):
        """
        input_features: number of features per currency pair
        num_currency_pairs: how many currency pairs we have
        hidden_size: number of units in GRU hidden layer(s)
        num_layers: number of stacked GRU layers
        dropout: dropout rate between GRU layers (if num_layers > 1)
        """
        super(GRUModel, self).__init__()
        self.num_currency_pairs = num_currency_pairs
        self.input_features = input_features
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Input size to GRU = input_features * num_currency_pairs
        input_size = input_features * num_currency_pairs

        # Define GRU
        # batch_first=True means input is (batch, seq_len, input_size)
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True
        )

        # Final fully connected layer to map hidden state to num_currency_pairs predictions
        self.fc = nn.Linear(hidden_size, num_currency_pairs)

    def forward(self, X, h=None):
        # X shape: (batch, seq_length, features, currency_pairs)
        B, F, C, L = X.shape
        #[32, 11, 2, 10]
        # From (B, F, C, L) to (B, L, F, C)
        X = X.permute(0, 3, 1, 2)  # 0->0, 3->1, 1->2, 2->3
        # Now X is (32, 10, 11, 2), which means:
        # B=32, L=10, F=11, C=2
        
        X = X.reshape(B, L, F*C)   # (32, 10, 22)
        # Pass through GRU
        # out: (B, L, hidden_size)
        # If h is provided, use it as initial hidden state
        out, h_n = self.gru(X, h)  # out: (B, L, hidden_size)
        last_out = out[:, -1, :]   # (B, hidden_size)
        preds = self.fc(last_out)  # (B, num_currency_pairs)
        return preds, h_n


In [63]:
def train_model(
    train_dataset,
    test_dataset,
    input_features,
    num_currency_pairs,
    hidden_size=128,
    num_layers=2,
    dropout=0.2,
    learning_rate=1e-3,
    batch_size=32,
    num_epochs=10,
    log_dir="./logs"
):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    model = GRUModel(input_features, num_currency_pairs, hidden_size, num_layers, dropout)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    writer = SummaryWriter(log_dir=log_dir)

    global_step = 0
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        h = None
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds, h = model(X_batch, h)
            h = h.detach()

            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            writer.add_scalar("Train/MSE", loss.item(), global_step)
            global_step += 1

        avg_train_mse = train_loss / len(train_loader)
        avg_train_rmse = math.sqrt(avg_train_mse)
        
        # Validation
        model.eval()
        val_loss = 0.0
        val_steps = 0
        all_preds = []
        all_targets = []
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                preds, _ = model(X_batch, None)
                loss = criterion(preds, y_batch)
                val_loss += loss.item()
                all_preds.append(preds)
                all_targets.append(y_batch)
                val_steps += 1

        all_preds = torch.cat(all_preds, dim=0)
        all_targets = torch.cat(all_targets, dim=0)
        avg_val_mse = val_loss / val_steps
        avg_val_rmse = math.sqrt(avg_val_mse)
        current_mape = mape(all_preds, all_targets)

        writer.add_scalar("Val/RMSE", avg_val_rmse, epoch)
        writer.add_scalar("Val/MAPE", current_mape, epoch)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train RMSE: {avg_train_rmse:.6f}, "
              f"Val RMSE: {avg_val_rmse:.6f}, "
              f"Val MAPE: {current_mape:.2f}%")

        # Every 5 epochs: plot predictions vs. true values
        if (epoch + 1) % 5 == 0:
            # Let's take the first batch from test_loader again to plot
            X_plot, y_plot = next(iter(test_loader))
            model.eval()
            with torch.no_grad():
                p_plot, _ = model(X_plot, None)

            # Convert to numpy
            p_np = p_plot.cpu().numpy()
            t_np = y_plot.cpu().numpy()

            # We'll plot the predictions for the first currency pair only
            currency_pair_idx = 0
            fig, ax = plt.subplots(figsize=(8, 4))
            ax.plot(t_np[:, currency_pair_idx], label='True', marker='o')
            ax.plot(p_np[:, currency_pair_idx], label='Pred', marker='x')
            ax.set_title(f'Epoch {epoch+1} Predictions vs True (CP index {currency_pair_idx})')
            ax.set_xlabel('Sample index in batch')
            ax.set_ylabel('Value')
            ax.legend()

            # Log the figure to TensorBoard
            writer.add_figure("Val/Predictions", fig, epoch)
            plt.close(fig)

    writer.close()

In [61]:

train_dataset = TimeSeriesDataset(df=df_train, target_var="log_return", cat_features=['currency_pair'], seq_length=10)
test_dataset = TimeSeriesDataset(df=df_test, target_var="log_return", cat_features=['currency_pair'], seq_length=10)

input_features = train_dataset.num_features
num_currency_pairs = train_dataset.num_currency_pairs

train_model(
    train_dataset,
    test_dataset,
    input_features,
    num_currency_pairs,
    hidden_size=128,
    num_layers=3,
    dropout=0.2,
    learning_rate=1e-3,
    batch_size=32,
    num_epochs=10,
    log_dir="./logs"
)

Epoch 1/10, Train RMSE: 0.014236, Val RMSE: 0.001804, Val MAPE: 79054.09%
Epoch 2/10, Train RMSE: 0.002213, Val RMSE: 0.001221, Val MAPE: 69814.61%
Epoch 3/10, Train RMSE: 0.001718, Val RMSE: 0.001356, Val MAPE: 69432.66%
Epoch 4/10, Train RMSE: 0.001240, Val RMSE: 0.000955, Val MAPE: 40507.09%
Epoch 5/10, Train RMSE: 0.000921, Val RMSE: 0.001322, Val MAPE: 60986.18%
Epoch 6/10, Train RMSE: 0.000948, Val RMSE: 0.001769, Val MAPE: 98906.95%
Epoch 7/10, Train RMSE: 0.001115, Val RMSE: 0.002026, Val MAPE: 118303.76%
Epoch 8/10, Train RMSE: 0.000795, Val RMSE: 0.000315, Val MAPE: 9614.21%
Epoch 9/10, Train RMSE: 0.000494, Val RMSE: 0.000411, Val MAPE: 19068.13%
Epoch 10/10, Train RMSE: 0.001810, Val RMSE: 0.000532, Val MAPE: 27054.94%
