In [1]:
# Data Manipulation and Preparation
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Evaluation metrics and visualization
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

# Utility imports
import itertools
import time
import csv
import os

# Pick the compute device once: CUDA when PyTorch reports it as available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Locations of the preprocessed CSV files, one per time resolution (30-minute, 4-hour, daily)
path_30min = ''
path_4h = ''
path_1d = ''

# Read every resolution into its own DataFrame
df_30min = pd.read_csv(path_30min)
df_4h = pd.read_csv(path_4h)
df_daily = pd.read_csv(path_1d)

all_dfs = [df_30min, df_4h, df_daily]

# Parse both timestamp columns of every frame into pandas datetimes (in place)
for df in all_dfs:
    for timestamp_column in ('open_time', 'close_time'):
        df[timestamp_column] = pd.to_datetime(df[timestamp_column])

# Column groups available as model inputs: raw price/volume columns and differenced columns
price_features = ['close', 'high', 'low', 'volume', 'quote_vol', 'count', 'buy_base', 'buy_quote']
diff_features = ['close_diff', 'high_diff', 'low_diff']

In [None]:
# Define GRU Model
class GRUModel(nn.Module):
    """
    GRU followed by a two-layer dense head.

    The GRU processes the whole input sequence; only its output at the final
    time step is fed through ``fc1 -> activation -> fc2`` to produce the prediction.
    """

    def __init__(self, input_dim, gru_dim, dense_dim, output_dim, num_layers=1, dropout=0.0, activation_function=nn.ReLU):
        """
        Initializes the GRU model.
        Args:
        - input_dim: Number of input features.
        - gru_dim: Number of units in the GRU layer.
        - dense_dim: Number of units in the dense (fully connected) layer.
        - output_dim: Number of output features.
        - num_layers: Number of GRU layers.
        - dropout: Dropout rate between stacked GRU layers. Ignored (treated as 0.0)
          when num_layers == 1, because there is no inter-layer connection to drop.
        - activation_function: Activation class (not instance) applied between the dense layers.
        """
        super().__init__()
        # nn.GRU only applies dropout between stacked layers and emits a warning
        # when a non-zero rate is combined with a single layer, so neutralise it here.
        gru_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_dim, gru_dim, num_layers=num_layers, batch_first=True, dropout=gru_dropout)
        self.fc1 = nn.Linear(gru_dim, dense_dim)
        self.activation = activation_function()
        self.fc2 = nn.Linear(dense_dim, output_dim)

    def forward(self, x):
        """
        Defines the forward pass of the model.
        Args:
        - x: Input tensor of shape (batch_size, sequence_length, input_dim).
        Returns:
        - Output tensor of shape (batch_size, output_dim).
        """
        x, _ = self.gru(x)  # the returned hidden state is not needed
        x = x[:, -1, :]  # keep only the GRU output at the last time step
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        return x

In [None]:
# Function to create sequences for training the model
def create_sequences(data, target, window_size):
    """
    Builds sliding-window samples for sequence training.

    Window ``i`` covers ``data[i : i + window_size]`` and is paired with the
    target value immediately after the window, ``target[i + window_size]``.

    Args:
    - data: The feature data, indexable along its first axis.
    - target: The target data, aligned with ``data`` along the first axis.
    - window_size: Number of time steps in each sequence.
    Returns:
    - sequences: Array of feature windows.
    - labels: Array of the label belonging to each window.
    """
    # One window per valid start position; an empty range yields empty arrays.
    window_starts = range(len(data) - window_size)
    sequences = [data[start:start + window_size] for start in window_starts]
    labels = [target[start + window_size] for start in window_starts]
    return np.array(sequences), np.array(labels)

# Function to prepare data for training and testing
def prepare_data(df, target_column, window_size, feature_columns):
    """
    Prepares the data for training and testing the GRU model.

    The rows are split chronologically (first 80% train, last 20% test, no
    shuffling). Both MinMax scalers are fitted on the training portion only and
    then applied to the test portion, so no information about the test range
    leaks into training. As a consequence, scaled test values may fall outside
    [0, 1]. Sequences are built separately for each split, so the first
    ``window_size`` rows of each split serve only as context.

    Args:
    - df: The dataframe containing the data.
    - target_column: The name of the column to be predicted.
    - window_size: Number of time steps in each input sequence.
    - feature_columns: List of columns to use as features.
    Returns:
    - X_train_seq: Sequences for training.
    - X_test_seq: Sequences for testing.
    - y_train_seq: Corresponding labels for training sequences.
    - y_test_seq: Corresponding labels for testing sequences.
    - scaler_y: Scaler fitted on the training targets (use it to invert predictions).
    """
    X = df[feature_columns].values
    y = df[target_column].values.reshape(-1, 1)

    # Split BEFORE scaling: fitting the scalers on the full series would let the
    # test set's min/max influence the training inputs and targets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    scaler_X = MinMaxScaler()
    X_train = scaler_X.fit_transform(X_train)
    X_test = scaler_X.transform(X_test)

    scaler_y = MinMaxScaler()
    y_train = scaler_y.fit_transform(y_train)
    y_test = scaler_y.transform(y_test)

    X_train_seq, y_train_seq = create_sequences(X_train, y_train, window_size)
    X_test_seq, y_test_seq = create_sequences(X_test, y_test, window_size)

    return X_train_seq, X_test_seq, y_train_seq, y_test_seq, scaler_y

# Helper function to plot and save the training and validation loss over epochs
def plot_and_save_loss(training_loss, validation_loss, file_prefix, model_counter, start_epoch=3):
    """
    Plots and saves the training and validation loss over epochs.

    Writes ``{file_prefix}_loss_plot_model_{model_counter}.png`` and
    ``{file_prefix}_losses_model_{model_counter}.csv``. The CSV always contains
    every epoch; only the plot skips the first ``start_epoch`` epochs.

    Args:
    - training_loss: List of training losses per epoch.
    - validation_loss: List of validation losses per epoch.
    - file_prefix: Prefix for the saved file names.
    - model_counter: Model identifier (used in file names).
    - start_epoch: Epoch to start plotting (useful to skip early high losses).
      If it is negative or not smaller than the number of recorded epochs, the
      plot starts at epoch 0 instead of coming out empty.
    """
    # A run shorter than start_epoch would otherwise produce an empty figure.
    first_epoch = start_epoch if 0 <= start_epoch < len(training_loss) else 0

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(range(first_epoch, len(training_loss)), training_loss[first_epoch:], label='Training Loss')
    ax.plot(range(first_epoch, len(validation_loss)), validation_loss[first_epoch:], label='Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title(f'Model {model_counter} - Training and Validation Loss Over Epochs')
    ax.legend()
    fig.savefig(f'{file_prefix}_loss_plot_model_{model_counter}.png')
    plt.show()
    # Release the figure explicitly so repeated calls (e.g. one per grid-search
    # model) do not accumulate open figures.
    plt.close(fig)

    # Save losses to a CSV file
    loss_df = pd.DataFrame({
        "Epoch": range(len(training_loss)),
        "Training Loss": training_loss,
        "Validation Loss": validation_loss
    })
    loss_file_path = f'{file_prefix}_losses_model_{model_counter}.csv'
    loss_df.to_csv(loss_file_path, index=False)

# Function to save the trained model to a file
def save_model(model, file_path):
    """
    Persists a PyTorch model's parameters to disk.

    Only the ``state_dict`` is written (not the module object itself).

    Args:
    - model: The trained model.
    - file_path: Destination handed to ``torch.save``.
    """
    weights = model.state_dict()
    torch.save(weights, file_path)

# Function to save predictions to CSV files for further analysis
def save_predictions(y_train, y_train_pred, y_test, y_test_pred, model_counter, file_prefix):
    """
    Writes actual-vs-predicted tables for the training and testing data to CSV.

    Two files are produced, ``{file_prefix}_train_predictions_model_{model_counter}.csv``
    and ``{file_prefix}_test_predictions_model_{model_counter}.csv``, each with an
    ``Actual`` column and a ``Model_{model_counter}`` column.

    Args:
    - y_train: Actual training labels.
    - y_train_pred: Predicted training labels.
    - y_test: Actual testing labels.
    - y_test_pred: Predicted testing labels.
    - model_counter: Model identifier (used in column and file names).
    - file_prefix: Prefix for the saved file names.
    """
    prediction_column = f"Model_{model_counter}"

    def _as_frame(actual, predicted):
        # Inputs are flattened so column vectors of shape (n, 1) become plain columns.
        return pd.DataFrame({"Actual": actual.flatten(), prediction_column: predicted.flatten()})

    # Build both tables before touching the file system.
    train_frame = _as_frame(y_train, y_train_pred)
    test_frame = _as_frame(y_test, y_test_pred)

    train_frame.to_csv(f"{file_prefix}_train_predictions_model_{model_counter}.csv", index=False)
    test_frame.to_csv(f"{file_prefix}_test_predictions_model_{model_counter}.csv", index=False)

# Main function to train and evaluate the GRU model
def train_and_evaluate(df, target_column, feature_columns, params, model_counter, file_prefix='results', use_early_stopping=False, plot_loss=False):
    """
    Trains and evaluates the GRU model using the provided parameters and data.

    Side effects on success: appends one row to ``{file_prefix}_{target_column}.csv``,
    writes the train/test prediction CSVs via ``save_predictions`` and, when
    ``plot_loss`` is set, the loss plot/CSV via ``plot_and_save_loss``.

    Args:
    - df: DataFrame containing the data.
    - target_column: The name of the column to predict.
    - feature_columns: List of feature columns.
    - params: Dictionary of hyperparameters. Required keys: 'window_size',
      'gru_dim', 'dense_dim', 'num_layers', 'dropout', 'lr', 'batch_size',
      'num_epochs', 'optimizer_type' (name of a class in torch.optim),
      'patience' and 'activation_function' (an activation class such as nn.Tanh).
    - model_counter: Identifier for the model (used in file names and logs).
    - file_prefix: Prefix for saving results and models.
    - use_early_stopping: When True, training stops once the per-epoch loss on
      the held-out split has not improved for ``params['patience']`` consecutive
      epochs. Note that this split is the same one used for the final test metrics.
    - plot_loss: Boolean flag to plot and save the loss over epochs.
    Returns:
    - result: On success, the hyperparameters plus all performance metrics and
      timings. On failure, the hyperparameters that could be read from ``params``
      (None for missing ones) plus an "error" message; exceptions are reported,
      not re-raised.
    """
    result = {}  # Initialize result to ensure it's always defined
    try:
        # Extract parameters
        window_size = params['window_size']
        gru_dim = params['gru_dim']
        dense_dim = params['dense_dim']
        num_layers = params['num_layers']
        dropout = params['dropout']
        lr = params['lr']
        batch_size = params['batch_size']
        num_epochs = params['num_epochs']
        optimizer_type = params['optimizer_type']
        patience = params['patience']
        activation_function = params['activation_function']

        # Clear CUDA cache
        torch.cuda.empty_cache()

        # Prepare data sequences for training and testing
        X_train_seq, X_test_seq, y_train_seq, y_test_seq, scaler_y = prepare_data(df, target_column, window_size, feature_columns)

        # Convert sequences to PyTorch tensors and move to the selected device
        X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32).to(device)
        X_test_tensor = torch.tensor(X_test_seq, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train_seq, dtype=torch.float32).view(-1, 1).to(device)
        y_test_tensor = torch.tensor(y_test_seq, dtype=torch.float32).view(-1, 1).to(device)

        # Create DataLoader objects for training and testing
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

        # Initialize the GRU model with the specified parameters
        input_dim = X_train_tensor.shape[2]
        output_dim = 1
        model = GRUModel(input_dim, gru_dim, dense_dim, output_dim, num_layers, dropout, activation_function).to(device)

        # Define the loss function and the optimizer
        criterion = nn.MSELoss()
        optimizer = getattr(optim, optimizer_type)(model.parameters(), lr=lr)

        # Early-stopping bookkeeping (only consulted when use_early_stopping is True)
        best_loss = float('inf')
        patience_counter = 0
        start_time = time.time()

        training_losses = []
        validation_losses = []

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_train_loss += loss.item()

            # Calculate average training loss for the epoch
            epoch_train_loss /= len(train_loader)
            training_losses.append(epoch_train_loss)

            # Evaluate the model on the validation (test) set
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for inputs, labels in test_loader:
                    outputs = model(inputs)
                    val_loss += criterion(outputs, labels).item()

            # Calculate average validation loss for the epoch
            val_loss /= len(test_loader)
            validation_losses.append(val_loss)

            if use_early_stopping:
                if val_loss < best_loss:
                    best_loss = val_loss
                    patience_counter = 0
                else:
                    # Also reached when val_loss is NaN, so a diverged run stops too.
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping after epoch {epoch + 1}: no validation improvement for {patience} epochs")
                        break

        # Calculate total training time
        training_time = time.time() - start_time

        # Final evaluation on the training and testing data
        model.eval()
        evaluation_start_time = time.time()
        with torch.no_grad():
            train_predictions = model(X_train_tensor).cpu().numpy()
            test_predictions = model(X_test_tensor).cpu().numpy()

        # Inverse transform predictions and actual values back to the original scale
        train_predictions_inverse = scaler_y.inverse_transform(train_predictions)
        test_predictions_inverse = scaler_y.inverse_transform(test_predictions)
        y_train_inverse = scaler_y.inverse_transform(y_train_seq)
        y_test_inverse = scaler_y.inverse_transform(y_test_seq)

        # Calculate performance metrics
        train_mse = mean_squared_error(y_train_inverse, train_predictions_inverse)
        test_mse = mean_squared_error(y_test_inverse, test_predictions_inverse)
        train_mae = mean_absolute_error(y_train_inverse, train_predictions_inverse)
        test_mae = mean_absolute_error(y_test_inverse, test_predictions_inverse)
        train_rmse = np.sqrt(train_mse)
        test_rmse = np.sqrt(test_mse)
        train_r2 = r2_score(y_train_inverse, train_predictions_inverse)
        test_r2 = r2_score(y_test_inverse, test_predictions_inverse)
        # NOTE: MAPE divides by the actual value, so it is inf/nan whenever an
        # actual value is exactly zero.
        train_mape = np.mean(np.abs((y_train_inverse - train_predictions_inverse) / y_train_inverse)) * 100
        test_mape = np.mean(np.abs((y_test_inverse - test_predictions_inverse) / y_test_inverse)) * 100
        # Directional accuracy: share of consecutive steps where the sign of the
        # predicted change matches the sign of the actual change.
        train_directional_acc = np.mean(np.sign(y_train_inverse[1:] - y_train_inverse[:-1]) == np.sign(train_predictions_inverse[1:] - train_predictions_inverse[:-1]))
        test_directional_acc = np.mean(np.sign(y_test_inverse[1:] - y_test_inverse[:-1]) == np.sign(test_predictions_inverse[1:] - test_predictions_inverse[:-1]))

        evaluation_time = time.time() - evaluation_start_time

        # Store results in a dictionary
        result = {
            "window_size": window_size,
            "gru_dim": gru_dim,
            "num_layers": num_layers,
            "dense_dim": dense_dim,
            "dropout": dropout,
            "lr": lr,
            "batch_size": batch_size,
            "num_epochs": num_epochs,
            "optimizer_type": optimizer_type,
            "train_mse": train_mse,
            "test_mse": test_mse,
            "train_mae": train_mae,
            "test_mae": test_mae,
            "train_rmse": train_rmse,
            "test_rmse": test_rmse,
            "train_r2": train_r2,
            "test_r2": test_r2,
            "train_mape": train_mape,
            "test_mape": test_mape,
            "train_directional_acc": train_directional_acc,
            "test_directional_acc": test_directional_acc,
            "training_time": training_time,
            "evaluation_time": evaluation_time,
            "patience": patience,
            "activation_function": activation_function.__name__
        }

        # Save the result to a CSV file
        with open(f'{file_prefix}_{target_column}.csv', 'a', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=result.keys())
            if f.tell() == 0:
                writer.writeheader()  # Write header only if file is empty
            writer.writerow(result)

        # Save predictions for further analysis
        save_predictions(y_train_inverse, train_predictions_inverse, y_test_inverse, test_predictions_inverse, model_counter, file_prefix)

        # Plot and save loss over epochs if specified
        if plot_loss:
            plot_and_save_loss(training_losses, validation_losses, file_prefix, model_counter)

        # Print a summary of the results
        print(f"Results: Train MAE: {train_mae:.4f}, Test MAE: {test_mae:.4f}")
        print(f"Train Directional Accuracy: {train_directional_acc:.4f}, Test Directional Accuracy: {test_directional_acc:.4f}")
        print(f"Training Time: {training_time:.4f} seconds, Evaluation Time: {evaluation_time:.4f} seconds\n")

    except Exception as e:
        # Best-effort: report the failure and return an error record so a
        # surrounding search can continue with the next configuration.
        print(f"An error occurred: {e}")
        # Read the hyperparameters straight from `params` rather than from the
        # locals above: the failure may have happened before they were bound.
        read_param = params.get if hasattr(params, 'get') else (lambda key: None)
        error_keys = (
            "window_size", "gru_dim", "num_layers", "dense_dim", "dropout",
            "lr", "batch_size", "num_epochs", "optimizer_type",
        )
        result = {key: read_param(key) for key in error_keys}
        result["error"] = str(e)

    return result

In [None]:
# Hyperparameter grids for grid search, one per dataset.
# Every key maps to the list of candidate values for that hyperparameter.
params_daily = dict(
    window_size=[1],
    gru_dim=[60, 80, 120],
    dense_dim=[60, 80, 120],
    num_layers=[1],
    dropout=[0.0],
    lr=[0.0001],
    num_epochs=[40, 50, 60, 70],
    batch_size=[64],
    optimizer_type=['Adam'],
    patience=[24],
    activation_function=[nn.Tanh],
)

params_30min = dict(
    window_size=[48],
    gru_dim=[80, 90, 100],
    dense_dim=[90, 100, 110],
    num_layers=[1],
    dropout=[0.0],
    lr=[0.0001],
    num_epochs=[60, 70, 80, 90],
    batch_size=[48],
    optimizer_type=['Adam'],
    patience=[24],
    activation_function=[nn.Tanh],
)

params_4h = dict(
    window_size=[6],
    gru_dim=[100, 120, 140],
    dense_dim=[90, 100, 110],
    num_layers=[1],
    dropout=[0.0],
    lr=[0.00001],
    num_epochs=[40, 50, 60, 70],
    batch_size=[64],
    optimizer_type=['Adam'],
    patience=[24],
    activation_function=[nn.Tanh],
)

In [None]:
# Run the grid search for each dataset with the hyperparameter grid defined for
# that dataset (previously all three calls used params_4h, leaving params_daily
# and params_30min unused). The 4h label now uses an underscore like the others.
# NOTE(review): run_grid_search is not defined in the cells visible here — confirm
# it is defined earlier in the notebook so this cell works on a fresh kernel.
run_grid_search(df_daily, 'df_daily', params_daily)
run_grid_search(df_4h, 'df_4h', params_4h)
run_grid_search(df_30min, 'df_30min', params_30min)