In [14]:
import cvxpy as cp
import numpy as np

# Risk functions module
#
# This module defines the financial risk measures to be used in the optimization layer of the E2E
# problem.
#


def p_var(z: cp.Expression, c: float, x: np.ndarray) -> cp.Expression:
    """
    Compute the squared error for the given input.

    :param z: A cvxpy expression (decision variable)
    :param c: A constant threshold or target value
    :param x: A numpy array (weights or features)
    :return: The squared error expression
    """
    return cp.square(x @ z - c)


def p_mad(z: cp.Expression, c: float, x: np.ndarray) -> cp.Expression:
    """
    Compute the mean absolute deviation for the given input.

    :param z: A cvxpy expression (decision variable)
    :param c: A constant threshold or target value
    :param x: A numpy array (weights or features)
    :return: The absolute deviation expression
    """
    return cp.abs(x @ z - c)


# Define test data
z = cp.Variable(3)  # Decision variable (portfolio weights)
c = 0.02  # Centering parameter (expected return)
x = np.array(
    [
        [0.05, 0.02, -0.01],  # Realized returns for multiple scenarios
        [0.03, -0.01, 0.04],
        [-0.02, 0.01, 0.01],
    ]
)

# Test variance function (p_var)
print("\nTesting p_var...")
var_expr = p_var(z, c, x[0])  # Apply p_var to the first row of x
objective_var = cp.Minimize(var_expr)  # Minimize the variance
constraints = [
    cp.sum(z) == 1,
    z >= 0,
]  # Portfolio constraints: sum of weights = 1, weights >= 0
problem_var = cp.Problem(objective_var, constraints)
var_opt_value = problem_var.solve()

# Output results for variance minimization
print("Optimized portfolio weights (Variance):", z.value)
print("Variance objective value:", var_opt_value)

# Reinitialize decision variable for MAD problem
z = cp.Variable(3)

# Test MAD function (p_mad)
print("\nTesting p_mad...")
mad_expr = p_mad(z, c, x[0])  # Apply p_mad to the first row of x
objective_mad = cp.Minimize(mad_expr)  # Minimize the MAD
problem_mad = cp.Problem(objective_mad, constraints)
mad_opt_value = problem_mad.solve()

# Output results for MAD minimization
print("Optimized portfolio weights (MAD):", z.value)
print("MAD objective value:", mad_opt_value)



Testing p_var...
Optimized portfolio weights (Variance): [0.33333333 0.33333333 0.33333333]
Variance objective value: 0.0

Testing p_mad...
Optimized portfolio weights (MAD): [ 0.13355262  0.33328474 -0.66566742]
MAD objective value: 3.469446951953614e-18


In [15]:
import torch
from torch import Tensor


# Performance loss functions with type hints and improved comments
def single_period_loss(z_star: Tensor, y_perf: Tensor) -> Tensor:
    """
    Calculate the single-period loss based on the out-of-sample portfolio return.

    This function computes the out-of-sample portfolio return for a given portfolio over the next
    time step. It computes the loss as the negative return since optimization typically focuses
    on minimizing the loss, and maximizing returns translates into minimizing negative returns.

    :param z_star: Tensor of shape (n_y, 1) representing the optimal portfolio weights.
    :param y_perf: Tensor of shape (perf_period, n_y) representing the realized returns.
    :return: A scalar tensor representing the realized return at the first time step (negative).
    """
    # Calculate the portfolio return for the first time step and negate it (since we want to minimize loss)
    return -y_perf[0] @ z_star


def single_period_over_var_loss(z_star: Tensor, y_perf: Tensor) -> Tensor:
    """
    Calculate the loss as the portfolio return divided by the portfolio's volatility.

    This function computes the portfolio return at the first time step and divides it by the
    realized volatility (standard deviation) of the portfolio returns over the performance period.
    This provides a return-over-risk measure, which is often used in portfolio analysis.

    :param z_star: Tensor of shape (n_y, 1) representing the optimal portfolio weights.
    :param y_perf: Tensor of shape (perf_period, n_y) representing the realized returns.
    :return: A scalar tensor representing the return over realized volatility (negative).
    """
    # Calculate the portfolio returns over the entire performance period
    portfolio_returns = y_perf @ z_star
    # Calculate the standard deviation (volatility) of the portfolio returns, adding epsilon for numerical stability
    volatility = torch.std(portfolio_returns, unbiased=True) + 1e-6
    # Calculate the return at the first time step and divide by the volatility, then negate for loss
    return -portfolio_returns[0] / volatility


def sharpe_loss(z_star: Tensor, y_perf: Tensor) -> Tensor:
    """
    Calculate the loss based on the Sharpe ratio over a performance period.

    This function computes a simplified Sharpe ratio, which is the ratio of the mean portfolio
    return to its standard deviation (volatility) over the performance period. The loss is defined
    as the negative Sharpe ratio to allow for minimization.

    :param z_star: Tensor of shape (n_y, 1) representing the optimal portfolio weights.
    :param y_perf: Tensor of shape (perf_period, n_y) representing the realized returns.
    :return: A scalar tensor representing the negative Sharpe ratio.
    """
    # Calculate the portfolio returns over the entire performance period
    portfolio_returns = y_perf @ z_star
    # Calculate the mean return of the portfolio
    mean_return = torch.mean(portfolio_returns)
    # Calculate the standard deviation (volatility) of the portfolio returns, adding epsilon for numerical stability
    volatility = torch.std(portfolio_returns, unbiased=True) + 1e-6
    # Calculate the Sharpe ratio and negate it for loss
    return -mean_return / volatility


if __name__ == "__main__":
    # Example portfolio weights (3 assets)
    z_star = torch.tensor([0.3, 0.5, 0.2])
    # Realized returns for 3 assets over 3 periods
    y_perf = torch.tensor(
        [
            [0.01, 0.02, -0.01],
            [0.03, -0.01, 0.04],
            [0.02, 0.01, 0.01],
        ]
    )

    # Test the single-period loss function
    print("Testing single_period_loss...")
    loss_sp = single_period_loss(z_star, y_perf)
    print(f"Single period loss: {loss_sp.item()}")

    # Test the single-period-over-volatility loss function
    print("\nTesting single_period_over_var_loss...")
    loss_sp_var = single_period_over_var_loss(z_star, y_perf)
    print(f"Single period loss over volatility: {loss_sp_var.item()}")

    # Test the Sharpe ratio loss function
    print("\nTesting sharpe_loss...")
    loss_sharpe = sharpe_loss(z_star, y_perf)
    print(f"Sharpe ratio loss: {loss_sharpe.item()}")

Testing single_period_loss...
Single period loss: -0.010999999940395355

Testing single_period_over_var_loss...
Single period loss over volatility: -10.989008903503418

Testing sharpe_loss...
Sharpe ratio loss: -11.988011360168457


In [16]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from typing import Tuple

####################################################################################################
# SlidingWindow Dataset to index data using a sliding window
####################################################################################################


class SlidingWindow(Dataset):
    """Dataset class for creating a sliding window from time series data."""

    def __init__(
        self,
        X: pd.DataFrame,
        Y: pd.DataFrame,
        n_obs: int,
        perf_period: int,
        dtype: torch.dtype = torch.float32,
        device: torch.device = torch.device("cpu"),
    ) -> None:
        """
        Initialize the SlidingWindow dataset.

        :param X: DataFrame containing the complete feature dataset.
        :param Y: DataFrame containing the complete asset return dataset.
        :param n_obs: Number of observations in the sliding window.
        :param perf_period: Number of future observations used for out-of-sample performance evaluation.
        :param dtype: The desired data type for tensors (default is torch.float32).
        :param device: Device on which to place the tensors (e.g., 'cpu' or 'cuda' for GPU).
        """
        self.X = torch.tensor(
            X.values, dtype=dtype, device=device
        )  # Convert feature dataset to tensor
        self.Y = torch.tensor(
            Y.values, dtype=dtype, device=device
        )  # Convert asset return dataset to tensor
        self.n_obs = n_obs  # Number of observations in the sliding window
        self.perf_period = (
            perf_period  # Number of future observations for performance evaluation
        )

    def __getitem__(
        self, index: int
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Retrieve a single window of data.

        :param index: Index of the sliding window.
        :return: Tuple (x, y, y_perf):
            x: Features window of shape (n_obs + 1, n_x).
            y: Realizations window of shape (n_obs, n_y).
            y_perf: Future performance window of shape (perf_period, n_y).
        """
        # Retrieve features for the sliding window (n_obs + 1 observations)
        x = self.X[index : index + self.n_obs + 1]
        # Retrieve asset returns for the sliding window (n_obs observations)
        y = self.Y[index : index + self.n_obs]
        # Retrieve future performance data (perf_period observations)
        y_perf = self.Y[index + self.n_obs : index + self.n_obs + self.perf_period]
        return (x, y, y_perf)

    def __len__(self) -> int:
        """
        Return the number of windows that can be created from the dataset.

        :return: Length of the dataset, considering the sliding windows.
        """
        return (
            len(self.X) - self.n_obs - self.perf_period
        )  # Total number of sliding windows available


####################################################################################################
# Backtest class to store out-of-sample results
####################################################################################################


class Backtest:
    """Class to store out-of-sample results for a backtest."""

    def __init__(self, len_test: int, n_y: int, dates: pd.DatetimeIndex) -> None:
        """
        Initialize the Backtest object.

        :param len_test: Number of scenarios in the out-of-sample evaluation period.
        :param n_y: Number of assets in the portfolio.
        :param dates: DatetimeIndex containing the corresponding dates.
        """
        self.weights = np.zeros(
            (len_test, n_y)
        )  # Initialize portfolio weights over time
        self.rets = np.zeros(
            len_test
        )  # Initialize realized portfolio returns over time
        self.dates = dates[
            -len_test:
        ]  # Keep only the dates for the out-of-sample period

    def stats(self) -> None:
        """
        Compute and store the cumulative returns, mean return, volatility, and Sharpe ratio.

        This method calculates key performance metrics of the portfolio, including:
        - Cumulative returns (Total Return Index), which show the total growth of the portfolio over time.
        - Annualized mean return, which is an estimate of the average return the portfolio would achieve per year.
        - Volatility, which measures the risk by calculating the standard deviation of returns.
        - Sharpe ratio, which indicates the risk-adjusted return of the portfolio.
        """
        # Calculate cumulative returns (Total Return Index)
        tri = np.cumprod(self.rets + 1)
        # Calculate the annualized mean return using the final cumulative return and the number of periods
        self.mean = (tri[-1]) ** (1 / len(tri)) - 1
        # Calculate the volatility (standard deviation) of the portfolio returns
        self.vol = np.std(self.rets)
        # Calculate the Sharpe ratio (mean return divided by volatility)
        self.sharpe = self.mean / self.vol
        # Create a DataFrame containing realized returns and cumulative returns, indexed by dates
        if len(self.dates) == len(self.rets):
            self.rets = pd.DataFrame(
                {"Date": self.dates, "rets": self.rets, "tri": tri}
            ).set_index("Date")
        else:
            raise ValueError("Length of dates and returns must be equal.")


####################################################################################################
# InSample class to store in-sample results
####################################################################################################


class InSample:
    """Class to store the in-sample results of neural network training."""

    def __init__(self) -> None:
        """
        Initialize the InSample object.
        """
        self.loss = []  # List to hold training losses
        self.gamma = []  # List to hold gamma values (hyperparameter)
        self.delta = []  # List to hold delta values (hyperparameter)
        self.val_loss = []  # List to hold validation losses (optional)

    def df(self) -> pd.DataFrame:
        """
        Return a DataFrame containing the training statistics.

        :return: DataFrame with columns representing different metrics during training.
        """
        # Return a DataFrame based on available data, adjusting columns accordingly
        if not self.delta and not self.val_loss:
            return pd.DataFrame(
                list(zip(self.loss, self.gamma)), columns=["loss", "gamma"]
            )
        elif not self.delta:
            return pd.DataFrame(
                list(zip(self.loss, self.val_loss, self.gamma)),
                columns=["loss", "val_loss", "gamma"],
            )
        elif not self.val_loss:
            return pd.DataFrame(
                list(zip(self.loss, self.gamma, self.delta)),
                columns=["loss", "gamma", "delta"],
            )
        else:
            return pd.DataFrame(
                list(zip(self.loss, self.val_loss, self.gamma, self.delta)),
                columns=["loss", "val_loss", "gamma", "delta"],
            )


####################################################################################################
# CrossVal class to store cross-validation results
####################################################################################################


class CrossVal:
    """Class to store cross-validation results of neural network training."""

    def __init__(self) -> None:
        """
        Initialize the CrossVal object.
        """
        self.lr = []  # List to hold learning rates
        self.epochs = []  # List to hold the number of epochs in each run
        self.val_loss = []  # List to hold validation losses

    def df(self) -> pd.DataFrame:
        """
        Return a DataFrame containing the cross-validation statistics.

        :return: DataFrame with learning rate, epochs, and validation loss.
        """
        # Create and return a DataFrame with learning rates, epochs, and validation losses
        return pd.DataFrame(
            list(zip(self.lr, self.epochs, self.val_loss)),
            columns=["lr", "epochs", "val_loss"],
        )


####################################################################################################
# Test code for the Backtest class
####################################################################################################

if __name__ == "__main__":
    # Example usage
    X = pd.DataFrame(
        np.random.randn(100, 3)
    )  # Create feature dataset with 100 samples and 3 features
    Y = pd.DataFrame(
        np.random.randn(100, 2)
    )  # Create asset return dataset with 100 samples and 2 assets
    dates = pd.date_range(start="2020-01-01", periods=100, freq="D")

    # Check if GPU is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize SlidingWindow with given parameters
    n_obs = 10
    perf_period = 5
    sliding_window = SlidingWindow(X, Y, n_obs, perf_period, device=device)

    # Fetch a sample window
    print("Testing SlidingWindow...")
    x, y, y_perf = sliding_window[0]
    print(f"x (features): {x.shape}")
    print(f"y (realizations): {y.shape}")
    print(f"y_perf (performance window): {y_perf.shape}")

    # Initialize Backtest with given parameters
    len_test = 30
    backtest_obj = Backtest(len_test=len_test, n_y=2, dates=dates)

    # Simulate some portfolio returns
    backtest_obj.rets = np.random.randn(len_test)

    print("\nTesting Backtest...")
    backtest_obj.stats()
    print(backtest_obj.rets.head())
    print(f"Mean return: {backtest_obj.mean:.4f}")
    print(f"Volatility: {backtest_obj.vol:.4f}")
    print(f"Sharpe ratio: {backtest_obj.sharpe:.4f}")

    # Test Backtest stats calculation
    print("\nTesting Backtest stats calculation...")
    backtest_obj.rets = np.array([0.05, -0.02, 0.03, 0.04, -0.01])
    backtest_obj.dates = dates[
        -len(backtest_obj.rets) :
    ]  # Adjust dates to match returns length
    backtest_obj.stats()
    print(backtest_obj.rets.head())
    print(f"Mean return: {backtest_obj.mean:.4f}")
    print(f"Volatility: {backtest_obj.vol:.4f}")
    print(f"Sharpe ratio: {backtest_obj.sharpe:.4f}")

Testing SlidingWindow...
x (features): torch.Size([11, 3])
y (realizations): torch.Size([10, 2])
y_perf (performance window): torch.Size([5, 2])

Testing Backtest...
                rets       tri
Date                          
2020-03-11 -1.961243 -0.961243
2020-03-12 -0.652503 -0.334029
2020-03-13 -1.149731  0.050015
2020-03-14  0.026874  0.051359
2020-03-15 -0.958301  0.002142
Mean return: nan
Volatility: 0.8065
Sharpe ratio: nan

Testing Backtest stats calculation...
            rets       tri
Date                      
2020-04-05  0.05  1.050000
2020-04-06 -0.02  1.029000
2020-04-07  0.03  1.059870
2020-04-08  0.04  1.102265
2020-04-09 -0.01  1.091242
Mean return: 0.0176
Volatility: 0.0279
Sharpe ratio: 0.6324


  self.mean = (tri[-1]) ** (1 / len(tri)) - 1


In [17]:
# DataLoad module
#
####################################################################################################
# Import libraries
####################################################################################################
import torch
import torch.nn as nn
import pandas as pd
import pandas_datareader as pdr
import numpy as np
from alpha_vantage.timeseries import TimeSeries
import time
import statsmodels.api as sm
from typing import List, Tuple, Optional


####################################################################################################
# TrainTest class
####################################################################################################
class TrainTest:
    def __init__(self, data: pd.DataFrame, n_obs: int, split: list[float]) -> None:
        """
        Object to hold the training, validation, and testing datasets.

        :param data: pandas DataFrame with time series data.
        :param n_obs: Number of observations per batch.
        :param split: List of ratios that control the partition of data into training, testing, and validation sets.
        """
        self.data: pd.DataFrame = data  # Store the input data as a DataFrame
        self.n_obs: int = n_obs  # Set the number of observations per batch
        self.split: list[float] = (
            split  # Set the split ratios for training, validation, and testing
        )

        n_obs_tot: int = self.data.shape[
            0
        ]  # Calculate the total number of observations in the dataset
        numel: np.ndarray = n_obs_tot * np.cumsum(
            split
        )  # Calculate the cumulative number of elements based on split ratios
        self.numel: list[int] = [
            round(i) for i in numel
        ]  # Round the cumulative elements to get the indices for splits

    def split_update(self, split: list[float]) -> None:
        """
        Update the list outlining the split ratio of training, validation, and testing datasets.

        :param split: List of ratios that control the partition of data into training, testing, and validation sets.
        """
        self.split: list[float] = (
            split  # Update the split ratios with the new list provided
        )
        n_obs_tot: int = self.data.shape[
            0
        ]  # Calculate the total number of observations in the dataset
        numel: np.ndarray = n_obs_tot * np.cumsum(
            split
        )  # Calculate the cumulative number of elements based on new split ratios
        self.numel: list[int] = [
            round(i) for i in numel
        ]  # Round the cumulative elements to get the indices for splits

    def train(self) -> pd.DataFrame:
        """
        Return the training subset of observations.

        :return: pandas DataFrame containing the training data subset.
        """
        return self.data[
            : self.numel[0]
        ]  # Return the data from the start up to the end of the training set

    def test(self) -> pd.DataFrame:
        """
        Return the test subset of observations.

        :return: pandas DataFrame containing the test data subset.
        """
        return self.data[
            self.numel[0] - self.n_obs : self.numel[1]
        ]  # Return the data for the test set, including overlap
if __name__ == "__main__":
    # Generate synthetic data for testing
    n_tot = 1000  # Total number of observations
    n_features = 5  # Number of features
    split_ratios = [0.7, 0.3]  # 70% training, 30% testing
    n_obs = 50  # Number of observations per batch

    # Create a synthetic DataFrame with random data
    data = pd.DataFrame(
        np.random.randn(n_tot, n_features),
        columns=[f"Feature_{i}" for i in range(n_features)],
    )

    # Initialize TrainTest object
    train_test_obj = TrainTest(data=data, n_obs=n_obs, split=split_ratios)

    # Test the training data split
    train_data = train_test_obj.train()
    print("Training Data:")
    print(train_data.head())
    print(f"Number of training observations: {len(train_data)}")

    # Test the test data split
    test_data = train_test_obj.test()
    print("\nTest Data:")
    print(test_data.head())
    print(f"Number of test observations: {len(test_data)}")

    # Update split ratios and test again
    new_split_ratios = [0.6, 0.4]  # Update split ratios
    train_test_obj.split_update(split=new_split_ratios)

    # Test the updated training data split
    updated_train_data = train_test_obj.train()
    print("\nUpdated Training Data:")
    print(updated_train_data.head())
    print(f"Number of updated training observations: {len(updated_train_data)}")

    # Test the updated test data split
    updated_test_data = train_test_obj.test()
    print("\nUpdated Test Data:")
    print(updated_test_data.head())
    print(f"Number of updated test observations: {len(updated_test_data)}")

Training Data:
   Feature_0  Feature_1  Feature_2  Feature_3  Feature_4
0  -0.074288  -1.611273   1.551819  -0.570024  -0.968283
1   0.460295   0.830840  -0.169389  -1.699986  -0.895349
2   1.748863   1.145611  -0.486230   1.499275   0.197001
3   0.214283  -0.362579  -0.619226   0.905574   0.437117
4   1.184838  -0.616461   1.771004   0.241603   0.195820
Number of training observations: 700

Test Data:
     Feature_0  Feature_1  Feature_2  Feature_3  Feature_4
650  -0.128159   0.060916   1.026948  -0.036438  -1.136972
651   0.520090  -0.620568   0.838925  -0.219185   1.081232
652   1.498235   0.744269   1.025227  -0.283826  -0.550065
653   0.064667   1.787847  -0.359358   1.498579   1.320986
654  -0.934380  -0.904862  -0.629665  -0.246409   0.865627
Number of test observations: 350

Updated Training Data:
   Feature_0  Feature_1  Feature_2  Feature_3  Feature_4
0  -0.074288  -1.611273   1.551819  -0.570024  -0.968283
1   0.460295   0.830840  -0.169389  -1.699986  -0.895349
2   1.748863

In [19]:
####################################################################################################
# Generate linear synthetic data
####################################################################################################
def synthetic(
    n_x: int = 5,
    n_y: int = 10,
    n_tot: int = 1200,
    n_obs: int = 104,
    split: list[float] = [0.6, 0.4],
    set_seed: int = 100,
) -> Tuple[TrainTest, TrainTest]:
    """
    Generates synthetic (normally-distributed) asset and factor data.

    :param n_x: Number of features.
    :param n_y: Number of assets.
    :param n_tot: Number of observations in the whole dataset.
    :param n_obs: Number of observations per batch.
    :param split: List of floats representing train-validation-test split percentages (must sum up to one).
    :param set_seed: Integer seed for replicability of the numpy RNG.

    :return: Tuple of TrainTest objects for features and asset data split into train, validation, and test subsets.
    """
    np.random.seed(set_seed)  # Set the random seed for reproducibility

    # 'True' prediction bias and weights
    a: np.ndarray = (
        np.sort(np.random.rand(n_y) / 250) + 0.0001
    )  # Generate small bias terms for each asset
    b: np.ndarray = (
        np.random.randn(n_x, n_y) / 5
    )  # Generate random weights for linear relationships between features and assets
    c: np.ndarray = np.random.randn(
        int((n_x + 1) / 2), n_y
    )  # Generate additional random weights for auxiliary features

    # Noise standard deviation
    s: np.ndarray = (
        np.sort(np.random.rand(n_y)) / 20 + 0.02
    )  # Generate small standard deviations for noise for each asset

    # Synthetic features
    X: np.ndarray = (
        np.random.randn(n_tot, n_x) / 50
    )  # Generate synthetic features from a normal distribution
    X2: np.ndarray = (
        np.random.randn(n_tot, int((n_x + 1) / 2)) / 50
    )  # Generate auxiliary features from a normal distribution

    # Synthetic outputs
    Y: np.ndarray = (
        a + X @ b + X2 @ c + s * np.random.randn(n_tot, n_y)
    )  # Generate synthetic outputs based on linear combinations of features and noise

    X: pd.DataFrame = pd.DataFrame(X)  # Convert features to a pandas DataFrame
    Y: pd.DataFrame = pd.DataFrame(Y)  # Convert outputs to a pandas DataFrame

    # Partition dataset into training and testing sets
    return TrainTest(X, n_obs, split), TrainTest(
        Y, n_obs, split
    )  # Return TrainTest objects for features and outputs


####################################################################################################
# Test code for synthetic data generation
####################################################################################################
if __name__ == "__main__":
    # Parameters for synthetic data generation
    n_x = 5  # Number of features
    n_y = 10  # Number of assets
    n_tot = 1200  # Total number of observations
    n_obs = 104  # Number of observations per batch
    split = [0.6, 0.4]  # Split ratios for training and testing
    set_seed = 100  # Random seed for reproducibility

    # Generate synthetic data
    train_test_features, train_test_outputs = synthetic(
        n_x, n_y, n_tot, n_obs, split, set_seed
    )

    # Test the generated feature data
    train_features = train_test_features.train()
    test_features = train_test_features.test()
    print("Training Features:")
    print(train_features.head())
    print(f"Number of training feature observations: {len(train_features)}")
    print("\nTest Features:")
    print(test_features.head())
    print(f"Number of test feature observations: {len(test_features)}")

    # Test the generated output data
    train_outputs = train_test_outputs.train()
    test_outputs = train_test_outputs.test()
    print("\nTraining Outputs:")
    print(train_outputs.head())
    print(f"Number of training output observations: {len(train_outputs)}")
    print("\nTest Outputs:")
    print(test_outputs.head())
    print(f"Number of test output observations: {len(test_outputs)}")

    # Verify the shape of the generated data
    assert (
        train_features.shape[1] == n_x
    ), "Number of features in training set does not match expected value."
    assert (
        train_outputs.shape[1] == n_y
    ), "Number of assets in training set does not match expected value."
    assert (
        test_features.shape[1] == n_x
    ), "Number of features in test set does not match expected value."
    assert (
        test_outputs.shape[1] == n_y
    ), "Number of assets in test set does not match expected value."

Training Features:
          0         1         2         3         4
0  0.037531 -0.007538  0.036639  0.000060 -0.001520
1  0.000079 -0.003700 -0.049743 -0.034093 -0.022725
2 -0.059466  0.000666 -0.004978 -0.009004  0.002649
3  0.000444  0.006347 -0.015048 -0.025928  0.001903
4 -0.008474 -0.023720 -0.007309 -0.025420  0.031723
Number of training feature observations: 720

Test Features:
            0         1         2         3         4
616  0.014443  0.017019 -0.003300 -0.056563 -0.017051
617  0.018964 -0.016458 -0.003853 -0.011657 -0.022333
618 -0.022941  0.020247 -0.012068  0.003381  0.019936
619  0.006151  0.024943 -0.006490 -0.010648 -0.017412
620 -0.031169 -0.034504  0.012386  0.022285 -0.043454
Number of test feature observations: 584

Training Outputs:
          0         1         2         3         4         5         6  \
0  0.020391 -0.024384 -0.050628 -0.016314 -0.044117  0.017402  0.090213   
1 -0.001323  0.039438  0.068975 -0.035036  0.059205 -0.009392 -0.091626   