In [1]:
###CNN and MLP primitive

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F



##The encoder in the paper takes x in R^(TxHxW) and y in R^(TxE) and maps them to R^(TxL) the dimensionality of the context encoder
##and encoder is set to 5. So this cnn needs to output (32x5) so each (surface in R^(5x5) -> (z in R^5)

##we enhance the dimensionality of the iv surface first by upgrading the number of channels. The reasoning behind this is similar to why 
##we do this in transformer architecture. A larger dimensional space will be able to capture more nuanced information and represent it in number form
##then we compress this to something digestable
class CNN(nn.Module):
    #input_size and output_size represent the number of channels in the input and output data
    #channels is the number of dimensions a single data point will have ie RGB = 3 channelss
    def __init__(self, input_size, output_size):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(input_size, output_size, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(output_size, output_size, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(output_size, output_size, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(output_size * 5 * 5, 5)

    def forward(self, x):
        batch_size, H, W = x.shape
        x = x.reshape(batch_size, 1, H, W)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        #print(f"x shape before resize and convultion passthrough is {x.shape}")
        #x = x.reshape(batch_size, 1, 5)
        x = x.view(batch_size, -1)
        x = self.fc(x)
        #print(f"after passthrough into convultion layers and fully connected layer ther shape of x is {x.shape}")
        return x

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, y):
        x = F.relu(self.fc1(y))
        x = self.fc2(y)
        return y

In [2]:
###CNN For decoder 

#due to difference in dimensionality - What happened st i needed to make this. z, zeta make different shape so needed redfinition
##plus it will define a new coniditional probability distribtution

##had to make another cnn for decoder due difference in dimensionality

##in here the output size should be the number of days in the future?

##for now output_size = 1 so we predict 1 day into the future?
class TCNN(nn.Module):
    def __init__(self, input_size, output_size, num_surfaces):
        super(TCNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 5 * 5 * num_surfaces)
        self.output_size = output_size
        self.num_surfaces = num_surfaces
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = x.view(-1, self.num_surfaces, 5, 5)
        return x

In [3]:
###ENCODER DECODER CONTEXTENCODER

class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size):
        super(Encoder, self).__init__()
        self.cnn = CNN(input_size, 5)
        self.mlp = nn.Identity()
        self.lstm = nn.LSTM(5 + 3, hidden_size, num_layers=2, batch_first=True, dropout=0.2)
        self.linear_mu = nn.Linear(hidden_size, latent_size)
        self.linear_sigma = nn.Linear(hidden_size, latent_size)

    def forward(self, x, y):
        x_encoded = self.cnn(x)
        y_encoded = self.mlp(y)
        y_encoded = torch.squeeze(y_encoded, dim=1)
        #print(f"x_encoded shape is {x_encoded.shape}  y_encoded shape is {y_encoded.shape}")
        encoded = torch.cat((x_encoded, y_encoded), dim=-1)
        #print(f"concatenated vector is of size {encoded.shape}")
        _, (hidden, _) = self.lstm(encoded)
        #print("hidden state created")
        hidden = hidden[-1]  # Take the last hidden state
        mu = self.linear_mu(hidden)
        log_var = self.linear_sigma(hidden)
        z = self.reparameterize(mu, log_var)
        #print("encoding successful")
        return z, mu, log_var

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

class ContextEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, context_size):
        super(ContextEncoder, self).__init__()
        self.cnn = CNN(input_size, 5)
        self.mlp = nn.Identity()
        self.lstm = nn.LSTM(5 + 3, hidden_size, num_layers=2, batch_first=True, dropout=0.2)
        self.linear = nn.Linear(hidden_size, context_size)

    def forward(self, x_c, y_c):
        x_encoded = self.cnn(x_c)
        y_encoded = self.mlp(y_c)
        y_encoded = torch.squeeze(y_encoded, dim=1)
        #print(f"x_encoded size = {x_encoded.shape} ||y_encoded size = {y_encoded.shape}")
        encoded = torch.cat((x_encoded, y_encoded), dim=-1)
        _, (hidden, _) = self.lstm(encoded)
        hidden = hidden[-1]  # Take the last hidden state
        zeta = self.linear(hidden)
        #print("context encoding successful")
        return zeta

class Decoder(nn.Module):
    def __init__(self, latent_size, hidden_size, output_size, num_surfaces):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(latent_size + 5, hidden_size, num_layers=2, batch_first=True, dropout=0.2) #latent
        self.tcnn = TCNN(hidden_size, output_size, num_surfaces)
        self.mlp = nn.Linear(hidden_size, 1)

    def forward(self, z, zeta):
        # Reshape z and zeta to have shape (1, 5)
        z = z.view(1, -1)
        zeta = zeta.view(1, -1)
        # Concatenate z and zeta along the second dimension to get shape (1, 10)
        z_concat = torch.cat((z, zeta), dim=1)
        
        #print(f"z_concat is of size {z_concat.shape}")
        hidden, _ = self.lstm(z_concat)
        #print('i am here')
        #print(f"hidden state published, hidden state shape {hidden.shape}")
        x_n = self.tcnn(hidden) 
        #print('i am here')
        r_n = self.mlp(hidden)
        #print("decoding successful")
        return torch.squeeze(x_n, dim=0), torch.squeeze(r_n, dim=0)

In [4]:
# Moneyness levels and time to maturity (in days ##based of paper single skew and slope for surface
moneyness_levels = [0.7, 0.85, 1, 1.15, 1.3]
ttm_levels = [1, 3, 6, 12, 24] #days

def calculate_skew_slope(iv_row):
    # Select IV values for the specific TTM and moneyness levels
    iv_12d_085 = iv_row.get((12, 0.85), 0)  # IV for 1 year TTM and moneyness=0.85
    iv_12d_100 = iv_row.get((12, 1.00), 0)  # IV for 1 year TTM and moneyness=1.00
    iv_12d_115 = iv_row.get((12, 1.15), 0)  # IV for 1 year TTM and moneyness=1.15

    # Calculate skew
    skew = (iv_12d_085 + iv_12d_115) / 2 - iv_12d_100 if iv_12d_100 else 0  # Avoid division by zero

    # Select IV values for slope calculation
    iv_3d_100 = iv_row.get((3, 1.00), 0)  # IV for 3 months TTM and moneyness=1.00
    iv_24d_100 = iv_row.get((24, 1.00), 0)  # IV for 2 years TTM and moneyness=1.00

    # Calculate slope
    slope = iv_24d_100 - iv_3d_100

    return skew, slope

In [5]:
###CVAE

class CVAE(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size, context_size, output_size, num_surfaces):
        super(CVAE, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, latent_size)
        self.context_encoder = ContextEncoder(input_size, hidden_size, context_size)
        self.decoder = Decoder(latent_size, hidden_size, output_size, num_surfaces)
        self.latent_size = latent_size
        self.context_size = context_size

    def forward(self, x, y, x_c, y_c):
        #print('1')
        z, mu, log_var = self.encoder(x, y) ##we have sampled z from distribution here
        #print(f"The shape of latent representation z is {z.shape}")
        #print('2')
        zeta = self.context_encoder(x_c, y_c) ##we have sampled zeta from distribution here
        #print(f"the shape of zeta (encoded context) is {zeta.shape}")
        x_n, r_n = self.decoder(z, zeta)
        return x_n, r_n, mu, log_var

    
    def generate(self, x_c, y_c, ttm):
        x_hat = []
        r_hat = []
    
        for i in range(0, ttm):
            # reasing z and context
            z = torch.tensor(np.array([np.random.normal(loc=0, scale=1, size=5)]), dtype=torch.float32)
            zeta = self.context_encoder(x_c, y_c)
            x_n, r_n = self.decoder(z, zeta)
            x_hat.append(x_n)
            r_hat.append(r_n)
    
            # Update x_c by removing the first value and appending x_n
            x_c = torch.cat((x_c[1:], x_n[0].unsqueeze(0)))
    
            # Convert x_n (generated IV surfaces) to a dictionary format
            iv_row = {(ttm, moneyness): x_n[0][i, j] for i, moneyness in enumerate(moneyness_levels) for j, ttm in enumerate(ttm_levels)}
    
            # Calculate skew and slope using the calculate_skew_slope function
            skew, slope = calculate_skew_slope(iv_row)
           # print(skew)
            #print(slope)
    
            # Create y_n by combining r_n (predicted log returns), skew, and slope
            y_n = torch.cat((r_n, torch.tensor([skew, slope])))

            y_n = y_n.view(1, -1)
            #print(y_c)
            #print(y_n)
            # Update y_c by removing the first value and appending y_n
            y_c = torch.cat((y_c[1:], y_n.unsqueeze(0)))
    
        return x_hat, r_hat

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the DataFrame from the provided CSV file
file_path = 'combined_iv_data.csv'
df = pd.read_csv(file_path, header=[0, 1], index_col=0)

# Extract extra features
Y = df[['Log Return', 'Skew', 'Slope']]

# Extract IV surface data
X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_scaled = scaler.fit_transform(Y)

# Split data into training and temporary set
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, Y_scaled, test_size=2000, random_state=42)

# Split the temporary set into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

  X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)


In [7]:
import random

def generate_random_day(n):
    """
    Generates a random number between 1 and n
    """
    # Generate a random number between 1 and n
    random_number = random.randint(1, n - 1)
    return random_number

In [8]:
###PREPARE DATA

import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.utils import mkldnn as mkldnn_utils

# Hyperparameters
input_size = 25
hidden_size = 100
latent_size = 5
context_size = 5
input_size = 1
output_size = 5
learning_rate = 1e-3
num_epochs = 500
batch_size = 32
alpha = 1.0
beta = 1e-5


# Reshape the input data to the desired 5-dimensional shape
batch_size_train = X_train.shape[0]
batch_size_val = X_val.shape[0]
batch_size_test = X_test.shape[0]
sequence_length = 1  # 
num_surfaces = 1  # the number of surfaces to generate


H = 5  # Height of the IV surface grid
W = 5  # Width of the IV surface grid

#print(X_train.shape)
X_train_reshaped = X_train.reshape(batch_size_train,  H, W)
X_val_reshaped = X_val.reshape(batch_size_val, H, W)
X_test_reshaped = X_test.reshape(batch_size_test, H, W)
#print(X_train_reshaped.shape)
extra_features_size = 3

y_train_reshaped = y_train.reshape(y_train.shape[0], 1, extra_features_size)
y_val_reshaped = y_val.reshape(y_val.shape[0], 1, extra_features_size)
y_test_reshaped = y_test.reshape(y_test.shape[0], 1, extra_features_size)

train_data = TensorDataset(torch.Tensor(X_train_reshaped), torch.Tensor(y_train_reshaped))
val_data = TensorDataset(torch.Tensor(X_val_reshaped), torch.Tensor(y_val_reshaped))
test_data = TensorDataset(torch.Tensor(X_test_reshaped), torch.Tensor(y_test_reshaped))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Create the model
model = CVAE(input_size, hidden_size, latent_size, context_size, input_size, num_surfaces)

In [None]:
###TRAIN MODEL

def loss_function(x_n, x_n_recon, r_n, r_n_recon, mu, log_var):
    # Ensure x_n and r_n are unsqueezed if needed (depends on data shape handling in other parts of your code)
    x_n = torch.unsqueeze(x_n, dim=0) if len(x_n.shape) < 3 else x_n
    r_n = torch.unsqueeze(r_n, dim=0) if len(r_n.shape) < 3 else r_n

    # Calculate the mean squared error, normalized by the number of elements (H*W)
    recon_loss_x = F.mse_loss(x_n_recon, x_n, reduction='mean') / (H * W)
    recon_loss_r = F.mse_loss(r_n_recon, r_n, reduction='mean')

    # Calculate the Kullback-Leibler divergence
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

    # Combine the losses with the scaling factors
    total_loss = recon_loss_x + alpha * recon_loss_r + beta * kl_loss

    return total_loss

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
best_val_loss = float('inf')
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        x, y = batch
        batch_size = x.shape[0] #batch_size, context_length, num_surfaces, H, W = x.shape
        
        # Generate a random day t to split the sequence into context and future
        t = batch_size - num_surfaces - 1
        #print(t)

        x_c, x_n = x[:t, :], x[t:, :]
        y_c, y_n = y[:t, :], y[t:, :]
            
        r_n = y_n[:, :, 0]
        
        x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
        
        # Compute the loss
        loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            x, y = batch
            batch_size = x.shape[0]  # Number of days in the sequence
            
            # Generate a random day t to split the sequence into context and future
            #t = generate_random_day(batch_size)
            t = batch_size - num_surfaces - 1
            
            x_c, x_n = x[:t, :], x[t:, :]
            y_c, y_n = y[:t, :], y[t:, :]
            
            # Extract the log return feature from y_n
            r_n = y_n[:, :, 0]
            
            x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
            print(f"x_n_recon: {x_n_recon}")
            
            # Compute the loss
            loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
            
            val_loss += loss.item()
    
    # Print the losses for each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")
    
    # Save the best model based on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')

# Testing
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
#test_loss = 0.0
#with torch.no_grad():
 #   for batch in test_loader:
  #      x, y = batch
   #     batch_size = x.shape[0]  # Number of days in the sequence
    #    
        # Generate a random day t to split the sequence into context and future
     #   t = batch_size - num_surfaces - 1
      #  
       # x_c, x_n = x[:, :t], x[:, t:]
        #y_c, y_n = y[:, :t], y[:, t:]
        
        # Extract the log return feature from y_n
        #r_n = y_n[:, :, 0]
        
        #x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
        
        # Compute the loss
        #loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
        
        #test_loss += loss.item()

#ßprint(f"Test Loss: {test_loss/len(test_loader):.4f}") 

  recon_loss_r = F.mse_loss(r_n_recon, r_n, reduction='mean')


x_n_recon: tensor([[[ 0.0401,  0.0353, -0.0685, -0.0107,  0.0291],
         [-0.0173,  0.0663,  0.0305,  0.0289, -0.0093],
         [ 0.0492, -0.0367, -0.0147,  0.0218, -0.0488],
         [ 0.0534, -0.0179, -0.0104,  0.0706,  0.0266],
         [ 0.0065, -0.0084,  0.0286, -0.0251, -0.0298]]])
x_n_recon: tensor([[[ 0.0413,  0.0363, -0.0690, -0.0094,  0.0300],
         [-0.0169,  0.0671,  0.0302,  0.0296, -0.0091],
         [ 0.0501, -0.0366, -0.0132,  0.0221, -0.0490],
         [ 0.0544, -0.0171, -0.0091,  0.0709,  0.0274],
         [ 0.0074, -0.0074,  0.0292, -0.0247, -0.0307]]])
x_n_recon: tensor([[[ 0.0418,  0.0365, -0.0690, -0.0094,  0.0307],
         [-0.0158,  0.0674,  0.0302,  0.0303, -0.0083],
         [ 0.0507, -0.0360, -0.0127,  0.0219, -0.0490],
         [ 0.0558, -0.0172, -0.0080,  0.0721,  0.0280],
         [ 0.0079, -0.0061,  0.0297, -0.0249, -0.0316]]])
x_n_recon: tensor([[[ 0.0388,  0.0338, -0.0678, -0.0125,  0.0281],
         [-0.0180,  0.0649,  0.0315,  0.0267, -0.0098]

In [None]:
###HYPERPARAMETER TUNING


import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import ParameterGrid, KFold

# Hyperparameters
input_size = 25
context_size = 5
input_size = 1
output_size = 5

# Reshape the input data to the desired 5-dimensional shape
batch_size_train = X_train.shape[0]
batch_size_test = X_test.shape[0]

sequence_length = 1
num_surfaces = 1  # the number of surfaces to generate
H = 5  # Height of the IV surface grid
W = 5  # Width of the IV surface grid

X_train_reshaped = X_train.reshape(batch_size_train, H, W)
X_test_reshaped = X_test.reshape(batch_size_test, H, W)

extra_features_size = 3
y_train_reshaped = y_train.reshape(y_train.shape[0], 1, extra_features_size)
y_test_reshaped = y_test.reshape(y_test.shape[0], 1, extra_features_size)

train_data = TensorDataset(torch.Tensor(X_train_reshaped), torch.Tensor(y_train_reshaped))
test_data = TensorDataset(torch.Tensor(X_test_reshaped), torch.Tensor(y_test_reshaped))

# Create the model
model = CVAE(input_size, hidden_size, latent_size, context_size, input_size, num_surfaces)

def loss_function(x_n, x_n_recon, r_n, r_n_recon, mu, log_var):
    # Ensure x_n and r_n are unsqueezed if needed (depends on data shape handling in other parts of your code)
    x_n = torch.unsqueeze(x_n, dim=0) if len(x_n.shape) < 3 else x_n
    r_n = torch.unsqueeze(r_n, dim=0) if len(r_n.shape) < 3 else r_n

    # Calculate the mean squared error, normalized by the number of elements (H*W)
    recon_loss_x = F.mse_loss(x_n_recon, x_n, reduction='mean') / (H * W)
    recon_loss_r = F.mse_loss(r_n_recon, r_n, reduction='mean')

    # Calculate the Kullback-Leibler divergence
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

    # Combine the losses with the scaling factors
    total_loss = recon_loss_x + alpha * recon_loss_r + beta * kl_loss

    return total_loss

# Define the hyperparameter search space
param_grid = {
    'hidden_size': [50, 100, 200, 300, 400],
    'latent_size': [2, 5, 10, 20, 30],
    'learning_rate': [1e-2, 1e-3, 1e-4, 1e-5],
    'alpha': [0.1, 0.5, 1.0, 2.0, 5.0],
    'beta': [1e-6, 1e-5, 1e-4, 1e-3, 1e-2],
    'batch_size': [16, 32, 64, 128],
    'num_epochs': [100, 200, 300, 400, 500]
}

# Generate all possible combinations of hyperparameters
hyperparameter_combinations = ParameterGrid(param_grid)

# Initialize variables to store the best hyperparameters and validation loss
best_hyperparameters = None
best_val_loss = float('inf')

# Define the number of folds for cross-validation
num_folds = 5

# Create a KFold object for cross-validation
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Iterate over each combination of hyperparameters
for params in hyperparameter_combinations:
    hidden_size = params['hidden_size']
    latent_size = params['latent_size']
    learning_rate = params['learning_rate']
    alpha = params['alpha']
    beta = params['beta']
    batch_size = params['batch_size']
    num_epochs = params['num_epochs']

    # Initialize variables to store the total validation loss across all folds
    total_val_loss = 0.0

    # Perform k-fold cross-validation
    for fold, (train_indices, val_indices) in enumerate(kfold.split(X_train_reshaped)):
        print(f"Fold {fold + 1}/{num_folds}")

        # Create data subsets for the current fold
        train_subset = TensorDataset(torch.Tensor(X_train_reshaped[train_indices]), torch.Tensor(y_train_reshaped[train_indices]))
        val_subset = TensorDataset(torch.Tensor(X_train_reshaped[val_indices]), torch.Tensor(y_train_reshaped[val_indices]))

        # Create data loaders for the current fold
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Create the model with the current hyperparameters
        model = CVAE(input_size, hidden_size, latent_size, context_size, input_size, num_surfaces)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Training loop
        for epoch in range(num_epochs):
            # Training
            model.train()
            train_loss = 0.0
            for batch in train_loader:
                x, y = batch
                batch_size = x.shape[0]  # batch_size, context_length, num_surfaces, H, W = x.shape

                # Generate a random day t to split the sequence into context and future
                t = batch_size - num_surfaces - 1

                x_c, x_n = x[:t, :], x[t:, :]
                y_c, y_n = y[:t, :], y[t:, :]
                r_n = y_n[:, :, 0]

                x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)

                # Compute the loss
                loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            # Validation
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch in val_loader:
                    x, y = batch
                    batch_size = x.shape[0]  # Number of days in the sequence

                    # Generate a random day t to split the sequence into context and future
                    t = batch_size - num_surfaces - 1

                    x_c, x_n = x[:t, :], x[t:, :]
                    y_c, y_n = y[:t, :], y[t:, :]

                    # Extract the log return feature from y_n
                    r_n = y_n[:, :, 0]

                    x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)

                    # Compute the loss
                    loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
                    val_loss += loss.item()

            # Print the losses for each epoch
            print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

        # Accumulate the validation loss for the current fold
        total_val_loss += val_loss

    # Calculate the average validation loss across all folds
    avg_val_loss = total_val_loss / num_folds

    # Check if the current hyperparameters yield a better validation loss
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_hyperparameters = params

# Print the best hyperparameters and validation loss
print("Best Hyperparameters:")
print(best_hyperparameters)
print(f"Best Validation Loss: {best_val_loss:.4f}")

# Train the final model with the best hyperparameters on the entire training set
best_model = CVAE(input_size, best_hyperparameters['hidden_size'], best_hyperparameters['latent_size'],
                  context_size, input_size, num_surfaces)
optimizer = optim.Adam(best_model.parameters(), lr=best_hyperparameters['learning_rate'])

train_loader = DataLoader(train_data, batch_size=best_hyperparameters['batch_size'], shuffle=True)

for epoch in range(best_hyperparameters['num_epochs']):
    # Training
    best_model.train()
    train_loss = 0.0
    for batch in train_loader:
        x, y = batch
        batch_size = x.shape[0]  # batch_size, context_length, num_surfaces, H, W = x.shape

        # Generate a random day t to split the sequence into context and future
        t = batch_size - num_surfaces - 1

        x_c, x_n = x[:t, :], x[t:, :]
        y_c, y_n = y[:t, :], y[t:, :]
        r_n = y_n[:, :, 0]

        x_n_recon, r_n_recon, mu, log_var = best_model(x, y, x_c, y_c)

        # Compute the loss
        loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Print the training loss for each epoch
    print(f"Epoch [{epoch+1}/{best_hyperparameters['num_epochs']}], Train Loss: {train_loss/len(train_loader):.4f}")

# Evaluate the final model on the test set
best_model.eval()
test_loss = 0.0
with torch.no_grad():
    for batch in test_loader:
        x, y = batch
        batch_size = x.shape[0]  # Number of days in the sequence

        # Generate a random day t to split the sequence into context and future
        t = batch_size - num_surfaces - 1

        x_c, x_n = x[:t, :], x[t:, :]
        y_c, y_n = y[:t, :], y[t:, :]

        # Extract the log return feature from y_n
        r_n = y_n[:, :, 0]

        x_n_recon, r_n_recon, mu, log_var = best_model(x, y, x_c, y_c)

        # Compute the loss
        loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
        test_loss += loss.item()

print(f"Test Loss: {test_loss/len(test_loader):.4f}")

In [9]:
####IV SURFACE 1 FORECAST ASSESSMENT

import time
from scipy.optimize import brentq
from scipy.stats import norm

moneyness_levels = [0.7, 0.85, 1, 1.15, 1.3]
ttms = [1, 3, 6, 12, 24]

def black_scholes_call(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    
def find_iv(market_price, S, K, T, r):
    def objective(sigma): return market_price - black_scholes_call(S, K, T, r, sigma)
    try:
        return brentq(objective, 1e-6, 3.0, xtol=1e-6)
    except ValueError:
        return np.nan  # or any other default value

def find_closest_moneyness_range_and_ttm(underlying_price, strike_price, T):
    moneyness = underlying_price / strike_price
    closest_range = None
    min_diff = float('inf')
    moneyness_index = 99
    ttm_index = 99
    
    counter = 0
    for level in moneyness_levels:
        diff = abs(moneyness - level)
        if diff < min_diff:
            min_diff = diff
            closest_range = level
            moneyness_index = counter
    counter = counter + 1

    counter=0
    for ttm in ttms:
        if T == ttm:
            ttm_index = counter
        counter = counter + 1

    return closest_range, min_diff, moneyness_index, ttm_index

In [48]:
# Load the saved model state dictionary
model.load_state_dict(torch.load('best_model.pth'))
print("Model loaded successfully.")
# Set the model to evaluation mode
print(model.eval())
    
#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.04 #long term volatility
time_to_maturity = 1.0

file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

file_path = 'combined_iv_data_new.csv'
iv_surfaces = pd.read_csv(file_path, header=[0, 1], index_col=0)

y = iv_surfaces[['Log Return', 'Skew', 'Slope']]

x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


# Normalize features
scaler = StandardScaler()
x = scaler.fit_transform(x)
y = scaler.fit_transform(y)

x = np.array(x)
y = np.array(y)

x_reshaped = x.reshape(x.shape[0],  H, W)

y_reshaped = y.reshape(y.shape[0], 1, extra_features_size)

iv_surfaces_tensor = TensorDataset(torch.Tensor(x_reshaped ), torch.Tensor(y_reshaped))

x = [x[0] for x in iv_surfaces_tensor]
y = [x[1] for x in iv_surfaces_tensor]

bought_options = []

#expiration_date =  50
#asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
#print(asset_price_at_expiration)


for i in range(32, 132): ##start from 32 so i can have enough context
    #print('hi')
    options_t = options.loc[i]
    x_c = x[i-31:i]
    y_c = y[i-31:i]

    x_c = torch.stack(x_c)
    y_c = torch.stack(y_c)
    x_n, r_n = model.generate(x_c, y_c, 1) ##predict 1 unit in the future


    for _, selected_row in options_t.iterrows():
        #print(selected_row)
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)
        if iv == np.nan:
            continue

        #print(iv)
        level, diff, moneyness_index, ttm_index = find_closest_moneyness_range_and_ttm(S, K, int(T))
        if moneyness_index == 99:
            continue

       # print(moneyness_index)
        #print("I am here")
        if diff < 0.05: ##close to actual moneyness level compare ivs with ttm and moneyness on forecast - will be underpriced
           # print(T)
            forecast_iv = x_n[0][0][moneyness_index][ttm_index]
            print(forecast_iv)
            #print(forecast_iv)
            #print(f"black scholes iv: {abs(iv)}")
            #print(f"The forecast iv: {forecast_iv}")
            if abs(forecast_iv) < abs(iv):
                bought_options.append((i, S, K, T, market_price)) 
                #print('buy\n\n')

total_profit = 0

for option in bought_options:
    buy_date, S, K, T, market_price = option
    expiration_date = buy_date + int(T) * 30 #months
    if expiration_date < 5886:
        asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
            # If asset_price_at_expiration is a scalar value, calculate profit as before
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price 
            total_profit += profit
            
print(f"{len(bought_options)} options bought")
print(f"Total profit: {total_profit}")
print(f"Profit per trade: {total_profit / len(bought_options)}")

Model loaded successfully.
CVAE(
  (encoder): Encoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (lstm): LSTM(8, 100, num_layers=2, batch_first=True, dropout=0.2)
    (linear_mu): Linear(in_features=100, out_features=5, bias=True)
    (linear_sigma): Linear(in_features=100, out_features=5, bias=True)
  )
  (context_encoder): ContextEncoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (ls

  x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


tensor(-0.0592, grad_fn=<SelectBackward0>)
tensor(0.0015, grad_fn=<SelectBackward0>)
tensor(-0.0361, grad_fn=<SelectBackward0>)
tensor(-0.0633, grad_fn=<SelectBackward0>)
tensor(-0.0014, grad_fn=<SelectBackward0>)
tensor(-0.0592, grad_fn=<SelectBackward0>)
tensor(0.0015, grad_fn=<SelectBackward0>)
tensor(-0.0361, grad_fn=<SelectBackward0>)
tensor(-0.0633, grad_fn=<SelectBackward0>)
tensor(-0.0014, grad_fn=<SelectBackward0>)
tensor(-0.0592, grad_fn=<SelectBackward0>)
tensor(0.0015, grad_fn=<SelectBackward0>)
tensor(-0.0361, grad_fn=<SelectBackward0>)
tensor(-0.0633, grad_fn=<SelectBackward0>)
tensor(-0.0014, grad_fn=<SelectBackward0>)
tensor(-0.0597, grad_fn=<SelectBackward0>)
tensor(0.0029, grad_fn=<SelectBackward0>)
tensor(-0.0369, grad_fn=<SelectBackward0>)
tensor(-0.0638, grad_fn=<SelectBackward0>)
tensor(-0.0018, grad_fn=<SelectBackward0>)
tensor(-0.0597, grad_fn=<SelectBackward0>)
tensor(0.0029, grad_fn=<SelectBackward0>)
tensor(-0.0369, grad_fn=<SelectBackward0>)
tensor(-0.0638, 

In [43]:
file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.04 #long term volatility
time_to_maturity = 1.0

##Black scholes  ASSESSMENT
longterm_volatility = 0.04 #long term volatility

bought_options = []
for i in range(32, 132): ##start from 32 so i can have enough context
    #print('hi')
    options_t = options.loc[i]
    for _, selected_row in options_t.iterrows():
        #print(selected_row)
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)
        if iv == np.nan:
            continue

        if abs(longterm_volatility) < abs(iv):
            bought_options.append((i, S, K, T, market_price)) 
            #print('buy\n\n')

total_profit = 0

print('options bought')
for option in bought_options:
    buy_date, S, K, T, market_price = option
    expiration_date = buy_date + int(T) * 30 #months
    if expiration_date < 5886:
        asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
            # If asset_price_at_expiration is a scalar value, calculate profit as before
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price 
            total_profit += profit

print(f"{len(bought_options)} options bought")
print(f"Total profit: {total_profit}")
print(f"Profit per trade: {total_profit / len(bought_options)}")

options bought
2208 options bought
Total profit: -2925.015882750329
Profit per trade: -1.324735454144171


In [44]:
file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

##Buy Random  ASSESSMENT
longterm_volatility = 0.04 #long term volatility
#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.04 #long term volatility
time_to_maturity = 1.0

bought_options = []
for i in range(32, 132): ##start from 32 so i can have enough context
    #print('hi')
    options_t = options.loc[i]
    for _, selected_row in options_t.iterrows():
        #print(selected_row)
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)

        import random

        # Generates True or False with equal probability
        result = random.choice([True, False])
        #print(result)

        if result:
            bought_options.append((i, S, K, T, market_price)) 
            #print('buy\n\n')

total_profit = 0

print('options bought')
for option in bought_options:
    buy_date, S, K, T, market_price = option
    expiration_date = buy_date + int(T) * 30 #months
    if expiration_date < 5886:
        asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
            # If asset_price_at_expiration is a scalar value, calculate profit as before
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price 
            total_profit += profit

print(f"{len(bought_options)} options bought")
print(f"Total profit: {total_profit}")
print(f"Profit per trade: {total_profit / len(bought_options)}")

options bought
1279 options bought
Total profit: -1529.4891569069061
Profit per trade: -1.195847659817753


In [45]:
###In this generate empircal expected value/profit
def predict_profit(x_c, y_c, t, N, S, K, market_price):
    total_profit = 0
    num_iterations = 0

    for i in range(1, N):
        x_n, r_n = model.generate(x_c, y_c, t)  # predict t units in the future

        # Calculate the asset price at expiration using the generated log returns
        asset_price_at_expiration = S
       # print(r_n)
        for j in range(t):
            asset_price_at_expiration *= np.exp(r_n[j].detach().numpy())  # Assuming r_n has shape (t, 1, 1)

        # Calculate the profit for the option
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price

        total_profit += profit
        num_iterations += 1

    # Calculate the average profit
    if num_iterations > 0:
        average_profit = total_profit / num_iterations
    else:
        average_profit = 0

    return average_profit
    

    
# Load the saved model state dictionary
model.load_state_dict(torch.load('best_model.pth'))
print("Model loaded successfully.")
# Set the model to evaluation mode
print(model.eval())
    
#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.04 #long term volatility
time_to_maturity = 1.0

file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

file_path = 'combined_iv_data_new.csv'
iv_surfaces = pd.read_csv(file_path, header=[0, 1], index_col=0)

y = iv_surfaces[['Log Return', 'Skew', 'Slope']]

x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


# Normalize features
scaler = StandardScaler()
x = scaler.fit_transform(x)
y = scaler.fit_transform(y)

x = np.array(x)
y = np.array(y)

x_reshaped = x.reshape(x.shape[0],  H, W)

y_reshaped = y.reshape(y.shape[0], 1, extra_features_size)

iv_surfaces_tensor = TensorDataset(torch.Tensor(x_reshaped ), torch.Tensor(y_reshaped))

x = [x[0] for x in iv_surfaces_tensor]
y = [x[1] for x in iv_surfaces_tensor]

bought_options = []


N = 10 ##outcomes to generate


##only do ttm of 1
for i in range(32, 132): ##start from 32 so i can have enough context
    x_hat = []
    r_hat = []

    
    options_t = options.loc[i]
    x_c = x[i-31:i]
    y_c = y[i-31:i]

    x_c = torch.stack(x_c)
    y_c = torch.stack(y_c)

    for _, selected_row in options_t.iterrows():
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        if int(T) != 24:
            continue
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)

        empircal_profit = predict_profit(x_c, y_c, int(T), N, S, K, market_price)
        if iv == np.nan:
            continue

        level, diff, moneyness_index, ttm_index = find_closest_moneyness_range_and_ttm(S, K, int(T))
        if moneyness_index == 99:
            continue

        if diff < 0.05: ##close to actual moneyness level compare ivs with ttm and moneyness on forecast - will be underpriced
            if empircal_profit > 1: ###if profit is above a threshold here for room 
                bought_options.append((i, S, K, T, market_price)) 

    #print(i)

total_profit = 0

print('options bought')
for option in bought_options:
    buy_date, S, K, T, market_price = option
    expiration_date = buy_date + int(T) * 30 #months
    if expiration_date < 5886:
        asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
            # If asset_price_at_expiration is a scalar value, calculate profit as before
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price 
            total_profit += profit

print(f"{len(bought_options)} options bought")
print(f"Total profit: {total_profit}")
print(f"Profit per trade: {total_profit / len(bought_options)}")

Model loaded successfully.
CVAE(
  (encoder): Encoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (lstm): LSTM(8, 100, num_layers=2, batch_first=True, dropout=0.2)
    (linear_mu): Linear(in_features=100, out_features=5, bias=True)
    (linear_sigma): Linear(in_features=100, out_features=5, bias=True)
  )
  (context_encoder): ContextEncoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (ls

  x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


options bought
300 options bought
Total profit: -939.9564074235823
Profit per trade: -3.1331880247452744


In [10]:
###In this generate empircal expected value/profit
def predict_iv(x_c, y_c, t, N, moneyness_index, ttm_index):
    total_profit = 0
    num_iterations = 0

    sum_iv = 0
    num_iv = 0
    ##by the WLLN if our model is accurate it should converge to the true average profitability making a positive ev trading strategy
    for i in range(1, N):
        x_n, r_n = model.generate(x_c, y_c, t)  # predict t units in the future
        predicted_iv_surface = x_n[0][0]
        #print(f"x_n: {x_n}")
        #print(f"predicted iv surface: {predicted_iv_surface}")
        #print(predicted_iv_surface)
        iv = predicted_iv_surface[ttm_index][moneyness_index]
        #print(iv)
        sum_iv = sum_iv + abs(iv)
        num_iv = num_iv + 1
        #print('here')

    forecast_iv = (sum_iv / num_iv) #num_iv just N no

    #print(f"forecast iv: {forecast_iv}")

    return forecast_iv 
    

    
# Load the saved model state dictionary
model.load_state_dict(torch.load('best_model.pth'))
print("Model loaded successfully.")
# Set the model to evaluation mode
print(model.eval())
    
#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.04 #long term volatility
time_to_maturity = 1.0

file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

file_path = 'combined_iv_data_new.csv'
iv_surfaces = pd.read_csv(file_path, header=[0, 1], index_col=0)

y = iv_surfaces[['Log Return', 'Skew', 'Slope']]

x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


# Normalize features
scaler = StandardScaler()
x = scaler.fit_transform(x)
y = scaler.fit_transform(y)

x = np.array(x)
y = np.array(y)

x_reshaped = x.reshape(x.shape[0],  H, W)

y_reshaped = y.reshape(y.shape[0], 1, extra_features_size)

iv_surfaces_tensor = TensorDataset(torch.Tensor(x_reshaped ), torch.Tensor(y_reshaped))

x = [x[0] for x in iv_surfaces_tensor]
y = [x[1] for x in iv_surfaces_tensor]

bought_options = []


N = 100 ##outcomes to generate


##only do ttm of 1
for i in range(32, 132): ##start from 32 so i can have enough context
    x_hat = []
    r_hat = []

    
    options_t = options.loc[i]
    x_c = x[i-31:i]
    y_c = y[i-31:i]

    x_c = torch.stack(x_c)
    y_c = torch.stack(y_c)

    for _, selected_row in options_t.iterrows():
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        if int(T) != 24:
            continue
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)
        #print(iv)
        if iv == np.nan:
            continue
            
        level, diff, moneyness_index, ttm_index = find_closest_moneyness_range_and_ttm(S, K, int(T))

        if moneyness_index == 99:
            continue
            
        forecast_iv = predict_iv(x_c, y_c, int(T), N, moneyness_index, ttm_index)

        if diff < 0.05: ##close to actual moneyness level compare ivs with ttm and moneyness on forecast - will be underpriced
            #print('I AM HERE')
            if abs(forecast_iv) < abs(iv): ###if profit is above a threshold here for room 
                bought_options.append((i, S, K, T, market_price)) 

    #print(i)

total_profit = 0

print(f"{len(bought_options)} options bought")
for option in bought_options:
    buy_date, S, K, T, market_price = option
    expiration_date = buy_date + int(T) * 30 #months
    if expiration_date < 5886:
        asset_price_at_expiration = options.loc[expiration_date]["Asset Price"].iloc[0]
            # If asset_price_at_expiration is a scalar value, calculate profit as before
        if asset_price_at_expiration > K:
            profit = asset_price_at_expiration - K - market_price
        else:
            profit = -market_price 
            total_profit += profit

print(f"{len(bought_options)} options bought")
print(f"Total profit: {total_profit}")
print(f"Profit per trade: {total_profit / len(bought_options)}")

Model loaded successfully.
CVAE(
  (encoder): Encoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (lstm): LSTM(8, 100, num_layers=2, batch_first=True, dropout=0.2)
    (linear_mu): Linear(in_features=100, out_features=5, bias=True)
    (linear_sigma): Linear(in_features=100, out_features=5, bias=True)
  )
  (context_encoder): ContextEncoder(
    (cnn): CNN(
      (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (fc): Linear(in_features=125, out_features=5, bias=True)
    )
    (mlp): Identity()
    (ls

  x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


KeyboardInterrupt: 