In [1]:
import torch
import torch.nn as nn

class CVAE_LSTM(nn.Module):
    def __init__(self, iv_dim, feature_dim, hidden_dim, latent_dim, num_layers, dropout_rate):
        input_dim = iv_dim + feature_dim
        super(CVAE_LSTM, self).__init__() ##call the constructor of the pytorch nn module for the nn architecture
        self.iv_dim = iv_dim
        self.feature_dim = feature_dim
        
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.num_layers = num_layers
        self.dropout_rate = dropout_rate #probability a neuron wont activate/be dropped out - prevents overfitting and coadaption btw nn

        # Encoder

        ##we define an an lstm layer for the encoder consuming the ts data and providing a hidden state
        ##the mu and logvar layers provide nns to predict the parameters of the prob distribution for the latent represenation z given context x
        ##these consume the hidden state to produce the predictions
        self.encoder_lstm = nn.LSTM(iv_dim + 1, hidden_dim, num_layers, batch_first=True, dropout=dropout_rate) ##only encode log returns for future
        self.encoder_mu = nn.Linear(hidden_dim, latent_dim)
        self.encoder_logvar = nn.Linear(hidden_dim, latent_dim)

        # Context Encoder
        ##the encoder learns the probability distribution then when we have an data to encode we use this, maybe use the 
        self.context_encoder_lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout_rate) #why only to lstm? - smth to write about

        # Decoder
        self.decoder_lstm = nn.LSTM(latent_dim + hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.decoder_output = nn.Linear(hidden_dim, input_dim) #output is not input dim

    def encode(self, x):
        _, (h_n, _) = self.encoder_lstm(x)
        h_n = h_n.view(self.num_layers, -1, self.hidden_dim)[-1]
        mu = self.encoder_mu(h_n)
        logvar = self.encoder_logvar(h_n)
        return mu, logvar

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z, context):
        decoder_input = torch.cat((z, context), dim=-1)
        output, _ = self.decoder_lstm(decoder_input)
        output = self.decoder_output(output)
        return output

    def forward(self, x_c, y_c, x_n, r_n):
        # Encode historical context
        _, (context, _) = self.context_encoder_lstm(torch.cat((x_c, y_c), dim=-1))
        context = context.view(self.num_layers, -1, self.hidden_dim)[-1]
        
        # Encode future values - will have a different dimensionality 25 + 1 as we only predict future log returns
        mu, logvar = self.encode(torch.cat((x_n, r_n), dim=-1))
        z = self.reparameterize(mu, logvar)
        # Decode future values
        x_hat, r_hat = self.decode(z, context).split(self.iv_dim, dim=-1)

        return x_hat, r_hat, mu, logvar

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the DataFrame from the provided CSV file
file_path = 'combined_iv_data.csv'
df = pd.read_csv(file_path, header=[0,1], index_col=0)

#extra features will be these below
Y = df[['Log Return', 'Skew', 'Slope']]

# X will be all iv surface data - no features
X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_scaled = scaler.fit_transform(Y)

# Split data into training and temporary set
#tarining will have 4000 (saved 2000 for test and validation in temp)
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, Y_scaled, test_size=2000, random_state=42)

#X_train has all iv surface data to train with
#y_train has all the extra features

# Split the temporary set into validation and test sets, by splitting temp in half -> 1000 each
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Output the shapes of the splits to confirm the operation
print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Test set shape:", X_test.shape, y_test.shape)

Training set shape: (3885, 25) (3885, 3)
Validation set shape: (1000, 25) (1000, 3)
Test set shape: (1000, 25) (1000, 3)


  X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)


In [3]:
import random

def generate_random_day(n, arr):
    """
    Generates a random number between 1 and n,
    and checks if it is not already in the given array.
    """
    while True:
        # Generate a random number between 1 and n
        random_number = random.randint(1, n - 1)
        
        # Check if the random number is not in the array
        if random_number not in arr:
            arr.append(random_number)
            return random_number

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Set random seeds for reproducibility
torch.manual_seed(0)

# Define hyperparameters
latent_dim = 5
context_dim = 5
hidden_dim = 100
num_layers = 2
dropout_rate = 0.2
kl_weight = 1e-5
num_epochs = 500
batch_size = 32
learning_rate = 1e-3

train_data = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
val_data = TensorDataset(torch.Tensor(X_val), torch.Tensor(y_val))
test_data = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

#input is historical iv and extra features
model = CVAE_LSTM(iv_dim=(X_train.shape[-1]), feature_dim=(y_train.shape[-1]), hidden_dim=hidden_dim, latent_dim=latent_dim,
                  num_layers=num_layers, dropout_rate=dropout_rate)


# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate) #step in right direction

arr = []
# Training loop
best_val_loss = float('inf')
for epoch in range(num_epochs):
    model.train()
    train_surface_loss = 0.0
    train_return_loss = 0.0
    train_kl_loss = 0.0

    for batch in train_loader:
        #this gets us all iv surface and extra feature data for the batch
        X_batch, Y_batch = batch

        n = Y_batch.shape[0] #days to sample from
        #generate a random t not already generated
        t = generate_random_day(n, arr)

        #up to t-1 is context from t -> end is future
        x_c, x_n = X_batch[:t], X_batch[t:]
        y_c, r_n = Y_batch[:t], Y_batch[t:] #need only the asset returns here

        #now from here reassing r_n to only have the log return feature - log returns at 0th index
        r_n = r_n[:, 0].unsqueeze(1)
    
        x_hat, r_hat, mu, logvar = model(x_c, y_c, x_n, r_n)


        surface_loss = criterion(x_hat, x_n)
        return_loss = criterion(r_hat, r_n)
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

        loss = surface_loss + return_loss + kl_weight * kl_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_surface_loss += surface_loss.item()
        train_return_loss += return_loss.item()
        train_kl_loss += kl_loss.item()
        print(f"IV Surface Loss: {surface_loss.item()}")
        print(f"Log Return Loss: {return_loss.item()}")
        print(f"KL Loss: {kl_loss.item()}\n")

    # Validation
    model.eval()
    val_surface_loss = 0.0
    val_return_loss = 0.0
    val_kl_loss = 0.0
    arr = []
    
    with torch.no_grad():
        for batch in val_loader:
            #this gets us all iv surface and extra feature data for the batch
            X_batch, Y_batch = batch
    
            n = Y_batch.shape[0] #days to sample from
            #generate a random t not already generated
            t = generate_random_day(n, arr)
    
            #up to t-1 is context from t -> end is future
            x_c, x_n = X_batch[:t], X_batch[t:]
            y_c, r_n = Y_batch[:t], Y_batch[t:] #need only the asset returns here
    
            #now from here reassing r_n to only have the log return feature - log returns at 0th index
            r_n = r_n[:, 0].unsqueeze(1)
            
            x_hat, r_hat, mu, logvar = model(x_c, y_c, x_n, r_n)

            surface_loss = criterion(x_hat, x_n)
            return_loss = criterion(r_hat, r_n)
            kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

            val_surface_loss += surface_loss.item()
            val_return_loss += return_loss.item()
            val_kl_loss += kl_loss.item()
            print("ahahha")

    # Print epoch statistics
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Surface Loss: {train_surface_loss / len(train_loader):.4f}, "
          f"Train Return Loss: {train_return_loss / len(train_loader):.4f}, "
          f"Train KL Loss: {train_kl_loss / len(train_loader):.4f}, "
          f"Val Surface Loss: {val_surface_loss / len(val_loader):.4f}, "
          f"Val Return Loss: {val_return_loss / len(val_loader):.4f}, "
          f"Val KL Loss: {val_kl_loss / len(val_loader):.4f}")

    # Save the best model based on validation loss
    val_loss = val_surface_loss + val_return_loss + kl_weight * val_kl_loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')

# Evaluation on the test set
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
test_surface_loss = 0.0
test_return_loss = 0.0
test_kl_loss = 0.0
arr = []
with torch.no_grad():
    for batch in test_loader:
        #this gets us all iv surface and extra feature data for the batch
        X_batch, Y_batch = batch

        n = Y_batch.shape[0] #days to sample from
        #generate a random t not already generated
        t = generate_random_day(n, arr)

        #up to t-1 is context from t -> end is future
        x_c, x_n = X_batch[:t], X_batch[t:]
        y_c, r_n = Y_batch[:t], Y_batch[t:] #need only the asset returns here

        #now from here reassing r_n to only have the log return feature - log returns at 0th index
        r_n = r_n[:, 0].unsqueeze(1)
        x_hat, r_hat, mu, logvar = model(x_c, y_c, x_n, r_n)

        surface_loss = criterion(x_hat, x_n)
        return_loss = criterion(r_hat, r_n)
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

        test_surface_loss += surface_loss.item()
        test_return_loss += return_loss.item()
        test_kl_loss += kl_loss.item()

print(f"Test Surface Loss: {test_surface_loss / len(test_loader):.4f}, "
      f"Test Return Loss: {test_return_loss / len(test_loader):.4f}, "
      f"Test KL Loss: {test_kl_loss / len(test_loader):.4f}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

IV Surface Loss: 1.0273935794830322
Log Return Loss: 0.9186657667160034
KL Loss: 0.011667430400848389

IV Surface Loss: 0.9749563932418823
Log Return Loss: 0.7491581439971924
KL Loss: 0.010328978300094604

IV Surface Loss: 2.118133306503296
Log Return Loss: 0.7288910746574402
KL Loss: 0.013163387775421143

IV Surface Loss: 0.876727819442749
Log Return Loss: 0.8127350807189941
KL Loss: 0.013361185789108276

IV Surface Loss: 1.038958191871643
Log Return Loss: 0.8238745331764221
KL Loss: 0.015007048845291138

IV Surface Loss: 1.2281951904296875
Log Return Loss: 1.2625442743301392
KL Loss: 0.016469061374664307

IV Surface Loss: 1.2127280235290527
Log Return Loss: 0.7841317653656006
KL Loss: 0.015723377466201782

IV Surface Loss: 1.0995434522628784
Log Return Loss: 2.412966012954712
KL Loss: 0.01393178105354309

IV Surface Loss: 0.8995498418807983
Log Return Loss: 0.9098942279815674
KL Loss: 0.01361304521560669

IV Surface Loss: 1.0473361015319824
Log Return Loss: 0.8535180687904358
KL Loss

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

IV Surface Loss: 0.8183916211128235
Log Return Loss: 1.567029356956482
KL Loss: 0.02040991187095642

IV Surface Loss: 0.8264096975326538
Log Return Loss: 1.4791979789733887
KL Loss: 0.027158737182617188

IV Surface Loss: 1.146388292312622
Log Return Loss: 1.1815879344940186
KL Loss: 0.026656419038772583

IV Surface Loss: 0.4098665416240692
Log Return Loss: 0.7016210556030273
KL Loss: 0.014772117137908936

IV Surface Loss: 0.24276329576969147
Log Return Loss: 0.5029217004776001
KL Loss: 0.011256992816925049

IV Surface Loss: 1.3103806972503662
Log Return Loss: 1.279817819595337
KL Loss: 0.026537925004959106

IV Surface Loss: 0.9464860558509827
Log Return Loss: 0.9408299326896667
KL Loss: 0.02828758955001831

IV Surface Loss: 1.09833824634552
Log Return Loss: 0.664018452167511
KL Loss: 0.030503660440444946

IV Surface Loss: 1.2054194211959839
Log Return Loss: 0.966587483882904
KL Loss: 0.02053019404411316

IV Surface Loss: 0.5995274782180786
Log Return Loss: 1.1887192726135254
KL Loss: 0

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

IV Surface Loss: 0.857948362827301
Log Return Loss: 1.1669273376464844
KL Loss: 0.03228771686553955

IV Surface Loss: 0.5402281880378723
Log Return Loss: 1.1719120740890503
KL Loss: 0.03198656439781189

IV Surface Loss: 0.7334268689155579
Log Return Loss: 0.7508378624916077
KL Loss: 0.0427497923374176

IV Surface Loss: 0.6305726170539856
Log Return Loss: 0.6677422523498535
KL Loss: 0.0354803204536438

IV Surface Loss: 0.9311909675598145
Log Return Loss: 1.767519235610962
KL Loss: 0.03237342834472656

IV Surface Loss: 0.6642863154411316
Log Return Loss: 0.3541199564933777
KL Loss: 0.03373616933822632

IV Surface Loss: 0.7717457413673401
Log Return Loss: 0.7232272028923035
KL Loss: 0.043373048305511475

IV Surface Loss: 1.4280444383621216
Log Return Loss: 1.362480878829956
KL Loss: 0.05516138672828674

IV Surface Loss: 1.2730417251586914
Log Return Loss: 0.8026449680328369
KL Loss: 0.045615196228027344

