In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F



##The encoder in the paper takes x in R^(TxHxW) and y in R^(TxE) and maps them to R^(TxL) the dimensionality of the context encoder
##and encoder is set to 5. So this cnn needs to output (32x5) so each (surface in R^(5x5) -> (z in R^5)

##we enhance the dimensionality of the iv surface first by upgrading the number of channels. The reasoning behind this is similar to why 
##we do this in transformer architecture. A larger dimensional space will be able to capture more nuanced information and represent it in number form
##then we compress this to something digestable
class CNN(nn.Module):
    #input_size and output_size represent the number of channels in the input and output data
    #channels is the number of dimensions a single data point will have ie RGB = 3 channelss
    def __init__(self, input_size, output_size):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(input_size, output_size, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(output_size, output_size, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(output_size, output_size, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(output_size * 5 * 5, 5)

    def forward(self, x):
        batch_size, H, W = x.shape
        x = x.reshape(batch_size, 1, H, W)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        #print(f"x shape before resize and convultion passthrough is {x.shape}")
        #x = x.reshape(batch_size, 1, 5)
        x = x.view(batch_size, -1)
        x = self.fc(x)
        #print(f"after passthrough into convultion layers and fully connected layer ther shape of x is {x.shape}")
        return x

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, y):
        x = F.relu(self.fc1(y))
        x = self.fc2(y)
        return y

In [2]:
##had to make another cnn for decoder due difference in dimensionality

##in here the output size should be the number of days in the future?

##for now output_size = 1 so we predict 1 day into the future?
class TCNN(nn.Module):
    def __init__(self, input_size, output_size, num_surfaces):
        super(TCNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 5 * 5 * num_surfaces)
        self.output_size = output_size
        self.num_surfaces = num_surfaces
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = x.view(-1, self.num_surfaces, 5, 5)
        return x

In [3]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size):
        super(Encoder, self).__init__()
        self.cnn = CNN(input_size, 5)
        self.mlp = nn.Identity()
        self.lstm = nn.LSTM(5 + 3, hidden_size, num_layers=2, batch_first=True, dropout=0.2)
        self.linear_mu = nn.Linear(hidden_size, latent_size)
        self.linear_sigma = nn.Linear(hidden_size, latent_size)

    def forward(self, x, y):
        x_encoded = self.cnn(x)
        y_encoded = self.mlp(y)
        y_encoded = torch.squeeze(y_encoded, dim=1)
        #print(f"x_encoded shape is {x_encoded.shape}  y_encoded shape is {y_encoded.shape}")
        encoded = torch.cat((x_encoded, y_encoded), dim=-1)
        #print(f"concatenated vector is of size {encoded.shape}")
        _, (hidden, _) = self.lstm(encoded)
        #print("hidden state created")
        hidden = hidden[-1]  # Take the last hidden state
        mu = self.linear_mu(hidden)
        log_var = self.linear_sigma(hidden)
        z = self.reparameterize(mu, log_var)
        #print("encoding successful")
        return z, mu, log_var

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

class ContextEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, context_size):
        super(ContextEncoder, self).__init__()
        self.cnn = CNN(input_size, 5)
        self.mlp = nn.Identity()
        self.lstm = nn.LSTM(5 + 3, hidden_size, num_layers=2, batch_first=True, dropout=0.2)
        self.linear = nn.Linear(hidden_size, context_size)

    def forward(self, x_c, y_c):
        x_encoded = self.cnn(x_c)
        y_encoded = self.mlp(y_c)
        y_encoded = torch.squeeze(y_encoded, dim=1)
        #print(f"x_encoded size = {x_encoded.shape} ||y_encoded size = {y_encoded.shape}")
        encoded = torch.cat((x_encoded, y_encoded), dim=-1)
        _, (hidden, _) = self.lstm(encoded)
        hidden = hidden[-1]  # Take the last hidden state
        zeta = self.linear(hidden)
        #print("context encoding successful")
        return zeta

class Decoder(nn.Module):
    def __init__(self, latent_size, hidden_size, output_size, num_surfaces):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(latent_size + 5, hidden_size, num_layers=2, batch_first=True, dropout=0.2) #latent
        self.tcnn = TCNN(hidden_size, output_size, num_surfaces)
        self.mlp = nn.Linear(hidden_size, 1)

    def forward(self, z, zeta):
        # Reshape z and zeta to have shape (1, 5)
        z = z.view(1, -1)
        zeta = zeta.view(1, -1)
        # Concatenate z and zeta along the second dimension to get shape (1, 10)
        z_concat = torch.cat((z, zeta), dim=1)
        
        #print(f"z_concat is of size {z_concat.shape}")
        hidden, _ = self.lstm(z_concat)
        #print('i am here')
        #print(f"hidden state published, hidden state shape {hidden.shape}")
        x_n = self.tcnn(hidden) 
        #print('i am here')
        r_n = self.mlp(hidden)
        #print("decoding successful")
        return torch.squeeze(x_n, dim=0), torch.squeeze(r_n, dim=0)

In [4]:
class CVAE(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size, context_size, output_size, num_surfaces):
        super(CVAE, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, latent_size)
        self.context_encoder = ContextEncoder(input_size, hidden_size, context_size)
        self.decoder = Decoder(latent_size, hidden_size, output_size, num_surfaces)
        self.latent_size = latent_size
        self.context_size = context_size

    def forward(self, x, y, x_c, y_c):
        #print('1')
        z, mu, log_var = self.encoder(x, y) ##we have sampled z from distribution here
        #print(f"The shape of latent representation z is {z.shape}")
        #print('2')
        zeta = self.context_encoder(x_c, y_c) ##we have sampled zeta from distribution here
        #print(f"the shape of zeta (encoded context) is {zeta.shape}")
        x_n, r_n = self.decoder(z, zeta)
        return x_n, r_n, mu, log_var

    def generate(self, x_c, y_c):
        z = torch.tensor(np.array([np.random.normal(loc=0, scale=1, size=5)]), dtype=torch.float32)  
        zeta = self.context_encoder(x_c,y_c)  
        x_n, r_n = self.decoder(z, zeta) 
        print
        return x_n, r_n

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the DataFrame from the provided CSV file
file_path = 'combined_iv_data.csv'
df = pd.read_csv(file_path, header=[0, 1], index_col=0)

# Extract extra features
Y = df[['Log Return', 'Skew', 'Slope']]

# Extract IV surface data
X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Y_scaled = scaler.fit_transform(Y)

# Split data into training and temporary set
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, Y_scaled, test_size=2000, random_state=42)

# Split the temporary set into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

  X = df.drop(['Log Return', 'Skew', 'Slope'], axis=1)


In [6]:
import random

def generate_random_day(n):
    """
    Generates a random number between 1 and n
    """
    # Generate a random number between 1 and n
    random_number = random.randint(1, n - 1)
    return random_number

In [7]:
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.utils import mkldnn as mkldnn_utils

# Hyperparameters
input_size = 25
hidden_size = 100
latent_size = 5
context_size = 5
input_size = 1
output_size = 5
learning_rate = 1e-3
num_epochs = 500
batch_size = 32
alpha = 1.0
beta = 1e-5


# Reshape the input data to the desired 5-dimensional shape
batch_size_train = X_train.shape[0]
batch_size_val = X_val.shape[0]
batch_size_test = X_test.shape[0]
sequence_length = 1  # 
num_surfaces = 1  # the number of surfaces to generate


H = 5  # Height of the IV surface grid
W = 5  # Width of the IV surface grid

#print(X_train.shape)
X_train_reshaped = X_train.reshape(batch_size_train,  H, W)
X_val_reshaped = X_val.reshape(batch_size_val, H, W)
X_test_reshaped = X_test.reshape(batch_size_test, H, W)
#print(X_train_reshaped.shape)
extra_features_size = 3

y_train_reshaped = y_train.reshape(y_train.shape[0], 1, extra_features_size)
y_val_reshaped = y_val.reshape(y_val.shape[0], 1, extra_features_size)
y_test_reshaped = y_test.reshape(y_test.shape[0], 1, extra_features_size)

train_data = TensorDataset(torch.Tensor(X_train_reshaped), torch.Tensor(y_train_reshaped))
val_data = TensorDataset(torch.Tensor(X_val_reshaped), torch.Tensor(y_val_reshaped))
test_data = TensorDataset(torch.Tensor(X_test_reshaped), torch.Tensor(y_test_reshaped))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Create the model
model = CVAE(input_size, hidden_size, latent_size, context_size, input_size, num_surfaces)

In [None]:
def loss_function(x_n, x_n_recon, r_n, r_n_recon, mu, log_var):
    # Ensure x_n and r_n are unsqueezed if needed (depends on data shape handling in other parts of your code)
    x_n = torch.unsqueeze(x_n, dim=0) if len(x_n.shape) < 3 else x_n
    r_n = torch.unsqueeze(r_n, dim=0) if len(r_n.shape) < 3 else r_n

    # Calculate the mean squared error, normalized by the number of elements (H*W)
    recon_loss_x = F.mse_loss(x_n_recon, x_n, reduction='mean') / (H * W)
    recon_loss_r = F.mse_loss(r_n_recon, r_n, reduction='mean')

    # Calculate the Kullback-Leibler divergence
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

    # Combine the losses with the scaling factors
    total_loss = recon_loss_x + alpha * recon_loss_r + beta * kl_loss

    return total_loss

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
best_val_loss = float('inf')
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        x, y = batch
        batch_size = x.shape[0] #batch_size, context_length, num_surfaces, H, W = x.shape
        
        # Generate a random day t to split the sequence into context and future
        t = batch_size - num_surfaces - 1
        #print(t)

        x_c, x_n = x[:t, :], x[t:, :]
        y_c, y_n = y[:t, :], y[t:, :]
            
        r_n = y_n[:, :, 0]
        
        x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
        
        # Compute the loss
        loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            x, y = batch
            batch_size = x.shape[0]  # Number of days in the sequence
            
            # Generate a random day t to split the sequence into context and future
            #t = generate_random_day(batch_size)
            t = batch_size - num_surfaces - 1
            
            x_c, x_n = x[:t, :], x[t:, :]
            y_c, y_n = y[:t, :], y[t:, :]
            
            # Extract the log return feature from y_n
            r_n = y_n[:, :, 0]
            
            x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
            #print(f"x_n_recon: {x_n_recon}")
            
            # Compute the loss
            loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
            
            val_loss += loss.item()
    
    # Print the losses for each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")
    
    # Save the best model based on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')

# Testing
model.load_state_dict(torch.load('best_model.pth'))
model.eval()
#test_loss = 0.0
#with torch.no_grad():
 #   for batch in test_loader:
  #      x, y = batch
   #     batch_size = x.shape[0]  # Number of days in the sequence
    #    
        # Generate a random day t to split the sequence into context and future
     #   t = batch_size - num_surfaces - 1
      #  
       # x_c, x_n = x[:, :t], x[:, t:]
        #y_c, y_n = y[:, :t], y[:, t:]
        
        # Extract the log return feature from y_n
        #r_n = y_n[:, :, 0]
        
        #x_n_recon, r_n_recon, mu, log_var = model(x, y, x_c, y_c)
        
        # Compute the loss
        #loss = loss_function(x_n[0], x_n_recon, r_n[0], r_n_recon, mu, log_var)
        
        #test_loss += loss.item()

#ßprint(f"Test Loss: {test_loss/len(test_loader):.4f}") 

In [None]:
import time
from scipy.optimize import brentq
from scipy.stats import norm

moneyness_levels = [0.7, 0.85, 1, 1.15, 1.3]
ttms = [1, 3, 6, 12, 24]

def black_scholes_call(S, K, T, r, sigma):
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    
def find_iv(market_price, S, K, T, r):
    def objective(sigma): return market_price - black_scholes_call(S, K, T, r, sigma)
    try:
        return brentq(objective, 1e-6, 3.0, xtol=1e-6)
    except ValueError:
        return np.nan  # or any other default value

def find_closest_moneyness_range_and_ttm(underlying_price, strike_price, T):
    moneyness = underlying_price / strike_price
    closest_range = None
    min_diff = float('inf')
    moneyness_index = 99
    ttm_index = 99
    
    counter = 0
    for level in moneyness_levels:
        diff = abs(moneyness - level)
        if diff < min_diff:
            min_diff = diff
            closest_range = level
            moneyness_index = counter
    counter = counter + 1

    counter=0
    for ttm in ttms:
        if T == ttm:
            ttm_index = counter
        counter = counter + 1

    return closest_range, min_diff, moneyness_index, ttm_index


    
#underlying params for black scholes
r = 0.05 #risk_free_rate
volatility = 0.2
time_to_maturity = 1.0

file_path = 'option_prices_timeseries.csv'
options = pd.read_csv(file_path, index_col=0)

file_path = 'combined_iv_data_new.csv'
iv_surfaces = pd.read_csv(file_path, header=[0, 1], index_col=0)

y = iv_surfaces[['Log Return', 'Skew', 'Slope']]

x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


# Normalize features
scaler = StandardScaler()
x = scaler.fit_transform(x)
y = scaler.fit_transform(y)

x = np.array(x)
y = np.array(y)

x_reshaped = x.reshape(x.shape[0],  H, W)

y_reshaped = y.reshape(y.shape[0], 1, extra_features_size)

iv_surfaces_tensor = TensorDataset(torch.Tensor(x_reshaped ), torch.Tensor(y_reshaped))

x = [x[0] for x in iv_surfaces_tensor]
y = [x[1] for x in iv_surfaces_tensor]

for i in range(32, 5886): ##start from 32 so i can have enough context
    #print('hi')
    options_t = options.loc[i]
    x_c = x[i-31:i]
    y_c = y[i-31:i]

    x_c = torch.stack(x_c)
    y_c = torch.stack(y_c)
    x_n, r_n = model.generate(x_c, y_c)

    for _, selected_row in options_t.iterrows():
        #print(selected_row)
        S = selected_row["Asset Price"] 
        K = selected_row["Strike"]
        T = selected_row["Time to Maturity (Months)"] ##TTM in months
        market_price = selected_row["Call Price"]
        iv = find_iv(market_price, S, K, T / 12, r)
        print(f"black scholes iv: {iv}")
        if iv == np.nan:
            continue

        #print(iv)
        level, diff, moneyness_index, ttm_index = find_closest_moneyness_range_and_ttm(S, K, int(T))
        if moneyness_index == 99:
            continue

       # print(moneyness_index)
        #print("I am here")
        if diff < 1: ##close to actual moneyness level compare ivs with ttm and moneyness on forecast - will be underpriced
           # print(T)
            forecast_iv = x_n[0][moneyness_index][ttm_index]
            print(f"The forecast iv: {forecast_iv}\n\n")
            if abs(forecast_iv) < iv:
                print('buy')
            else: 
                print('do nothing')

  x = iv_surfaces.drop(['Log Return', 'Skew', 'Slope'], axis=1)


hi
black scholes iv: 0.6086691968649959
I am here
The forecast iv: -0.0546785332262516
buy
black scholes iv: 0.40057981764749645
I am here
The forecast iv: 0.025480907410383224
buy
black scholes iv: nan
I am here
The forecast iv: -0.018116645514965057
do nothing
black scholes iv: 0.2460180765237677
I am here
The forecast iv: -0.10227195173501968
buy
black scholes iv: 0.15910488247396432
I am here
The forecast iv: -0.00835330504924059
buy
black scholes iv: 0.3705230789682085
I am here
The forecast iv: -0.0546785332262516
buy
black scholes iv: 0.22406719122267918
I am here
The forecast iv: 0.025480907410383224
buy
black scholes iv: 0.2151409444450977
I am here
The forecast iv: -0.018116645514965057
buy
black scholes iv: 0.1963122623068163
I am here
The forecast iv: -0.10227195173501968
buy
black scholes iv: 0.19860274127127445
I am here
The forecast iv: -0.00835330504924059
buy
black scholes iv: 0.19547029723091291
I am here
The forecast iv: -0.0546785332262516
buy
black scholes iv: 0.20