In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad
from torch.utils.data import DataLoader

import numpy as np
import torch
from scipy.stats import norm


In [28]:
from IPython.core.display import HTML
HTML("""
<style>
body { font-family: "Helvetica Neue", sans-serif; font-size: 15px; }
h1, h2, h3 { color: #34495e; }
p { line-height: 1.6; }
</style>
""")

In [None]:
class OPNN(nn.Module):

    def __init__(self, input_dim = 3, hidden_dim = 3, num_hidden_layers = 2):

        super(OPNN, self).__init__()

        # input layers
        layers = [
            
            nn.Linear(input_dim, hidden_dim),
            nn.Tanh(),
            
        ]

        # hidden layers 
        for _ in range(num_hidden_layers - 1):

            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.Tanh())

        # output layer
        layers.append(nn.Linear(hidden_dim, 1))

        self.model = nn.Sequential(*layers) # unbounded/continuous

    def forward(self, x):

        return self.model(x)

        

note: tanh is smoother than ReLU and works better for PDE tasks (that's why no GELU)
final layer doesn't have activation, allows output range to be unconstrained
requires_grad for auto grad on the PDE residuals

S: Spot Price
K: Strike Price
T: Time to Maturity

Deeper Networks will overfit/struggle with PDE loss

In [None]:
# default 3, 3, 2
model = OPNN()

# sample batch S = 100, K = 100, T = 0.5
# output.shape = [1, 1]
sample_input = torch.tensor([[100.0, 100.0, 0.5]], requires_grad=True) # shape [1, 3]
normalized_input = sample_input / torch.tensor([100.0, 100.0, 1.0]) # normalization

output = model(normalized_input)
print(output)

tensor([[0.1164]], grad_fn=<AddmmBackward0>)


In [None]:
output = model(normalized_input)

# https://docs.pytorch.org/docs/stable/generated/torch.ones_like.html

# calc first derivatives
grad_output = torch.ones_like(output)
dC_dinput = grad(output, sample_input, create_graph=True)[0]
print(dC_dinput)

# calc second derivatives
dC_dS = dC_dinput[:, 0]
d2C_dS2 = grad(dC_dS, sample_input, grad_outputs=torch.ones_like(dC_dS), create_graph=True)[0][:, 0]
print(d2C_dS2)

tensor([[-1.1156e-04,  2.4392e-05,  1.5128e-02]], grad_fn=<DivBackward0>)
tensor([1.8571e-07], grad_fn=<SelectBackward0>)


In [None]:
# weight test
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters: {total_params}")

Total trainable parameters: 28


In [None]:
# checking weights

for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name}: mean={param.data.mean():.4f}, std={param.data.std():.4f}")

model.0.weight: mean=0.0094, std=0.3078
model.0.bias: mean=-0.0264, std=0.5426
model.2.weight: mean=-0.0187, std=0.3512
model.2.bias: mean=-0.1440, std=0.2526
model.4.weight: mean=0.1386, std=0.3430
model.4.bias: mean=0.3073, std=nan


  print(f"{name}: mean={param.data.mean():.4f}, std={param.data.std():.4f}")


In [None]:
# symmetry/invariance check

# call prices should INCREASE with S, and convex



S_values = torch.tensor([[80.0, 100.0, 0.5],
                         [90.0, 100.0, 0.5],
                         [100.0, 100.0, 0.5],
                         [110.0, 100.0, 0.5]], requires_grad=True)

S_values_norm = S_values / torch.tensor([100.0, 100.0, 1.0])
prices = model(S_values_norm)
print("Monotonicity test:", prices.squeeze())


Monotonicity test: tensor([0.1187, 0.1175, 0.1164, 0.1153], grad_fn=<SqueezeBackward0>)


The output DECREASES as S increases which is wrong

In [None]:
def black_scholes_call_price(S, K, T, r, sigma):
    """Vectorized Black-Scholes formula for call options"""
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)

def generate_black_scholes_dataset(n_samples=10000, r=0.05, sigma=0.2, seed=42):
    np.random.seed(seed)

    # Sample inputs: S, K, T
    S = np.random.uniform(50, 150, size=n_samples)
    K = np.random.uniform(50, 150, size=n_samples)
    T = np.random.uniform(0.01, 1.0, size=n_samples)

    # Compute prices using BS formula
    C = black_scholes_call_price(S, K, T, r, sigma)

    # Stack inputs and outputs
    X = np.stack([S, K, T], axis=1)
    y = C.reshape(-1, 1)

    # Convert to torch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    return X_tensor, y_tensor


In [None]:
X_train, y_train = generate_black_scholes_dataset(n_samples=10000)

print("Sample input (S, K, T):", X_train[:3])
print("Sample target prices (C):", y_train[:3])

Sample input (S, K, T): tensor([[ 87.4540,  87.3641,   0.7327],
        [145.0714,  83.2912,   0.1927],
        [123.1994,  67.6154,   0.3532]])
Sample target prices (C): tensor([[ 7.6124],
        [62.5787],
        [56.7675]])


Looks like the simulator did well, within market range \

Sample input (S, K, T): \
tensor([[ 87.4540,  87.3641,  0.7327],  # near-the-money, moderate T \
        [145.0714,  83.2912,  0.1927],  # deep in-the-money \
        [123.1994,  67.6154,  0.3532]]) # deep in-the-money, mid-maturity \

Sample target prices (C): \
tensor([[ 7.6124],   # makes sense for near-the-money \
        [62.5787],  # very high because S >> K \ 
        [56.7675]]) # similar case: S >> K \