In [1]:
# PyTorch basic Libraries 
import torch                                        # root package
# check version 
print(torch.__version__)

from torch.utils.data import Dataset, DataLoader    # dataset representation and loading
import torch.autograd as autograd         # computation graph
from torch import Tensor                  # tensor node in the computation graph
import torch.nn as nn                     # neural networks
import torch.nn.functional as F           # layers, activations and more
import torch.optim as optim               # optimizers e.g. gradient descent, ADAM, etc.
# from torch.jit import script, trace       # hybrid frontend decorator and tracing jit

#  Classic tools 
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder, StandardScaler, MinMaxScaler

# Visualization 
import matplotlib.pyplot as plt 
import seaborn as sns

# Set up device-agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"

# Set the device globally
# torch.set_default_device(device)

# For classification problems
# from  torchmetrics import Accuracy 
# torchmetric_accuracy = Accuracy().to(device)  

# Computer Vision 
# import torchvision 
# from torchvision import datasets 
# from torchvision import transforms 
# from torchvision.transforms import ToTensor 

# print(torchvision.__version__)

2.0.1+rocm5.4.2


In [18]:
class BlackProcess:
    """Random price-path generator with daily steps.

    Every step multiplies the previous level by ``1 + r*dt + sigma*dW`` with
    ``dt = 1/365`` and ``dW`` drawn from a normal distribution of variance ``dt``.
    """

    def __init__(self, S0, r, sigma, n):
        # Starting level, drift, volatility and number of steps per path.
        self.S0, self.r, self.sigma, self.n = S0, r, sigma, n

    def generate(self):
        """Draw one path from NumPy's global random state.

        :return: array of length ``n + 1`` whose first element equals ``S0``
        """
        year_fraction = 1 / 365
        shocks = np.random.normal(0, year_fraction ** 0.5, self.n)
        step_returns = self.r * year_fraction + self.sigma * shocks
        # Leading 1.0 keeps the starting level as the first point of the path.
        growth = np.concatenate(([1.0], 1.0 + step_returns))
        return self.S0 * np.cumprod(growth)

In [19]:
class VanillaEnv():
    """Hedging environment for one vanilla call option on a simulated path.

    A fresh path is drawn from ``process`` on every ``reset``; ``step`` then walks
    the path one index at a time and reports the P&L of holding ``action`` units
    of the underlying over that step.
    """
    n_observation = 5  # length of the feature vector built by observation()

    # The annotation is quoted so this class can be defined before BlackProcess is in scope.
    def __init__(self, process: "BlackProcess", tenor, strike):
        """
        :param process: path generator exposing ``r`` and ``generate()``
        :param tenor: number of steps until expiry; the generated path must
            contain at least ``tenor + 1`` points
        :param strike: option strike
        """
        self.process = process
        self.tenor = tenor
        self.strike = strike
        self.t = 0
        self.path = None
        self.observations = None
        self.reset()

    def df(self):
        """One-step discount factor ``exp(-r / 365)``."""
        # np.exp: a bare ``exp`` is never imported in this notebook.
        return np.exp(-self.process.r / 365)

    def mu(self):
        """One-step growth rate ``exp(r / 365) - 1``."""
        return np.exp(self.process.r / 365) - 1

    def reset(self):
        """Draw a new path, rewind to ``t = 0`` and precompute all observations.

        :return: the observation vector at ``t = 0``
        """
        self.path = self.process.generate()
        self.t = 0
        self.observations = np.stack([self.observation(t) for t in range(self.tenor + 1)], 0)
        return self.observations[0]

    def St(self, t=None) -> float:
        """Underlying level at index ``t`` (defaults to the current step)."""
        t = self.t if t is None else t
        return self.path[t]

    def observation(self, t=None):
        """Feature vector ``[m, m**2, tau, tau**2, m*tau]`` at index ``t``.

        ``m = max(0, S_t / strike)`` and ``tau = (tenor - t) / 365``.
        """
        t = self.t if t is None else t
        moneyness = max(0, self.St(t) / self.strike)
        tenor = (self.tenor - t) / 365

        obs = np.array([moneyness, moneyness ** 2, tenor, tenor ** 2, moneyness * tenor])
        assert len(obs) == self.n_observation
        return obs

    def step(self, action):
        """
        Advance one step along the current path.

        ``S_t0`` and ``S_t1`` are the observation vectors before and after the
        step, not raw prices.

        :param action: hedge ratio, i.e. delta
        :return: S_t0, S_t1, reward, terminated, can_early_exercise, payoff, dS
        """
        S_t0 = self.observations[self.t]
        self.t = self.t + 1
        dS = self.St() - self.St(self.t - 1)
        reward = dS * action
        S_t1 = self.observations[self.t]
        terminated = self.t >= self.tenor
        can_early_exercise = False  # this environment never allows early exercise
        payoff = self.payoff()
        return S_t0, S_t1, reward, terminated, can_early_exercise, payoff, dS

    def payoff(self, t=None) -> float:
        """
        :return: call payoff ``max(0, S_t - strike)`` if exercised at index ``t``
            (defaults to the current step), whether or not exercise is allowed
        """
        return max(0, self.St(t) - self.strike)

In [17]:
# Market and contract parameters.
S0 = 1
r = 0.01
vol = 0.3
days = 30
strike = 1.1

# Number of simulated episodes and hidden-layer widths for the networks.
n_samples = 4096  # 2 ** 12
#n_hidden = [64, 64]
n_hidden = [8, 8]

# Path simulator and the hedging environment built on top of it.
process = BlackProcess(S0, r, vol, days)
N_OBSERVATION = VanillaEnv.n_observation
env = VanillaEnv(process, days, strike)

# Pre-training steps, currently disabled:
#print("pretrain actor")
#actor = get_pretrain_actor(env, n_hidden, n_samples)
#env = VanillaEnv(process, days, S0)
#print("pretrain critic")
#critic, _ = get_pretrain_critic(env, actor, n_hidden, N_OBSERVATION, n_samples, epoc=5)




In [20]:
def get_actor(n_hidden, N_OBS):
    """Build the actor MLP with a single Tanh-bounded output in [-1, 1].

    :param n_hidden: hidden layer widths, as any sequence of ints (may be empty)
    :param N_OBS: number of input features
    :return: ``nn.Sequential`` of ``Linear -> ReLU`` per hidden layer, followed
        by ``Linear(..., 1) -> Tanh``
    """
    # list() lets callers pass a tuple (or any iterable), not only a list.
    widths = [N_OBS] + list(n_hidden)
    layers = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(in_features=fan_in, out_features=fan_out))
        layers.append(nn.ReLU())
    layers.append(nn.Linear(in_features=widths[-1], out_features=1))
    layers.append(nn.Tanh())
    return nn.Sequential(*layers)
    

def get_critic(n_hidden, N_OBS):
    """Build the critic MLP with a single Sigmoid-bounded output in [0, 1].

    :param n_hidden: hidden layer widths, as any sequence of ints (may be empty)
    :param N_OBS: number of input features
    :return: ``nn.Sequential`` of ``Linear -> ReLU`` per hidden layer, followed
        by ``Linear(..., 1) -> Sigmoid``
    """
    # list() lets callers pass a tuple (or any iterable), not only a list.
    widths = [N_OBS] + list(n_hidden)
    layers = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(in_features=fan_in, out_features=fan_out))
        layers.append(nn.ReLU())
    layers.append(nn.Linear(in_features=widths[-1], out_features=1))
    layers.append(nn.Sigmoid())
    return nn.Sequential(*layers)


In [21]:
class preTrainer(nn.Module):
    """Three stacked linear layers whose per-step output is weighted by ``y`` and summed.

    No activation is applied between the layers, so the stack as a whole is
    still an affine map of the input features.
    """

    def __init__(self, N_OBS, n_hidden=8):
        """
        :param N_OBS: number of input features per time step
        :param n_hidden: width of the two hidden layers (default 8)
        """
        super().__init__()

        self.layer1_1 = nn.Linear(N_OBS, n_hidden)
        self.layer1_2 = nn.Linear(n_hidden, n_hidden)
        self.layer1_3 = nn.Linear(n_hidden, 1)

    def forward(self, x, y):
        """
        :param x: tensor of shape (batch, days, N_OBS)
        :param y: tensor broadcastable against (batch, days, 1), e.g. per-step weights
        :return: tensor of shape (batch,) — sum over steps of ``net(x) * y``
        """
        per_step = self.layer1_3(self.layer1_2(self.layer1_1(x)))
        # Collapse both the time axis and the trailing singleton axis.
        return (per_step * y).sum(dim=(1, 2))


In [22]:
# Shape smoke test: a preTrainer over 18 input features (VanillaEnv itself emits 5).
model_123 = preTrainer(18)

In [23]:
# Random batch: 5 samples x 30 steps, with 18 features in x and one value per step in y.
x_tensor = torch.rand(5, 30, 18)
y_tensor = torch.rand(5, 30, 1)
# Bare last expression so the notebook displays the per-sample sums.
model_123(x_tensor, y_tensor)
# Disabled: preTrainer_sum is not defined in any visible cell.
# preTrainer_sum(model_123, x_tensor.to(device), y_tensor.to(device))

tensor([-2.3800, -2.1495, -2.4478, -2.2833, -2.2343], grad_fn=<SumBackward1>)

In [24]:
from torchinfo import summary

In [51]:
# Second, independently initialised instance; used below to check weight copying.
model_test = preTrainer(18)


OrderedDict([('layer1_1.weight',
              tensor([[ 0.1826, -0.0834, -0.1204, -0.0122,  0.0703,  0.0254, -0.0729,  0.1486,
                       -0.1828,  0.1722, -0.2175,  0.2306,  0.2127,  0.0968, -0.2138,  0.1046,
                        0.0435, -0.2129],
                      [ 0.0236,  0.2288,  0.1919,  0.1323,  0.1585,  0.0833, -0.1170,  0.1745,
                        0.0831,  0.1277, -0.1120,  0.1026, -0.0782, -0.1238,  0.0741,  0.0292,
                       -0.0679, -0.0013],
                      [ 0.0863, -0.0057,  0.0399,  0.0308, -0.0328,  0.0494, -0.0746, -0.1883,
                        0.1139, -0.1220,  0.1959, -0.0488, -0.2176,  0.2320,  0.1880,  0.0361,
                        0.0901,  0.2088],
                      [-0.0616, -0.2311, -0.0436,  0.0084, -0.0441,  0.2217, -0.1051,  0.0415,
                        0.1876, -0.0383,  0.2030,  0.0388, -0.1228, -0.2335,  0.0141,  0.0139,
                        0.0938, -0.2250],
                      [ 0.0193, -0.2352

In [55]:

# NOTE: executed as In [55], i.e. after the load_state_dict cell (In [54]) that appears
# further down, so the weights displayed here are the ones copied from model_123.
model_test.state_dict()

OrderedDict([('layer1_1.weight',
              tensor([[ 0.1179,  0.1065,  0.0575, -0.0488, -0.2139,  0.0306, -0.1286,  0.1476,
                       -0.0980,  0.2211, -0.0639,  0.1237, -0.1241, -0.1684,  0.2124,  0.1384,
                       -0.0978, -0.0672],
                      [-0.0476, -0.1924,  0.1895, -0.1690, -0.1256,  0.1083,  0.0916, -0.0957,
                       -0.0739, -0.1065, -0.0224, -0.0470, -0.1145,  0.1737,  0.0539,  0.2310,
                       -0.0784, -0.1509],
                      [ 0.0867,  0.1981,  0.0640, -0.2303, -0.1681, -0.2093, -0.1398,  0.1163,
                        0.1854, -0.2329,  0.2064,  0.2177, -0.1836,  0.1728, -0.0234,  0.0047,
                        0.2090,  0.0661],
                      [ 0.1968,  0.1889,  0.1439,  0.1949,  0.0516,  0.0695,  0.0140,  0.0842,
                       -0.1309,  0.1380, -0.0463,  0.1326,  0.0887,  0.0713, -0.1435,  0.0624,
                       -0.0010,  0.0572],
                      [-0.0828,  0.0914

In [53]:
# Reference weights to compare against model_test.
model_123.state_dict()

OrderedDict([('layer1_1.weight',
              tensor([[ 0.1179,  0.1065,  0.0575, -0.0488, -0.2139,  0.0306, -0.1286,  0.1476,
                       -0.0980,  0.2211, -0.0639,  0.1237, -0.1241, -0.1684,  0.2124,  0.1384,
                       -0.0978, -0.0672],
                      [-0.0476, -0.1924,  0.1895, -0.1690, -0.1256,  0.1083,  0.0916, -0.0957,
                       -0.0739, -0.1065, -0.0224, -0.0470, -0.1145,  0.1737,  0.0539,  0.2310,
                       -0.0784, -0.1509],
                      [ 0.0867,  0.1981,  0.0640, -0.2303, -0.1681, -0.2093, -0.1398,  0.1163,
                        0.1854, -0.2329,  0.2064,  0.2177, -0.1836,  0.1728, -0.0234,  0.0047,
                        0.2090,  0.0661],
                      [ 0.1968,  0.1889,  0.1439,  0.1949,  0.0516,  0.0695,  0.0140,  0.0842,
                       -0.1309,  0.1380, -0.0463,  0.1326,  0.0887,  0.0713, -0.1435,  0.0624,
                       -0.0010,  0.0572],
                      [-0.0828,  0.0914

In [54]:
# Copy every parameter from model_123 into model_test, then display the result
# to confirm both models now hold the same weights.
model_test.load_state_dict(model_123.state_dict())
model_test.state_dict()

OrderedDict([('layer1_1.weight',
              tensor([[ 0.1179,  0.1065,  0.0575, -0.0488, -0.2139,  0.0306, -0.1286,  0.1476,
                       -0.0980,  0.2211, -0.0639,  0.1237, -0.1241, -0.1684,  0.2124,  0.1384,
                       -0.0978, -0.0672],
                      [-0.0476, -0.1924,  0.1895, -0.1690, -0.1256,  0.1083,  0.0916, -0.0957,
                       -0.0739, -0.1065, -0.0224, -0.0470, -0.1145,  0.1737,  0.0539,  0.2310,
                       -0.0784, -0.1509],
                      [ 0.0867,  0.1981,  0.0640, -0.2303, -0.1681, -0.2093, -0.1398,  0.1163,
                        0.1854, -0.2329,  0.2064,  0.2177, -0.1836,  0.1728, -0.0234,  0.0047,
                        0.2090,  0.0661],
                      [ 0.1968,  0.1889,  0.1439,  0.1949,  0.0516,  0.0695,  0.0140,  0.0842,
                       -0.1309,  0.1380, -0.0463,  0.1326,  0.0887,  0.0713, -0.1435,  0.0624,
                       -0.0010,  0.0572],
                      [-0.0828,  0.0914

In [25]:
# torchinfo summary; the two shapes are the sizes of forward()'s x and y inputs.
summary(model_123, ( (5, 30, 18), (5, 30, 1)))

Layer (type:depth-idx)                   Output Shape              Param #
preTrainer                               [5]                       --
├─Linear: 1-1                            [5, 30, 8]                152
├─Linear: 1-2                            [5, 30, 8]                72
├─Linear: 1-3                            [5, 30, 1]                9
Total params: 233
Trainable params: 233
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.01
Forward/backward pass size (MB): 0.02
Params size (MB): 0.00
Estimated Total Size (MB): 0.03

In [26]:
# Parameter count only (no input size given) for a critic with 10 inputs and two hidden layers of 8.
summary(get_critic([8, 8], 10))

Layer (type:depth-idx)                   Param #
Sequential                               --
├─Linear: 1-1                            88
├─ReLU: 1-2                              --
├─Linear: 1-3                            72
├─ReLU: 1-4                              --
├─Linear: 1-5                            9
├─Sigmoid: 1-6                           --
Total params: 169
Trainable params: 169
Non-trainable params: 0

In [27]:
# Scratch cell: pair the episode length (30) with each layer width.
# `param` is not used by any later visible cell.
N_list = [18] + [8,8]
days_list = [30] *3 
param = list(zip(days_list, N_list))
param

[(30, 18), (30, 8), (30, 8)]

In [35]:
def get_pretrain_model(n_hidden, days, N_OBS):
    """Create the pair of networks used for pre-training.

    :param n_hidden: hidden layer widths forwarded to ``get_actor``
    :param days: currently unused
    :param N_OBS: number of observation features
    :return: ``(actor, pretrainer)``; the two are built independently of each other
    """
    actor = get_actor(n_hidden, N_OBS)
    pretrainer = preTrainer(N_OBS)
    return actor, pretrainer

In [36]:
# Display both networks to inspect their layer layout (n_hidden comes from the parameter cell).
get_pretrain_model(n_hidden, days=30, N_OBS=18)

(Sequential(
   (0): Linear(in_features=18, out_features=8, bias=True)
   (1): ReLU()
   (2): Linear(in_features=8, out_features=8, bias=True)
   (3): ReLU()
   (4): Linear(in_features=8, out_features=1, bias=True)
   (5): Tanh()
 ),
 preTrainer(
   (layer1_1): Linear(in_features=18, out_features=8, bias=True)
   (layer1_2): Linear(in_features=8, out_features=8, bias=True)
   (layer1_3): Linear(in_features=8, out_features=1, bias=True)
 ))

In [29]:
class Buffer():
    """
    Fixed-size ring buffer of single transitions.

    Seven float32 arrays are kept in this order:
    S_t0, S_t1, reward, terminated, can_early_exercise, payoff, dS
    """

    def __init__(self, size, N_OBSERVATION):
        self.size = size

        # The two observation fields are N_OBSERVATION wide; the five scalar fields are 1 wide.
        widths = (N_OBSERVATION, N_OBSERVATION, 1, 1, 1, 1, 1)
        self.storage = [np.zeros((size, width), dtype=np.float32) for width in widths]
        self.count = 0

    def store(self, values):
        """Write one transition, overwriting the oldest slot once the buffer is full."""
        slot = self.count % self.size
        for column, item in zip(self.storage, values):
            column[slot, :] = item
        self.count += 1

    def sample(self, batch_size):
        """Return ``batch_size`` distinct rows of every field.

        Rows are drawn from all ``size`` slots, whether or not they have been written.
        """
        picked = np.random.choice(self.size, batch_size, False)
        return [column[picked] for column in self.storage]

In [30]:
class EpisodeBuffer():
    """Ring buffer of whole episodes, each ``ep_length`` steps long.

    Seven float32 arrays of shape (capacity, ep_length, width) are kept in this order:
    S_t0, S_t1, reward, terminated, can_early_exercise, payoff, dS
    """

    def __init__(self, capacity, ep_length, N_OBSERVATION):
        self.capacity = capacity
        self.ep_count = 0

        # The two observation fields are N_OBSERVATION wide; the five scalar fields are 1 wide.
        widths = (N_OBSERVATION, N_OBSERVATION, 1, 1, 1, 1, 1)
        self.storage = [
            np.zeros((capacity, ep_length, width), dtype=np.float32) for width in widths
        ]

    def store(self, values, t):
        """Write step ``t`` of the current episode; a truthy ``values[3]`` closes the episode."""
        episode = self.ep_count % self.capacity
        for column, item in zip(self.storage, values):
            column[episode, t, :] = item
        if values[3]:
            self.ep_count += 1

    def sample(self, batch_size):
        """Return ``batch_size`` distinct episodes of every field.

        Episodes are drawn from all ``capacity`` slots, whether or not they have been written.
        """
        picked = np.random.choice(self.capacity, batch_size, False)
        return [column[picked] for column in self.storage]

In [31]:
def gather_episode_wise(env, buffer: EpisodeBuffer, episodes, action=0.5):
    """Roll out ``episodes`` full episodes with a constant action and record every step.

    :param env: environment exposing ``reset()``, ``step(action)`` and the step counter ``t``
    :param buffer: episode buffer receiving each transition at index ``env.t - 1``
    :param episodes: number of episodes to simulate
    :param action: fixed hedge ratio passed to every ``env.step`` call
    """
    for _ in range(episodes):
        env.reset()
        finished = False
        while not finished:
            # A constant action is used instead of querying an actor, to save time.
            transition = env.step(action)
            buffer.store(transition, env.t - 1)
            finished = transition[3]

In [32]:
def get_pretrain_actor(env, n_hidden, n_samples, epoch=12):
    """Pre-train an actor so that its summed hedge P&L matches the terminal payoff.

    ``n_samples`` episodes are simulated from ``env``; the actor is then fitted by
    minimising the mean squared error between ``sum_t actor(obs_t) * dS_t`` and the
    payoff recorded at the last step of each episode.

    :param env: environment exposing ``n_observation`` and ``tenor``
    :param n_hidden: hidden layer widths for the actor
    :param n_samples: number of simulated episodes used as training data
    :param epoch: number of passes over the simulated episodes
    :return: a fresh actor carrying the pre-trained weights
    """
    N_OBSERVATION = env.n_observation
    days = env.tenor
    buffer = EpisodeBuffer(n_samples, days, N_OBSERVATION)
    gather_episode_wise(env, buffer, n_samples)

    # storage order: S_t0, S_t1, reward, terminated, can_early_exercise, payoff, dS
    observations = torch.from_numpy(buffer.storage[0])
    dS = torch.from_numpy(buffer.storage[-1])
    payoff = torch.from_numpy(buffer.storage[-2][:, -1, 0])  # payoff at the final step

    # NOTE(review): the preTrainer returned alongside the actor owns its own layers and
    # shares no parameters with it, so fitting that module would leave the actor
    # untouched. The actor is therefore trained directly on the same objective.
    pretrain_actor, _ = get_pretrain_model(n_hidden, days, N_OBSERVATION)

    optimizer = optim.Adam(pretrain_actor.parameters())
    loss_fn = nn.MSELoss()
    batch_size = 64
    n_episodes = observations.shape[0]

    pretrain_actor.train()
    for _ in range(epoch):
        order = torch.randperm(n_episodes)  # reshuffle the episodes every epoch
        for start in range(0, n_episodes, batch_size):
            idx = order[start:start + batch_size]
            hedge_pnl = (pretrain_actor(observations[idx]) * dS[idx]).sum(dim=(1, 2))
            loss = loss_fn(hedge_pnl, payoff[idx])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    actor = get_actor(n_hidden, N_OBSERVATION)
    actor.load_state_dict(pretrain_actor.state_dict())
    return actor

In [42]:
# Allocate an (initially all-zero) episode buffer sized for n_samples episodes of `days` steps.
buffer  = EpisodeBuffer(n_samples, days, VanillaEnv.n_observation)

In [44]:

# Pull out S_t0 observations, dS, and the payoff at the last step of every episode.
# NOTE(review): no visible cell fills `buffer`, so these are presumably still zeros — confirm.
observations, dS, payoff = buffer.storage[0], buffer.storage[-1], buffer.storage[-2][:, -1, 0]

In [47]:
# Sanity-check shapes: (episodes, steps, features), (episodes, steps, 1), (episodes,).
observations.shape, dS.shape, payoff.shape

((4096, 30, 5), (4096, 30, 1), (4096,))