In [None]:
#FIN 553 Group Project 1 - Part 2
#Takumi Li (feiyang3), Pengjie Ren (pengjie2), Wenqing Ni(wn5)

import tensorflow as tf
from tensorflow import keras

# Model and simulation parameters shared by the functions below.
Spot = 36   # initial stock price (every simulated path starts here)
σ = 0.2     # stock volatility
K = 40      # strike price
r = 0.06    # risk-free rate
n = 20000   # number of simulated paths
m = 50      # number of exercise dates
T = 1       # maturity
order = 6   # polynomial order (passed to train_step, which does not read it)
Δt = T / m  # interval between two exercise dates


def create_polynomials():
    """Return a fresh Keras model consisting of a single one-unit Dense layer.

    Each call builds a new, independent model; no weights are shared
    between the returned models.
    """
    layers = [tf.keras.layers.Dense(1)]
    return tf.keras.Sequential(layers)

# One independent model per intermediate exercise date 1 .. m-1.
networks = {date: create_polynomials() for date in range(1, m)}


# simulates the stock price evolution
def advance(S, r, σ, Δt, n):
    """Move the prices in S forward by one time step of length Δt.

    Applies a single Euler step: the new price is the old price plus a
    drift term ``r * S * Δt`` and a diffusion term ``σ * S * dB``, where
    ``dB`` is a normal draw scaled by ``sqrt(Δt)`` with the same shape as S.

    Args:
        S: tensor of current prices.
        r: risk-free rate used as the drift.
        σ: volatility.
        Δt: length of the time step.
        n: not used by this function; kept for the existing call signature.

    Returns:
        Tensor of prices after the step, same shape as S.
    """
    shocks = tf.random.normal(shape=tf.shape(S))
    dB = tf.sqrt(Δt) * shocks
    drift = r * S * Δt
    diffusion = σ * S * dB
    return S + drift + diffusion


# Single optimizer instance; train_step applies it to the variables of every
# per-date network.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
# Adam, Nadam, SGD and Adadelta were also tested, with learning rates tuned
# over the range 0.001 to 0.5. RMSprop gave the best result, 4.454471
# (compared with 4.47350742 from the NumPy code).


# LSMC algorithm
#@tf.function
def train_step(order, Spot, σ, K, r):
    """Run one LSMC training pass and return the estimated put price.

    Simulates ``n`` price paths over ``m`` exercise dates (``n``, ``m`` and
    ``Δt`` are read from module level), then walks backwards from date
    ``m - 1`` to date 1. At each date the network for that date takes one
    gradient step towards the discounted value of the following date, and
    its refreshed prediction is used as the continuation value for the
    exercise decision. Finally every path is exercised at the first date
    whose payoff is positive and exceeds the continuation value, and the
    discounted payoffs are averaged.

    Args:
        order: not used; kept so existing callers keep working.
        Spot: initial stock price of every path.
        σ: stock volatility.
        K: strike price of the put.
        r: risk-free rate.

    Returns:
        Scalar tensor with the average discounted payoff over the ``n`` paths.
    """
    # The global seed is reset on every call, before any path is drawn.
    tf.random.set_seed(0)

    # --- Simulate the price paths: S[t] has shape (n, 1) for t = 0 .. m ---
    S = {0: Spot * tf.ones((n, 1))}
    for t in range(m):
        S[t + 1] = advance(S[t], r, σ, Δt, n)

    discount = tf.exp(-r * Δt)  # one-period discount factor
    # Immediate exercise payoff of the put at every date.
    CFL = {t: tf.maximum(0., K - S[t]) for t in range(m + 1)}
    # Value at date t + 1, already discounted back to date t.
    value_tp1 = CFL[m] * discount
    # Continuation value is zero at the last date.
    CV = {m: tf.zeros_like(S[0])}

    # --- Backward induction with one gradient step per date ---
    # NOTE(review): the single module-level optimizer is applied to a
    # different network at every date; some Keras optimizer implementations
    # reject variables they were not built with -- confirm against the
    # installed version.
    for t in range(m - 1, 0, -1):
        network = networks[t]

        with tf.GradientTape() as tape:
            prediction = network(S[t])
            # Sum (not mean) of the per-path squared errors.
            mse_loss = tf.reduce_sum(
                tf.keras.losses.mean_squared_error(value_tp1, prediction))

        theta = network.variables
        grads = tape.gradient(mse_loss, theta)
        optimizer.apply_gradients(zip(grads, theta))

        # Continuation value predicted by the network after the update.
        CV[t] = network(S[t])

        value_t = tf.where(CFL[t] > CV[t], CFL[t], value_tp1)
        value_tp1 = discount * value_t

    # --- Price: payoff at the first exercise date of every path ---
    exercised = [
        tf.where(CV[t] < CFL[t], CFL[t], tf.zeros_like(CFL[t]))
        for t in range(1, m + 1)
    ]
    # tf.reshape (rather than tf.constant with a shape) also accepts
    # non-eager tensors. Row i of POF holds exercise date t = i + 1.
    POF = tf.reshape(tf.stack(exercised, axis=0), (m, n))

    # argmax returns the first date with a positive payoff; paths that never
    # pay get index 0, whose payoff is 0, so they contribute nothing.
    idx_payoffs = tf.math.argmax(POF > 0, axis=0)
    FPOF = tf.transpose(tf.one_hot(idx_payoffs, m)) * POF

    # Discount each row by its own exercise date 1 .. m. Using 0 .. m-1 here
    # would discount every payoff by one period too little.
    exercise_dates = tf.reshape(tf.range(1, m + 1, dtype=tf.float32), (m, 1))
    dFPOF = FPOF * tf.exp(-r * exercise_dates * Δt)

    PRICE = tf.reduce_sum(dFPOF) / n
    return PRICE


# Training driver: repeat the LSMC pass and report the price estimate on
# every 100th iteration (including the very first one).
for iteration in range(1000000):
    PRICE = train_step(order, Spot, σ, K, r)
    if iteration % 100:
        continue
    print(PRICE)


tf.Tensor(3.887152, shape=(), dtype=float32)
tf.Tensor(3.9392426, shape=(), dtype=float32)
tf.Tensor(4.279408, shape=(), dtype=float32)
tf.Tensor(4.3180165, shape=(), dtype=float32)
tf.Tensor(4.3184466, shape=(), dtype=float32)
tf.Tensor(4.319208, shape=(), dtype=float32)
tf.Tensor(4.3189483, shape=(), dtype=float32)
tf.Tensor(4.31926, shape=(), dtype=float32)
tf.Tensor(4.322677, shape=(), dtype=float32)
tf.Tensor(4.3239107, shape=(), dtype=float32)
tf.Tensor(4.32748, shape=(), dtype=float32)
tf.Tensor(4.327509, shape=(), dtype=float32)
tf.Tensor(4.33301, shape=(), dtype=float32)
tf.Tensor(4.33299, shape=(), dtype=float32)
tf.Tensor(4.3339567, shape=(), dtype=float32)
tf.Tensor(4.33679, shape=(), dtype=float32)
tf.Tensor(4.3383327, shape=(), dtype=float32)
tf.Tensor(4.3412924, shape=(), dtype=float32)
tf.Tensor(4.340547, shape=(), dtype=float32)
tf.Tensor(4.3414416, shape=(), dtype=float32)
tf.Tensor(4.346652, shape=(), dtype=float32)
tf.Tensor(4.348467, shape=(), dtype=float32)
tf.Ten