In [8]:
import yfinance as yf
import numpy as np

ticker = 'AAPL'

start_date = "2015-01-01"
df = yf.download(
    ticker, 
    interval='1d',
    start=start_date,
    # period='10y',
)
df.columns = df.columns.get_level_values(0)
df = df[['Close']] # keep only close prices
df['return'] = df['Close'].pct_change()
# df['state'] = np.where(df['return'] >= 0, 0, 1) # for 2 states

df.to_csv('data.csv')

[*********************100%***********************]  1 of 1 completed


In [108]:
import numpy as np
import pandas as pd

df = pd.read_csv('data.csv', index_col = 'Date')

In [111]:
def getReturnVolatility_1(df, state, period=50): 
    '''For 2 state trinomial lattice'''

    if state not in df['state'].unique():
        raise ValueError('The state does not exist.')
    
    local_df = df.copy()
    local_df['state'] = np.where(df['return'] >= 0, 0, 1)
    local_df['segment'] = pd.qcut(range(len(local_df)), q=period, labels=False)

    volatility_per_segment = local_df[local_df['state'] == state].groupby('segment')['return'].var()
    weight = pd.Series([2**(-(period-i)) for i in range(period)])
    return np.sqrt(volatility_per_segment.dot(weight) * 252)


def getReturnVolatility_2(df, state, period=20):
    '''For 3-state trinomial lattice'''
    if state not in (0, 1, 2):
        raise ValueError('The state does not exist.')
    
    local_df = df.copy()
    local_df['state'] = 0

    local_df['state'] = local_df['return'].rolling(3).agg(lambda x: 1 if np.all(x >= 0) else (2 if np.all(x < 0) else 0))
    local_df['segment'] = pd.qcut(range(len(local_df)), q=period, labels=False)

    volatility_per_segment = local_df[local_df['state'] == state].groupby('segment')['return'].var().reindex(range(period))
    weight = pd.Series([2**(-(period-i)) for i in range(period)])
    assert abs(sum(weight) - 1) <= 1e-6

    return np.sqrt(volatility_per_segment.dot(weight) * 252)
    

In [112]:
vols = [ getReturnVolatility_2(df, i, 20).item() for i in range(3) ]

vols

[0.25043223418097477, 0.12998472827772556, 0.16196837614868023]

### Phase 2: building matrices

In [113]:
import numpy as np
import pandas as pd
from functools import cache

In [116]:
# generating matrix A
def get_markov_transition_prob(delta_t, A, max_iter=50):
    res = np.eye(A.shape[0])
    prev = np.eye(A.shape[0])
    for i in range(max_iter):
        prev = prev @ A * delta_t / (i+1)
        res += prev
    return res

def get_risk_neu_prob(sigma, delta_t, risk_free_int):
    risk_neutral_prob = { i: [-1]*3 for i in range(P.shape[0]) } # [up, mid, down]

    for i in range(P.shape[0]):
        risk_neutral_prob[i][1] = 1 - (vols[i]/sigma) ** 2
        risk_neutral_prob[i][0] = (np.exp(risk_free_int[i]*delta_t)- np.exp(-sigma * np.sqrt(delta_t))- \
                                (1-(vols[i]/sigma)**2)*(1-np.exp(-sigma * np.sqrt(delta_t))))/(np.exp(sigma * np.sqrt(delta_t)) - np.exp(-sigma * np.sqrt(delta_t)))
        risk_neutral_prob[i][2] = (np.exp(sigma * np.sqrt(delta_t)) - np.exp(risk_free_int[i]*delta_t)- \
                                (1-(vols[i]/sigma)**2)*(np.exp(sigma * np.sqrt(delta_t))-1))/(np.exp(sigma * np.sqrt(delta_t)) - np.exp(-sigma * np.sqrt(delta_t)))
        
    return risk_neutral_prob

def trinomial_call_value(time, node, state, K, american=False):
    """
    V(t,n,j) be the value of the derivative at the nth node at time step t under the jth regime state.
    time starts from 0
    node index starts from 1 (from bottom)
    """
    if state >= P.shape[0]:
        raise Exception('Invalid state')

    assert node <= 2 * time + 1 
    delta_t = T / N
    
    @cache
    def V(t, n, i):
        if t == N:
            return max(0, S0*np.exp((n - 1 - N)*sigma*np.sqrt(delta_t))-K)
        res = 0
        for j in range(P.shape[0]): # regimes
            res += P[i][j] * (
                risk_neutral_prob[i][0] * V(t+1, n+2, j) +
                risk_neutral_prob[i][1] * V(t+1, n+1, j) +
                risk_neutral_prob[i][2] * V(t+1, n, j)
            )
        # number of node = 2n+1
        res *= np.exp(-risk_free_int[i]*delta_t)
        if american:
            intrinsic = max(0, S0*np.exp((n - 1 - t) * sigma * np.sqrt(delta_t)) - K)
            res = max(res, intrinsic)
        
        return res
    
    return V(time, node, state)
    

def trinomial_put_value(time, node, state, K, american=False):
    if state >= P.shape[0]:
        raise Exception('Invalid state')
    
    assert node <= 2 * time + 1 
    delta_t = T / N
    
    @cache
    def V(t, n, i):
        if t == N:
            return max(0, K - S0*np.exp((n - 1 - N)*sigma*np.sqrt(delta_t)))
        
        res = 0
        for j in range(P.shape[0]):  # Iterate over regimes
            res += P[i][j] * (
                risk_neutral_prob[i][0] * V(t+1, n+2, j) +
                risk_neutral_prob[i][1] * V(t+1, n+1, j) +
                risk_neutral_prob[i][2] * V(t+1, n, j)
            )
        
        discounted_value = np.exp(-risk_free_int[i] * delta_t) * res
        
        if american:
            intrinsic_value = max(0, K - S0*np.exp((n - 1 - t) * sigma * np.sqrt(delta_t)))
            return max(discounted_value, intrinsic_value)
        
        return discounted_value
    
    return V(time, node, state)

T = 1 # years
N = 1000
S0 = df['Close'].loc['2025-01-16'] # take a close price
sigma = max(vols) + (np.sqrt(1.5)-1) * np.mean(vols) # suggested by the paper
risk_free_int = [.04] * 3 # adjust the state numbers
# A = np.array([
#     [-0.5,  0.5],
#     [ 0.5, -0.5],
# ])
A = np.array([
    [-1,  0.5,  0.5],
    [ 0.5, -1,  0.5],
    [ 0.5,  0.5, -1]
])
P = get_markov_transition_prob(T/N, A)
risk_neutral_prob = get_risk_neu_prob(sigma, T/N, risk_free_int)

In [None]:
data = []

for state in range(3):
    for is_american in [True, False]:
        option_type = "American" if is_american else "European"
        for strike in range(240, 281, 10):
            call_price = trinomial_call_value(0, 1, state, strike, is_american)
            put_price = trinomial_put_value(0, 1, state, strike, is_american)

            data.append({
                "State": state,
                "Type": option_type,
                "Strike": strike,
                "Call Price": call_price,
                "Put Price": put_price
            })

df = pd.DataFrame(data)
df.to_csv("trinomial_result_3_regimes.csv", index=False)


## American Monte Carlo

In [77]:
import numpy as np

class AmericanOptionsLSMC:
    """ Class for American options pricing using Longstaff-Schwartz (2001):
    "Valuing American Options by Simulation: A Simple Least-Squares Approach."
    Review of Financial Studies, Vol. 14, 113-147.
    S0 : float : initial stock/index level
    strike : float : strike price
    T : float : time to maturity (in year fractions)
    M : int : grid or granularity for time (in number of total points)
    r : float : constant risk-free short rate
    div :    float : dividend yield
    sigma :  float : volatility factor in diffusion term 
    
    Unitest(doctest): 
    >>> AmericanPUT = AmericanOptionsLSMC('put', 36., 40., 1., 50, 0.06, 0.06, 0.2, 10000)
    >>> AmericanPUT.price
    4.4731177017712209
    """

    def __init__(self, option_type, S0, strike, T, M, r, div, sigma, simulations):
        try:
            self.option_type = option_type
            assert isinstance(option_type, str)
            self.S0 = float(S0)
            self.strike = float(strike)
            assert T > 0
            self.T = float(T)
            assert M > 0
            self.M = int(M)
            assert r >= 0
            self.r = float(r)
            assert div >= 0
            self.div = float(div)
            assert sigma > 0
            self.sigma = float(sigma)
            assert simulations > 0
            self.simulations = int(simulations)
        except ValueError:
            print('Error passing Options parameters')


        if option_type != 'call' and option_type != 'put':
            raise ValueError("Error: option type not valid. Enter 'call' or 'put'")
        if S0 < 0 or strike < 0 or T <= 0 or r < 0 or div < 0 or sigma < 0:
            raise ValueError('Error: Negative inputs not allowed')

        self.time_unit = self.T / float(self.M)
        self.discount = np.exp(-self.r * self.time_unit)

    @property
    def MCprice_matrix(self, seed = 123):
        """ Returns MC price matrix rows: time columns: price-path simulation """
        np.random.seed(seed)
        MCprice_matrix = np.zeros((self.M + 1, self.simulations), dtype=np.float64)
        MCprice_matrix[0,:] = self.S0
        for t in range(1, self.M + 1):
            brownian = np.random.standard_normal( self.simulations // 2)
            brownian = np.concatenate((brownian, -brownian))
            MCprice_matrix[t, :] = (MCprice_matrix[t - 1, :]
                                  * np.exp((self.r - self.sigma ** 2 / 2.) * self.time_unit
                                  + self.sigma * brownian * np.sqrt(self.time_unit)))
        return MCprice_matrix

    @property
    def MCpayoff(self):
        """Returns the inner-value of American Option"""
        if self.option_type == 'call':
            payoff = np.maximum(self.MCprice_matrix - self.strike,
                           np.zeros((self.M + 1, self.simulations),dtype=np.float64))
        else:
            payoff = np.maximum(self.strike - self.MCprice_matrix,
                            np.zeros((self.M + 1, self.simulations),
                            dtype=np.float64))
        return payoff

    @property
    def value_vector(self):
        value_matrix = np.zeros_like(self.MCpayoff)
        value_matrix[-1, :] = self.MCpayoff[-1, :]
        for t in range(self.M - 1, 0 , -1):
            regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
            continuation_value = np.polyval(regression, self.MCprice_matrix[t, :])
            value_matrix[t, :] = np.where(self.MCpayoff[t, :] > continuation_value,
                                          self.MCpayoff[t, :],
                                          value_matrix[t + 1, :] * self.discount)

        return value_matrix[1,:] * self.discount


    @property
    def price(self): return np.sum(self.value_vector) / float(self.simulations)

In [98]:
data = []
for state in range(3):
    for strike in range(240, 281, 10):
        call_price = AmericanOptionsLSMC('call', S0, strike, 1, 500, risk_free_int[state], 0, vols[state], 10000).price
        put_price = AmericanOptionsLSMC('put', S0, strike, 1, 500, risk_free_int[state], 0, vols[state], 10000).price

        # Append a dictionary with results
        data.append({
            "State": state,
            "Type": 'American',
            "Strike": strike,
            "Call Price": call_price,
            "Put Price": put_price
        })


df = pd.DataFrame(data)
df.to_csv("MC_3states.csv", index=False, mode='a', header=False)

  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)
  regression = np.polyfit(self.MCprice_matrix[t, :], value_matrix[t + 1, :] * self.discount, 5)


For 2-states model, each simulation contains 500 timesteps, for a price we ran 10k simulations.

On average, each option price takes 3m 34s. All 20 invocations took 71m20s.

For 3-states model, total time: 113m 3s, average 3m 46s.

Least square fitting is not ideal, prices of LSMC under this degree (5) is not suitable.

In [89]:
# Standard Monte Carlo for European Options
# https://www.kaggle.com/code/ypark4857/monte-carlo-simulation-of-european-option-pricing

import numpy as np

rng = np.random.default_rng(123)

def mc_call(S, K, T, r, sigma, N):
    
    # Initial Asset Pricie
    S_init = S
    
    # X follows a standard normal distribution
    X = rng.normal(0, 1, N)
    
    # The Distribution of asset prices at the Expiration of the Option
    ST = S_init * np.exp((r-0.5*sigma**2)*T + sigma*np.sqrt(T)*X)
    
    # The Discounted payoff of European call option at expiration
    fST = np.exp(-r*T) * np.maximum(ST-K, 0)
    
    # The option value by taking the expected discounted payoff
    price = np.mean(fST)
    variance = np.var(fST)

    return price, variance

def mc_put(S, K, T, r, sigma, N):
    S_init = S

    # X follows a standard normal distribution
    X = rng.normal(0, 1, N)
    
    # The Distribution of asset prices at the Expiration of the Option
    ST = S_init * np.exp((r-0.5*sigma**2)*T + sigma*np.sqrt(T)*X)
    
    # The Discounted payoff of European call option at expiration
    fST = np.exp(-r*T) * np.maximum(K-ST, 0)

    # The option value by taking the expected discounted payoff
    price = np.mean(fST)
    variance = np.var(fST)
    
    return price, variance

In [102]:
data = []

for state in range(3):
    for strike in range(240, 281, 10):
        call_price, _ = mc_call(S0, strike, 1, risk_free_int[state], vols[state], 10000)
        put_price, _ = mc_put(S0, strike, 1, risk_free_int[state], vols[state], 10000)

        # Append a dictionary with results
        data.append({
            "State": state,
            "Type": 'European',
            "Strike": strike,
            "Call Price": call_price.item(),
            "Put Price": put_price.item()
        })

df = pd.DataFrame(data)
# df.to_csv('MC_3states.csv', mode='a', index=False, header=False)
df.to_csv('tmp.csv', index=False)

In [99]:
price_lsmc = pd.read_csv('MC_3states.csv')
price_tri = pd.read_csv('trinomial_result_3_regimes.csv')

price_merge = pd.merge(price_lsmc, price_tri, on=['State', 'Strike', 'Type'], suffixes=('_MC', '_Trinomial'), how='right')
price_merge.round(5).to_csv('combined_result_3.csv', index=False)

In [101]:
pd.read_csv('combined_result_2.csv')

Unnamed: 0,State,Type,Strike,Call Price_MC,Put Price_MC,Call Price_Trinomial,Put Price_Trinomial
0,0,American,240,11.28845,16.38719,13.06711,17.28587
1,0,American,250,7.65826,23.43201,9.33162,24.18292
2,0,American,260,5.05551,32.24591,6.49796,32.43139
3,0,American,270,3.12289,42.00703,4.42056,41.99069
4,0,American,280,1.90025,51.98466,2.94513,51.99069
5,1,American,240,15.71309,20.70587,16.03997,20.25279
6,1,American,250,12.07628,27.30075,12.23388,26.81593
7,1,American,260,9.07719,34.53872,9.18583,34.3604
8,1,American,270,6.6601,43.00588,6.79725,42.81014
9,1,American,280,4.73648,52.28645,4.963,52.0756


In [100]:
pd.read_csv('combined_result_3.csv')

Unnamed: 0,State,Type,Strike,Call Price_MC,Put Price_MC,Call Price_Trinomial,Put Price_Trinomial
0,0,American,240,20.31684,25.04102,18.8194,23.0706
1,0,American,250,16.71321,31.58345,14.98932,29.49229
2,0,American,260,13.4854,38.55017,11.83695,36.72925
3,0,American,270,10.84263,46.049,9.27327,44.69274
4,0,American,280,8.45133,54.3806,7.2162,53.28556
5,0,European,240,21.75598,23.90369,18.8194,21.39956
6,0,European,250,17.14765,30.58595,14.98932,27.17738
7,0,European,260,14.39821,36.25659,11.83695,33.6329
8,0,European,270,11.98742,43.03303,9.27327,40.67711
9,0,European,280,9.09741,49.68284,7.2162,48.22793


### For MC, American options (priced using LSMC) are cheaper than European options due to poor fitting. (may not be good source)