In [1]:
import numpy as np
import math
from tqdm.notebook import tnrange
# --- Problem parameters ---
S0 = 105       # initial stock price
K = 105        # strike price
r = 0.005      # risk-free interest rate
sigma = 0.3    # volatility
N = 10         # number of time steps
dt = 0.003     # length of one time step

# --- Cox-Ross-Rubinstein lattice quantities derived from the parameters ---
u = np.exp(sigma * np.sqrt(dt))        # factor applied to the price on an up move
d = 1 / u                              # factor applied to the price on a down move
p = (np.exp(r * dt) - d) / (u - d)     # risk-neutral probability of an up move
q = 1 - p                              # risk-neutral probability of a down move

# --- Recombining price lattice ---
# stock_price[i, j] is the price at time step i after j down moves.
# Cells with j > i are never written and keep their initial value 0.
stock_price = np.zeros((N + 1, N + 1))
stock_price[0, 0] = S0
for step in range(1, N + 1):
    previous = stock_price[step - 1]
    # Column 0 follows the all-up path; every other node is reached by a down
    # move from the node one column to the left in the previous row.
    stock_price[step, 0] = previous[0] * u
    stock_price[step, 1:step + 1] = previous[:step] * d

# --- Grids of candidate hedging positions ---
actions = np.linspace(-1, 0, 1001)     # units of stock held, from -1 to 0
actions_1 = np.linspace(0, K, 1001)    # cash held, from 0 to K


In [2]:
def Payoff(s):
    """Intrinsic value of a put struck at K when the stock price is s.

    Returns K - s when that difference is not negative, otherwise 0.
    """
    intrinsic = K - s
    return 0 if intrinsic < 0 else intrinsic

In [3]:
# Flatten the lattice into rows of [price, time step]. Cells the tree never
# filled are still 0 and are skipped.
states = np.array([
    [price, step]
    for step, row in enumerate(stock_price)
    for price in row
    if price != 0
])

# Payoff at every node of the final time step, in column order.
Payoff_terminal = [Payoff(price) for price in stock_price[N]]

In [4]:
# Show the flattened lattice: one [stock price, time step] row per node.
states

array([[105.        ,   0.        ],
       [106.73957902,   1.        ],
       [103.28877162,   1.        ],
       [108.50797837,   2.        ],
       [105.        ,   2.        ],
       [101.60543184,   2.        ],
       [110.30567553,   3.        ],
       [106.73957902,   3.        ],
       [103.28877162,   3.        ],
       [ 99.94952614,   3.        ],
       [112.1331559 ,   4.        ],
       [108.50797837,   4.        ],
       [105.        ,   4.        ],
       [101.60543184,   4.        ],
       [ 98.32060742,   4.        ],
       [113.9909129 ,   5.        ],
       [110.30567553,   5.        ],
       [106.73957902,   5.        ],
       [103.28877162,   5.        ],
       [ 99.94952614,   5.        ],
       [ 96.71823587,   5.        ],
       [115.87944814,   6.        ],
       [112.1331559 ,   6.        ],
       [108.50797837,   6.        ],
       [105.        ,   6.        ],
       [101.60543184,   6.        ],
       [ 98.32060742,   6.        ],
 

In [5]:
# One value per row of `states`; every entry starts at 0.
Optimal_val = np.zeros(states.shape[0])
# `states` is ordered by time step, so its last N + 1 rows are the nodes of the
# final step; those receive the terminal payoffs.
terminal_rows = slice(-(N + 1), None)
Optimal_val[terminal_rows] = Payoff_terminal
Optimal_val

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  3.39456816,  6.67939258,  9.85802118, 12.9338872 ,
       15.91031287])

In [6]:
# Map each node to its value. The key is the string form of the node's
# [price, time step] row, e.g. '[105.0, 0.0]'.
Optimal = {
    str(node.tolist()): value
    for node, value in zip(states, Optimal_val)
}

In [7]:
def transition(s,t, a):
    """Return both successor nodes of (s, t) and the squared hedging error of portfolio a.

    The portfolio holds a[0] units of stock and a[1] in cash. After one step the
    stock leg is worth a[0] * next_price and the cash leg a[1] * exp(dt * r).
    The error at a successor is the squared difference between the option value
    stored in ``Optimal`` for that successor and the portfolio value there.

    Parameters
    ----------
    s : float
        Stock price at the current node.
    t : int or float
        Time step of the current node.
    a : sequence of length 2
        (stock units, cash). The entries may be scalars or NumPy arrays that
        broadcast against each other; the errors then have the broadcast shape.

    Returns
    -------
    list of two tuples ``(next_price, next_step, squared_error, probability)``:
    the up move (probability ``p``) first, then the down move (probability ``q``).

    Raises
    ------
    KeyError
        If a successor node has no entry in ``Optimal``.
    """
    s_u = s * u
    s_d = s * d
    t_next = t + 1

    # Keys are built from plain Python floats so they match the keys created
    # from states.tolist() regardless of how the installed NumPy prints its
    # scalar types.
    value_u = Optimal[str([float(s_u), float(t_next)])]
    value_d = Optimal[str([float(s_d), float(t_next)])]

    growth = np.exp(dt * r)  # one-step growth factor applied to the cash leg

    # Both branches are measured against the stored option value. The down
    # branch previously replaced it with Payoff(s_d), which is only the same
    # number where the stored value equals the immediate payoff.
    r_u = (value_u - a[0] * s_u - a[1] * growth) ** 2
    r_d = (value_d - a[0] * s_d - a[1] * growth) ** 2
    return [(s_u, t_next, r_u, p), (s_d, t_next, r_d, q)]

In [8]:
# Backward induction over the lattice. For every non-terminal node this
#   1. evaluates the expected squared hedging error of each (stock, cash) pair
#      on the action grid and records the minimising pair in pi_new, and
#   2. stores the node's option value in Optimal as the larger of the immediate
#      payoff and the discounted expected value of its two successor nodes.
V_new = np.zeros(len(states))
pi_new = {}

# Stock positions run down the rows and cash amounts across the columns, so a
# single transition() call per node evaluates the whole grid by broadcasting
# instead of one Python-level call per (stock, cash) pair.
stock_grid = actions[:, np.newaxis]
cash_grid = actions_1[np.newaxis, :]
discount = np.exp(-dt * r)

# np.flip without an axis reverses rows and columns: nodes are visited from the
# last time step backwards, and each row reads [time step, price].
for i, s in enumerate(np.flip(states)):
    # Plain Python floats keep the string keys identical to those built from
    # states.tolist(), however the installed NumPy prints its scalar types.
    t, price = float(s[0]), float(s[1])
    if t >= N:
        # Terminal nodes already carry their payoff in Optimal; V_new stays 0.
        continue
    node_key = str([price, t])

    # Q[j, k]: expected squared hedging error of holding actions[j] units of
    # stock and actions_1[k] in cash over the next step.
    next_states = transition(price, t, [stock_grid, cash_grid])
    Q = sum(p_next * reward_next for _, _, reward_next, p_next in next_states)

    # Node value: immediate payoff versus discounted expectation of successors.
    value_up = Optimal[str([float(price * u), t + 1])]
    value_down = Optimal[str([float(price * d), t + 1])]
    Optimal[node_key] = max(Payoff(price), (p * value_up + q * value_down) * discount)

    V_new[i] = np.min(Q)
    optimal_action = np.unravel_index(Q.argmin(), Q.shape)
    pi_new[node_key] = [actions[optimal_action[0]], actions_1[optimal_action[1]]]


In [9]:
# Show the value stored for each node, keyed by the string form of
# [stock price, time step].
Optimal

{'[105.0, 0.0]': 2.1166775214671194,
 '[106.73957901580232, 1.0]': 1.2664253250167863,
 '[103.28877162207843, 1.0]': 2.954664318611434,
 '[108.50797836638769, 2.0]': 0.6415731289912847,
 '[105.0, 2.0]': 1.8822550413569445,
 '[101.60543183997974, 2.0]': 4.011611933148603,
 '[110.30567553032388, 3.0]': 0.24762064040946175,
 '[106.73957901580232, 3.0]': 1.029832514882577,
 '[103.28877162207843, 3.0]': 2.7223737175533675,
 '[99.94952614173641, 3.0]': 5.282276000570676,
 '[112.13315589676627, 4.0]': 0.05545307902909928,
 '[108.50797836638769, 4.0]': 0.4370091960380993,
 '[105.0, 4.0]': 1.6140907182030875,
 '[101.60543183997974, 4.0]': 3.814667976128751,
 '[98.32060742275019, 4.0]': 6.728761403999475,
 '[113.99091289651584, 5.0]': 0,
 '[110.30567553032388, 5.0]': 0.11010375411623814,
 '[106.73957901580232, 5.0]': 0.7591876078536138,
 '[103.28877162207843, 5.0]': 2.456646023776968,
 '[99.94952614173641, 5.0]': 5.153112568160301,
 '[96.71823586507116, 5.0]': 8.281764134928835,
 '[115.879448135

In [10]:
# Show the selected [stock, cash] pair for every non-terminal node, keyed by
# the string form of [stock price, time step].
pi_new

{'[90.56567332486303, 9.0]': [-1.0, 105.0],
 '[93.59141068449117, 9.0]': [-1.0, 105.0],
 '[96.71823586507116, 9.0]': [-1.0, 105.0],
 '[99.94952614173641, 9.0]': [-1.0, 105.0],
 '[103.28877162207843, 9.0]': [-1.0, 105.0],
 '[106.73957901580232, 9.0]': [0.0, 0.0],
 '[110.30567553032388, 9.0]': [0.0, 0.0],
 '[113.99091289651584, 9.0]': [0.0, 0.0],
 '[117.79927152895165, 9.0]': [0.0, 0.0],
 '[121.73486482514014, 9.0]': [0.0, 0.0],
 '[92.06611279979579, 8.0]': [-1.0, 105.0],
 '[95.14197881864429, 8.0]': [-1.0, 105.0],
 '[98.32060742275019, 8.0]': [-1.0, 105.0],
 '[101.60543183997974, 8.0]': [-1.0, 105.0],
 '[105.0, 8.0]': [-0.496, 52.919999999999995],
 '[108.50797836638769, 8.0]': [0.0, 0.0],
 '[112.13315589676627, 8.0]': [0.0, 0.0],
 '[115.8794481352485, 8.0]': [0.0, 0.0],
 '[119.75090144160464, 8.0]': [0.0, 0.0],
 '[93.59141068449117, 7.0]': [-1.0, 105.0],
 '[96.71823586507116, 7.0]': [-1.0, 105.0],
 '[99.94952614173641, 7.0]': [-1.0, 105.0],
 '[103.28877162207843, 7.0]': [-0.752, 79.8],


In [12]:
# Report the hedge chosen for every node stored in pi_new, to three decimals.
print("Optimal hedging portfolio[price , time] (buy stock, hold cash):")
for node_key, hedge in pi_new.items():
    stock_units, cash = hedge[0], hedge[1]
    print(node_key, ": (%.3f, %.3f)" % (stock_units, cash))


Optimal hedging portfolio[price , time] (buy stock, hold cash):
[90.56567332486303, 9.0] : (-1.000, 105.000)
[93.59141068449117, 9.0] : (-1.000, 105.000)
[96.71823586507116, 9.0] : (-1.000, 105.000)
[99.94952614173641, 9.0] : (-1.000, 105.000)
[103.28877162207843, 9.0] : (-1.000, 105.000)
[106.73957901580232, 9.0] : (0.000, 0.000)
[110.30567553032388, 9.0] : (0.000, 0.000)
[113.99091289651584, 9.0] : (0.000, 0.000)
[117.79927152895165, 9.0] : (0.000, 0.000)
[121.73486482514014, 9.0] : (0.000, 0.000)
[92.06611279979579, 8.0] : (-1.000, 105.000)
[95.14197881864429, 8.0] : (-1.000, 105.000)
[98.32060742275019, 8.0] : (-1.000, 105.000)
[101.60543183997974, 8.0] : (-1.000, 105.000)
[105.0, 8.0] : (-0.496, 52.920)
[108.50797836638769, 8.0] : (0.000, 0.000)
[112.13315589676627, 8.0] : (0.000, 0.000)
[115.8794481352485, 8.0] : (0.000, 0.000)
[119.75090144160464, 8.0] : (0.000, 0.000)
[93.59141068449117, 7.0] : (-1.000, 105.000)
[96.71823586507116, 7.0] : (-1.000, 105.000)
[99.94952614173641, 7