In [504]:
import time
import numpy as np

In [505]:
# Capacity constant: the model arrays are sized (BERTH + 1) per axis and the
# x-1 / y-1 transition probabilities are scaled by x / BERTH and y / BERTH.
BERTH = 10
# Passed as max_iteration to value_iteration() in the driver cell.
numIteration = 1000

# NOTE(review): the "radio_" prefix presumably means "ratio" -- confirm before renaming,
# since other cells may read these names.
radio_REVENUE = 1.2 # REVENUE_C / REVENUE_T; must be > 0
radio_ARRIVAL = 0.5 # fraction of the 0.52 total assigned to p_C; must lie in (0, 1)

# NOTE(review): the meaning of the C / T suffixes is not stated in this notebook -- TODO document.
REVENUE_T = 0.1
REVENUE_C = radio_REVENUE * REVENUE_T

# p_C and p_T split a fixed total of 0.52 (they sum to 0.52 up to float rounding).
p_C = radio_ARRIVAL * 0.52
p_T = 0.52 - p_C
# Base probabilities for the x-1 / y-1 transitions in initial_p(); each is the
# matching p_C / p_T minus 0.02.
p_deC = p_C - 0.02
p_deT = p_T - 0.02

In [506]:
# action = np.ones((BERTH + 1, BERTH + 1))
# r = np.zeros((BERTH + 1, BERTH + 1, 2))
# p = np.zeros((BERTH + 1, BERTH + 1, 2))

In [507]:
def discharge(y):
    """Return the amount subtracted from the reward in full states with y > 0.

    The result is the p_C/p_T-weighted mean of REVENUE_C and REVENUE_T
    (weights normalised by p_C + p_T) minus the REVENUE_C part of that mean
    scaled by ``y / BERTH``.

    Reads the module-level p_C, p_T, REVENUE_C, REVENUE_T and BERTH.
    """
    total = p_C + p_T
    c_part = p_C / total * REVENUE_C
    t_part = p_T / total * REVENUE_T
    # Same left-to-right evaluation order as a single-line expression would use.
    return c_part + t_part - c_part * y / BERTH

def initial_a():
    """Build the initial (BERTH + 1) x (BERTH + 1) grid of action codes.

    Every cell with x + y <= BERTH holds 1, cells outside that triangle
    stay 0, and the corner (BERTH, 0) is set to 3.

    Returns:
        numpy.ndarray of floats with shape (BERTH + 1, BERTH + 1).
    """
    size = BERTH + 1
    grid = np.zeros((size, size))
    for x in range(size):
        # Row x has valid y values 0 .. BERTH - x.
        grid[x, :size - x] = 1
    grid[BERTH, 0] = 3
    return grid

def initial_r(p_c, p_t):
    """Build the reward array indexed as r[x, y, action].

    Args:
        p_c: probability weighting REVENUE_C.
        p_t: probability weighting REVENUE_T.

    Returns:
        numpy.ndarray of shape (BERTH + 1, BERTH + 1, 2). For x + y < BERTH
        only action 0 carries a reward (p_c * REVENUE_C + p_t * REVENUE_T).
        For x + y == BERTH with y > 0 only action 1 carries a reward, which is
        that same amount minus discharge(y). Every other entry is 0.
    """
    size = BERTH + 1
    rewards = np.zeros((size, size, 2))
    income = p_c * REVENUE_C + p_t * REVENUE_T
    for x in range(size):
        y_full = BERTH - x  # the y value at which x + y == BERTH
        rewards[x, :y_full, 0] = income
        if y_full > 0:
            rewards[x, y_full, 1] = income - discharge(y_full)
    return rewards
                    
def initial_p(p_c, p_t, p_dec, p_det, berth=None):
    """Build the transition array indexed as p[next_x, next_y, x, y, action].

    Args:
        p_c: probability of the x -> x + 1 transition.
        p_t: probability of the y -> y + 1 transition.
        p_dec: base probability of the x -> x - 1 transition; scaled by x / berth.
        p_det: base probability of the y -> y - 1 transition; scaled by y / berth.
        berth: capacity (must be positive). Defaults to the module-level BERTH,
            so existing four-argument calls behave as before.

    Returns:
        numpy.ndarray of shape (berth + 1, berth + 1, berth + 1, berth + 1, 2).
        Only states with x + y <= berth are filled:
          * x + y < berth: action 0 only; action 1 is left all-zero.
          * (berth, 0): action 0 only.
          * x + y == berth with y > 0: both actions; action 1 additionally
            moves to (x + 1, y - 1) with probability p_c.
    """
    if berth is None:
        berth = BERTH
    size = berth + 1
    p = np.zeros((size, size, size, size, 2))
    for x in range(size):
        for y in range(size - x):
            leave_c = p_dec * (x / berth)
            leave_t = p_det * (y / berth)
            for a in range(2):
                if x + y < berth and a == 0:
                    p[x + 1, y, x, y, a] = p_c
                    p[x, y + 1, x, y, a] = p_t
                    # Guard the decrements: an index of -1 would wrap around to
                    # the last row/column and, for berth == 1, overwrite the
                    # p_c / p_t entries written just above.
                    if x > 0:
                        p[x - 1, y, x, y, a] = leave_c
                    if y > 0:
                        p[x, y - 1, x, y, a] = leave_t
                    p[x, y, x, y, a] = 1. - p_c - p_t - leave_c - leave_t
                elif x + y == berth and y == 0 and a == 0:
                    # Here x == berth >= 1, so x - 1 is a valid index.
                    p[x - 1, y, x, y, a] = leave_c
                    p[x, y, x, y, a] = 1. - leave_c
                elif x + y == berth and y > 0:
                    # p_c * a is 0 for action 0 and p_c for action 1.
                    p[x + 1, y - 1, x, y, a] = p_c * a
                    if x > 0:
                        p[x - 1, y, x, y, a] = leave_c
                    p[x, y - 1, x, y, a] = leave_t
                    p[x, y, x, y, a] = 1. - p_c * a - leave_c - leave_t
    return p

In [508]:
def one_step_likeahead(V, R, P, x, y, discount_factor=1.0):
    """Evaluate both actions in state (x, y) against the value table V.

    Args:
        V: value table indexed as V[next_x, next_y].
        R: rewards indexed as R[x, y, action].
        P: transitions indexed as P[next_x, next_y, x, y, action].
        x, y: indices of the state being evaluated.
        discount_factor: multiplier applied to the expected next-state value.

    Returns:
        numpy.ndarray of two floats: for each action, the immediate reward plus
        the discounted sum of P[:, :, x, y, action] * V.
    """
    def action_value(action):
        expected_next = (P[:, :, x, y, action] * V).sum()
        return R[x, y, action] + discount_factor * expected_next

    return np.array([action_value(action) for action in range(2)], dtype=float)

In [509]:
def value_iteration(p_c, p_t, a, r, p, max_iteration=-1, theta=0.0001, discount_factor=1.0):
    """Run value iteration over the states with x + y <= BERTH.

    Args:
        p_c, p_t: unused here; kept so existing call sites keep working.
        a: action-code grid. It is modified IN PLACE and also returned.
        r: rewards indexed as r[x, y, action].
        p: transitions indexed as p[next_x, next_y, x, y, action].
        max_iteration: maximum number of sweeps; values <= 0 disable the cap.
        theta: stop once the largest value change in a sweep is below this.
        discount_factor: forwarded to one_step_likeahead for every backup.

    Returns:
        (v, a): the value table of shape (BERTH + 1, BERTH + 1) and the same
        `a` object, where each full state with y > 0 now holds 3 if action 0
        has the highest one-step value and 2 otherwise.

    Note:
        With max_iteration <= 0 the loop ends only when the theta test passes.
    """
    v = np.zeros((BERTH + 1, BERTH + 1))
    count = 0

    while True:
        count += 1
        delta = 0
        for x in range(BERTH + 1):
            for y in range(BERTH - x + 1):
                # Forward discount_factor so a non-default value takes effect.
                actions = one_step_likeahead(v, r, p, x, y, discount_factor)
                best_value = max(actions)
                delta = max(delta, np.abs(best_value - v[x, y]))
                # v is updated during the sweep, so later states in the same
                # sweep already read this new value.
                v[x, y] = best_value
        if delta < theta:
            break
        if max_iteration > 0 and count >= max_iteration:
            break

    # Policy codes are only rewritten for full states with y > 0, i.e.
    # y == BERTH - x for x in 0 .. BERTH - 1.
    for x in range(BERTH):
        y = BERTH - x
        actions = one_step_likeahead(v, r, p, x, y, discount_factor)
        a[x, y] = 3 if np.argmax(actions) == 0 else 2

    return v, a

In [510]:
# Driver cell: build the action grid, rewards and transitions from the
# configuration constants, run value iteration, and print the results.
t1 = time.time()
# a = np.ones((BERTH + 1, BERTH + 1))
a = initial_a()
r = initial_r(p_C, p_T)
p = initial_p(p_C, p_T, p_deC, p_deT)
# p = initial_p()
# numIteration is received as max_iteration. value_iteration writes into `a`
# in place and returns that same array, so Action and `a` are the same object.
Value, Action = value_iteration(p_C, p_T, a, r, p, numIteration)
print(Value)
print(Action)
# Seconds elapsed since t1 was taken at the top of this cell.
print(time.time() - t1)

[[32.37461726 32.32566127 32.27301269 32.21634945 32.1553199  32.08955172
  32.01867833 31.94240628 31.86067804 31.77405884 31.68465242]
 [32.32550748 32.27279863 32.21603758 32.15484261 32.08878264 32.01737268
  31.94007312 31.85630033 31.76546692 31.66710113  0.        ]
 [32.27269093 32.215896   32.15464932 32.08850727 32.01696145 31.9394268
  31.85522726 31.76357824 31.66356346  0.          0.        ]
 [32.21581649 32.15454995 32.08837961 32.01679241 31.93919559 31.85490039
  31.76310171 31.66285215  0.          0.          0.        ]
 [32.15448815 32.08830531 32.01670126 31.9390813  31.85475376 31.76290915
  31.66259366  0.          0.          0.          0.        ]
 [32.08825523 32.01664273 31.93901186 31.85467002 31.76280638 31.66246525
   0.          0.          0.          0.          0.        ]
 [32.0166008  31.93896389 31.85461448 31.76274126 31.66238782  0.
   0.          0.          0.          0.          0.        ]
 [31.93892788 31.85457395 31.7626952  31.66233493 