In [None]:
import numpy as np

In [2]:
def value_iteration(grid_size=4, gamma=1.0, theta=1e-4, greedy=False):
    """Compute state values for a square gridworld by repeated Bellman backups.

    The world has ``grid_size * grid_size`` states numbered row-major. Every
    move (up, down, left, right) yields a reward of -1; a move that would leave
    the grid keeps the agent where it is. The bottom-right cell is terminal and
    its value stays at 0.

    NOTE: despite the function name, the default backup *averages* the four
    action values, i.e. it performs iterative policy evaluation for the
    equiprobable random policy. Pass ``greedy=True`` to take the maximum over
    actions instead (the Bellman optimality backup, i.e. value iteration
    proper).

    Sweeps are synchronous: every state is updated from the values of the
    previous sweep. Iteration stops once the largest absolute change in a
    sweep is below ``theta``.

    Args:
        grid_size: Side length of the grid. Must be non-negative.
        gamma: Discount factor applied to the successor state's value.
        theta: Convergence threshold on the largest per-sweep change.
            Must be strictly positive, otherwise the loop could never stop.
        greedy: If False (default), average over the four actions; if True,
            take the best action.

    Returns:
        ``numpy.ndarray`` of shape ``(grid_size, grid_size)`` holding the
        converged state values.

    Raises:
        ValueError: If ``grid_size`` is negative or ``theta`` is not positive.
    """
    if grid_size < 0:
        raise ValueError(f"grid_size must be non-negative, got {grid_size!r}")
    if not theta > 0:  # Written this way so that NaN is rejected as well
        raise ValueError(f"theta must be positive, got {theta!r}")

    num_states = grid_size * grid_size
    terminal_state = num_states - 1  # Bottom-right corner
    states = np.arange(num_states)

    # The dynamics are deterministic and never change, so build the
    # (num_states, 4) successor table once instead of on every sweep.
    row, col = np.divmod(states, grid_size)
    last = grid_size - 1
    next_states = np.stack(
        [
            np.where(row > 0, states - grid_size, states),     # Up
            np.where(row < last, states + grid_size, states),  # Down
            np.where(col > 0, states - 1, states),             # Left
            np.where(col < last, states + 1, states),          # Right
        ],
        axis=1,
    )
    is_terminal = states == terminal_state
    backup = np.max if greedy else np.mean

    V = np.zeros(num_states)  # Initialize value function
    while True:
        # Bellman backup for every (state, action) pair from the previous sweep
        action_values = -1 + gamma * V[next_states]
        # The terminal state is never updated; it keeps its current value
        V_new = np.where(is_terminal, V, backup(action_values, axis=1))

        # fmax ignores NaN, matching the previous max(delta, ...) accumulation;
        # initial=0.0 also covers the empty (grid_size == 0) case.
        delta = np.fmax.reduce(np.abs(V_new - V), initial=0.0)
        V = V_new

        if delta < theta:
            break  # Convergence condition met

    return V.reshape((grid_size, grid_size))

In [3]:
# Solve the 4x4 gridworld (undiscounted, tolerance 1e-4) and show the state values
final_values = value_iteration(grid_size=4, gamma=1.0, theta=1e-4)
print(final_values)

[[-59.42367735 -57.42387125 -54.2813141  -51.71012579]
 [-57.42387125 -54.56699476 -49.71029394 -45.13926711]
 [-54.2813141  -49.71029394 -40.85391609 -29.99766609]
 [-51.71012579 -45.13926711 -29.99766609   0.        ]]
