In [8]:
import numpy as np

gamma = 0.9

# Order: A, B, C, D
# V = R + gamma * P * V  â†’  (I - gamma P)V = R

P = np.array([
    [0.5, 0.25, 0.25, 0.0],  # A
    [0.0, 1.0, 0.0, 0.0],    # B
    [0.25, 0.0, 0.5, 0.25],  # C
    [0.0, 0.0, 0.0, 1.0]     # D
])

R = np.array([0, 5, 0, 0])

I = np.eye(4)

V = np.linalg.solve(I - gamma * P, R)

for s, v in zip(["A", "B", "C", "D"], V):
    print(f"V({s}) = {v:.2f}")


V(A) = 24.57
V(B) = 50.00
V(C) = 10.05
V(D) = 0.00


In [9]:
# Define the states and initialize value function
states = ["A", "B", "C", "D"]
V = {s: 0.0 for s in states}  # Initialize the value function with 0 for all states
gamma = 0.9  # Discount factor

# Reward function for specific states
rewards = {"B": 5}  # State B has a reward of 5, others implicitly have a reward of 0

# Define the transition function (state, action) -> next_state
def next_state(s, a):
    transitions = {
        "A": ["A", "B", "C", "A"],  # From state A, action 0 -> A, action 1 -> B, etc.
        "B": ["B"],  # From state B, action 0 -> B
        "C": ["A", "C", "D", "C"],  # From state C, action 0 -> A, action 1 -> C, etc.
        "D": ["D"]   # From state D, action 0 -> D
    }
    return transitions[s][a]

# Value iteration algorithm (100 iterations)
for _ in range(100):
    V_new = {}  # A dictionary to hold the updated values for each state
    
    # For each state, calculate the new value by checking all possible actions
    for s in states:
        possible_actions = len(next_state(s, 0))  # Get the number of actions available for the state
        V_new[s] = max(
            rewards.get(next_state(s, a), 0)  # Get reward of next state, default to 0
            + gamma * V[next_state(s, a)]     # Future value (discounted)
            for a in range(possible_actions)  # Loop over all possible actions for the state
        )
    
    # Update the value function with the new values
    V = V_new

# Print the final value function after 100 iterations
print("Final Value Function:")
print(V)


Final Value Function:
{'A': 0.0, 'B': 49.99867193005562, 'C': 0.0, 'D': 0.0}
