In [5]:
# Bellmann equation implementation for Gridworld problem

import numpy as np

# Define the grid-world environment
grid_world = np.array([
    [0, 0, 0, 0],
    [0, -1, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])

# Define the rewards for each cell
rewards = np.array([
    [0, 0, 0, 0],
    [0, 0, 0, 10],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])

# Define the discount factor
discount_factor = 0.9

# Define the number of iterations for the Bellman equation
num_iterations = 100

# Initialize the value function array
value_function = np.zeros_like(grid_world, dtype=np.float32)

# Perform value iteration
for _ in range(num_iterations):
    updated_value_function = np.copy(value_function)
    for i in range(grid_world.shape[0]):
        for j in range(grid_world.shape[1]):
            if grid_world[i, j] == -1:  # Skip walls or obstacles
                continue
            up_value = value_function[max(i - 1, 0), j]
            down_value = value_function[min(i + 1, grid_world.shape[0] - 1), j]
            left_value = value_function[i, max(j - 1, 0)]
            right_value = value_function[i, min(j + 1, grid_world.shape[1] - 1)]
            max_value = max(up_value, down_value, left_value, right_value)
            updated_value_function[i, j] = rewards[i, j] + discount_factor * max_value
    value_function = updated_value_function

# Find the optimal policy
optimal_policy = np.zeros_like(grid_world, dtype=np.str)
for i in range(grid_world.shape[0]):
    for j in range(grid_world.shape[1]):
        if grid_world[i, j] == -1:
            optimal_policy[i, j] = "W"  # Mark walls or obstacles
        else:
            up_value = value_function[max(i - 1, 0), j]
            down_value = value_function[min(i + 1, grid_world.shape[0] - 1), j]
            left_value = value_function[i, max(j - 1, 0)]
            right_value = value_function[i, min(j + 1, grid_world.shape[1] - 1)]
            max_value = max(up_value, down_value, left_value, right_value)
            if max_value == up_value:
                optimal_policy[i, j] = "U"
            elif max_value == down_value:
                optimal_policy[i, j] = "D"
            elif max_value == left_value:
                optimal_policy[i, j] = "L"
            elif max_value == right_value:
                optimal_policy[i, j] = "R"

# Print the results
print("Optimal Value Function:")
print(value_function)
print("\nOptimal Policy:")
print(optimal_policy)


Optimal Value Function:
[[65.607346 72.89734  80.997345 89.997345]
 [59.046345  0.       89.997345 99.997345]
 [65.607346 72.89734  80.997345 89.997345]
 [59.046345 65.607346 72.89734  80.997345]]

Optimal Policy:
[['R' 'R' 'D' 'D']
 ['U' 'W' 'R' 'R']
 ['R' 'R' 'U' 'U']
 ['U' 'U' 'U' 'U']]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  optimal_policy = np.zeros_like(grid_world, dtype=np.str)


In [13]:
# TODO: Write a Python code based on Bellman equation (include value function) as follows: 
#an agent can performs two types of actions endurnace (weight lifting) and performance (running, swimming). 
#The agent get a user data with energy and interested (endurance and performance), 
#then it distributes the actions based on the given energy. 
#For example a user with 10 units of energy, and performance interest will be 7 units performance and 3 units endurance. 
# the output should be a list of actions for the given number of energy. For example, energy=3, interested=performance. The output will be 1: weight lifting, 2: swimming, 3: running.

import random

def bellman_equation(energy, interested):
  """
  Performs Bellman equation to distribute actions based on given energy and interested.

  Args:
    energy: The amount of energy the user has.
    interested: The user's interest in endurance and performance.

  Returns:
    A list of actions for the given number of energy.
  """

  actions = []
  if interested == "endurance":
    actions = ["weight lifting"]
  elif interested == "performance":
    actions = ["running", "swimming"]
  else:
    raise ValueError("Invalid interested value")

  # Randomly distribute the actions based on the user's energy.

  for i in range(energy):
    actions.append(random.choice(actions))

  return actions

if __name__ == "__main__":
  energy = 10
  interested = "performance"
  actions = bellman_equation(energy, interested)
  print(actions)



['running', 'swimming', 'swimming', 'running', 'running', 'running', 'running', 'running', 'running', 'running', 'running', 'running']
