In [2]:
import numpy as np
import random

In [3]:
class GridWorld:
    """Square grid environment in which one agent walks from a start cell to a goal cell.

    Positions are ``[row, col]`` pairs. The agent starts at ``(0, 0)`` and the
    goal is the opposite corner ``(size - 1, size - 1)``. An episode ends when
    the goal is reached or when ``max_steps`` steps have been taken.

    Args:
        size: Side length of the grid (number of rows and of columns).
        max_steps: Step budget per episode; reaching it ends the episode.
    """

    def __init__(self, size=5, max_steps=50):
        self.size = size
        self.max_steps = max_steps
        self.start = (0, 0)
        self.goal = (size - 1, size - 1)
        self.reset()

    def reset(self):
        """Put the agent back on the start cell and zero the step counter.

        Returns:
            A new ``[row, col]`` list holding the start position.
        """
        self.agent_pos = list(self.start)
        self.steps = 0
        # Hand out a copy: returning the internal list would let later step()
        # calls (or the caller) silently rewrite a state that was already returned.
        return list(self.agent_pos)

    def step(self, action):
        """Apply one action and advance the episode by one step.

        Moves are clamped at the grid border. An action that is not one of
        ``"UP"``, ``"DOWN"``, ``"LEFT"``, ``"RIGHT"`` leaves the agent where it
        is but still counts as a step.

        Args:
            action: One of ``"UP"``, ``"DOWN"``, ``"LEFT"``, ``"RIGHT"``.

        Returns:
            A ``(state, reward, done)`` tuple where ``state`` is a new
            ``[row, col]`` list, ``reward`` is ``10`` on reaching the goal and
            ``-1`` otherwise, and ``done`` is ``True`` when the goal is reached
            or the step budget is exhausted.
        """
        if action == "UP":
            self.agent_pos[0] = max(0, self.agent_pos[0] - 1)
        elif action == "DOWN":
            self.agent_pos[0] = min(self.size - 1, self.agent_pos[0] + 1)
        elif action == "LEFT":
            self.agent_pos[1] = max(0, self.agent_pos[1] - 1)
        elif action == "RIGHT":
            self.agent_pos[1] = min(self.size - 1, self.agent_pos[1] + 1)

        self.steps += 1

        # Snapshot the position so previously returned states stay valid after
        # the next step mutates self.agent_pos in place.
        state = list(self.agent_pos)

        # Reward and termination: the goal check comes first, so reaching the
        # goal on the very last allowed step still counts as a success.
        if tuple(self.agent_pos) == self.goal:
            return state, 10, True
        elif self.steps >= self.max_steps:
            return state, -1, True
        else:
            return state, -1, False

In [4]:
# Every move the random policy can emit, in a fixed order.
ACTIONS = ["UP", "DOWN", "LEFT", "RIGHT"]


def random_agent_action() -> str:
    """Pick one entry of ``ACTIONS`` with ``random.choice`` and return it.

    Uses the module-level ``random`` generator, so seeding ``random`` makes
    the sequence of returned actions repeatable.
    """
    picked = random.choice(ACTIONS)
    return picked

In [5]:
# Experiment configuration for the random-agent baseline.
NUM_EPISODES = 100
SEED = 42  # fixed so the baseline statistics are reproducible on Restart & Run All

# The policy draws from the module-level `random` generator; seed it before
# any action is sampled so every run replays the same episodes.
random.seed(SEED)

env = GridWorld()

# Per-episode statistics consumed by the summary cell.
total_rewards = []
episode_lengths = []
successes = 0

for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    episode_reward = 0

    # Roll the episode out until the environment signals termination.
    while not done:
        action = random_agent_action()
        state, reward, done = env.step(action)
        episode_reward += reward

    # The loop only exits once `done` is True, so the bookkeeping runs exactly
    # once per episode here instead of behind an `if done:` inside the loop.
    episode_lengths.append(env.steps)
    total_rewards.append(episode_reward)
    # An episode counts as a success only if it ended on the goal cell
    # (as opposed to running out of steps).
    if tuple(state) == env.goal:
        successes += 1

In [6]:
# Collapse the per-episode logs into the three headline baseline metrics.
success_rate = successes / NUM_EPISODES
avg_length = np.mean(episode_lengths)
avg_reward = np.mean(total_rewards)

# Build the report first, then emit it one line at a time.
report_lines = [
    "Random Agent Baseline Results",
    "-----------------------------",
    f"Episodes run        : {NUM_EPISODES}",
    f"Average reward      : {avg_reward:.2f}",
    f"Average episode len : {avg_length:.2f}",
    f"Success rate        : {success_rate * 100:.2f}%",
]
for report_line in report_lines:
    print(report_line)


Random Agent Baseline Results
-----------------------------
Episodes run        : 100
Average reward      : -38.70
Average episode len : 43.10
Success rate        : 40.00%
