# Implementation: GridWorld Environment

**Goal**: Build a simple environment from scratch.

In [None]:
import numpy as np

class GridWorld:
    """A minimal square grid environment with a single agent and a fixed goal.

    Positions are ``[x, y]`` lists. The first coordinate is changed by the
    Up/Down actions and the second by Left/Right; both are clamped to the
    range ``0 .. size - 1``. The agent starts at ``[0, 0]`` and the goal sits
    at ``[size - 1, size - 1]``.

    Actions are integers: 0=Up, 1=Right, 2=Down, 3=Left. Any other value
    leaves the agent where it is (the step is still rewarded as usual).
    """

    def __init__(self, size=3):
        """Create a ``size`` x ``size`` grid with the agent at the start cell."""
        self.size = size
        self.agent_pos = [0, 0]  # Start at top-left
        self.goal_pos = [size - 1, size - 1]  # Goal at bottom-right

    def reset(self):
        """Move the agent back to ``[0, 0]`` and return its position.

        Returns:
            A new list holding the agent position. It is a copy, so mutating
            it does not affect the environment's internal state.
        """
        self.agent_pos = [0, 0]
        # Hand out a copy: returning the internal list would let callers move
        # the agent by accident just by editing their observation.
        return list(self.agent_pos)

    def step(self, action):
        """Apply one action and return ``(position, reward, done)``.

        Args:
            action: 0=Up, 1=Right, 2=Down, 3=Left. Moves that would leave the
                grid are clamped at the border; unrecognised values are a
                no-op for the position.

        Returns:
            A tuple ``(position, reward, done)`` where ``position`` is a fresh
            ``[x, y]`` list, ``reward`` is 10 when the agent is on the goal
            after the move and -1 otherwise, and ``done`` is True exactly when
            the agent is on the goal.
        """
        x, y = self.agent_pos
        last = self.size - 1

        if action == 0:
            x = max(0, x - 1)
        elif action == 1:
            y = min(last, y + 1)
        elif action == 2:
            x = min(last, x + 1)
        elif action == 3:
            y = max(0, y - 1)

        self.agent_pos = [x, y]

        # Reward logic: +10 on the goal, otherwise a living penalty of -1
        # per step to encourage short paths.
        done = self.agent_pos == self.goal_pos
        reward = 10 if done else -1

        # Return a copy so the caller's observation is decoupled from the
        # environment's own state.
        return list(self.agent_pos), reward, done

# Step 1: build the environment and fetch the starting observation.
env = GridWorld()
obs = env.reset()
print(f"Start: {obs}")

# Step 2: replay a fixed action sequence (1 = Right, 2 = Down), printing the
# position, reward and done flag returned by each step.
actions = [1, 1, 2, 2]
for a in actions:
    transition = env.step(a)
    obs, r, done = transition
    print(f"Action {a} -> Pos {obs}, Reward {r}, Done {done}")

## Conclusion
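We successfully reached the goal (Pos [2, 2]) and got the +10 reward on the final step. The total return was 7: three living penalties of -1 (for the first three steps) plus the +10 goal reward.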