In [1]:
import random

In [2]:

# Define the environment
class Environment:
    """One-dimensional monkey-and-banana world.

    The monkey walks along integer positions ``0..max_position``. It can pick
    up the stick while standing on the stick's position, and it obtains the
    banana by jumping at the banana's position while holding the stick.
    """

    def __init__(self, banana_position=5, stick_position=2, start_position=0,
                 max_position=None):
        """Build the world and put the monkey at its start position.

        Args:
            banana_position: Position of the bananas (default 5).
            stick_position: Position of the stick (default 2).
            start_position: Where the monkey starts, and where ``reset``
                puts it back (default 0).
            max_position: Right-most position the monkey can reach. When
                omitted it is the largest of the three positions above, which
                is 5 for the default layout.

        Raises:
            ValueError: If any position lies outside ``0..max_position``.
        """
        if max_position is None:
            max_position = max(banana_position, stick_position, start_position)

        for label, value in (("banana_position", banana_position),
                             ("stick_position", stick_position),
                             ("start_position", start_position)):
            if not 0 <= value <= max_position:
                raise ValueError(
                    f"{label}={value} is outside the world 0..{max_position}"
                )

        self.banana_position = banana_position
        self.stick_position = stick_position
        self.start_position = start_position
        self.max_position = max_position

        # Mutable episode state; reset() restores exactly these three fields.
        self.monkey_position = start_position
        self.has_stick = False
        self.has_banana = False

    def reset(self):
        """Restore the episode state and return the resulting state dict."""
        self.monkey_position = self.start_position
        self.has_stick = False
        self.has_banana = False
        return self.get_state()

    def get_state(self):
        """Return a fresh dict snapshot of the positions and the two flags."""
        return {
            'monkey_position': self.monkey_position,
            'banana_position': self.banana_position,
            'stick_position': self.stick_position,
            'has_stick': self.has_stick,
            'has_banana': self.has_banana,
        }

    def move(self, direction):
        """Move the monkey one step, clamped to ``0..max_position``.

        Args:
            direction: ``"left"`` or ``"right"``. Any other value leaves the
                monkey where it is.
        """
        if direction == "left":
            self.monkey_position = max(0, self.monkey_position - 1)
        elif direction == "right":
            # Clamp against the world bound rather than a literal so the
            # limit follows the configured layout.
            self.monkey_position = min(self.max_position,
                                       self.monkey_position + 1)

    def pick_up_stick(self):
        """Take the stick if the monkey stands on the stick's position.

        Does nothing when the monkey is anywhere else.
        """
        if self.monkey_position == self.stick_position:
            self.has_stick = True

    def jump(self):
        """Grab the banana if the monkey holds the stick and is under it.

        Does nothing when either condition is not met.
        """
        if self.has_stick and self.monkey_position == self.banana_position:
            self.has_banana = True

    def is_done(self):
        """Return True once the monkey has obtained the banana."""
        return self.has_banana

# Define the agent (monkey)
class MonkeyAgent:
    """Agent that picks one of four fixed actions at random."""

    def __init__(self, rng=None):
        """Set up the action list and the random source.

        Args:
            rng: Optional object exposing ``choice(seq)``, e.g. a
                ``random.Random(seed)`` instance for a reproducible run.
                When omitted the global ``random`` module is used.
        """
        self.actions = ["left", "right", "pick_up_stick", "jump"]
        self._rng = rng if rng is not None else random

    def choose_action(self, state):
        """Return a randomly chosen action name.

        Args:
            state: Current environment state. Accepted for interface
                symmetry but not consulted by this random policy.
        """
        return self._rng.choice(self.actions)

# Run the simulation
MAX_STEPS = 10  # Upper bound on the number of actions the monkey may take
SEED = None     # Set to an int to make the random run reproducible

if SEED is not None:
    random.seed(SEED)

env = Environment()
agent = MonkeyAgent()
state = env.reset()

# Let the agent act until it gets the banana or the step budget runs out.
for step in range(MAX_STEPS):
    print(f"Step {step + 1}:")
    print(f"Current state: {state}")
    action = agent.choose_action(state)
    print(f"Monkey chooses action: {action}")

    # Translate the chosen action name into the matching environment call.
    if action in ("left", "right"):
        env.move(action)
    elif action == "pick_up_stick":
        env.pick_up_stick()
    elif action == "jump":
        env.jump()

    state = env.get_state()
    if env.is_done():
        print("Monkey has obtained the banana!")
        break
else:
    # for/else: reached only when the loop ended without hitting `break`.
    print(f"Monkey couldn't obtain the banana in {MAX_STEPS} steps.")



Step 1:
Current state: {'monkey_position': 0, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: right
Step 2:
Current state: {'monkey_position': 1, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: right
Step 3:
Current state: {'monkey_position': 2, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: jump
Step 4:
Current state: {'monkey_position': 2, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: left
Step 5:
Current state: {'monkey_position': 1, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: pick_up_stick
Step 6:
Current state: {'monkey_position': 1, 'banana_position': 5, 'stick_position': 2, 'has_stick': False, 'has_banana': False}
Monkey chooses action: jump
Step 7:
Current state: {'monkey_position'