In [1]:
import random
from zoo_environment import ZooEnvironment

def q_learning(env, episodes, alpha, gamma, epsilon, max_steps=100):
    """Run tabular Q-learning on ``env`` and collect per-episode statistics.

    The environment is used through the following duck-typed interface:
    ``reset()`` (its return value is used as the initial state and as a
    dictionary key, so it must be hashable), ``get_actions()``,
    ``move(action)`` (its return value is used as the reward), the ``state``
    attribute (read after each move), ``is_terminal_state()`` and the
    ``visited_animals`` attribute (only its ``len`` is reported).

    Args:
        env: Environment object exposing the interface described above.
        episodes: Number of episodes to run.
        alpha: Learning rate applied to the temporal-difference error.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Probability of picking a uniformly random action instead of
            the greedy one.
        max_steps: Upper bound on the number of moves per episode. Values
            ``<= 0`` produce episodes with zero steps.

    Returns:
        A list with one dict per episode holding the keys ``"Episode"``
        (1-based), ``"Total Reward"``, ``"Steps"``, ``"Visited Animals"`` and
        ``"Path"``. ``"Path"`` lists the state the agent was in before each
        move; the state reached by the last move is not appended.
    """
    q_table = {}
    results = []
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        path = []
        # Counted explicitly: reading the loop variable after the loop fails
        # with UnboundLocalError when the loop body never runs.
        steps_taken = 0
        for _ in range(max_steps):
            steps_taken += 1
            if state not in q_table:
                q_table[state] = {a: 0 for a in env.get_actions()}
            action_values = q_table[state]

            # Epsilon-greedy behaviour policy; ties in the greedy branch are
            # resolved in favour of the first action in the table.
            if random.random() < epsilon:
                action = random.choice(list(action_values))
            else:
                action = max(action_values, key=action_values.get)

            reward = env.move(action)
            total_reward += reward
            next_state = env.state
            done = env.is_terminal_state()
            path.append(state)

            if next_state not in q_table:
                q_table[next_state] = {a: 0 for a in env.get_actions()}

            # A terminal transition has no future return, so the target is the
            # reward alone. Bootstrapping from the table here would leak values
            # learned for the same state key on non-terminal (or earlier
            # terminal) visits into the target.
            if done:
                future_rewards = 0
            else:
                future_rewards = max(q_table[next_state].values(), default=0)

            old_value = action_values[action]
            action_values[action] = old_value + alpha * (
                reward + gamma * future_rewards - old_value
            )

            state = next_state
            if done:
                break
        results.append({
            "Episode": episode + 1,
            "Total Reward": total_reward,
            "Steps": steps_taken,
            "Visited Animals": len(env.visited_animals),
            "Path": path,
        })
    return results

def sarsa(env, episodes, alpha, gamma, epsilon, max_steps=100):
    """Run tabular SARSA on ``env`` and collect per-episode statistics.

    The environment is used through the following duck-typed interface:
    ``reset()`` (its return value is used as the initial state and as a
    dictionary key, so it must be hashable), ``get_actions()``,
    ``move(action)`` (its return value is used as the reward), the ``state``
    attribute (read after each move), ``is_terminal_state()`` and the
    ``visited_animals`` attribute (only its ``len`` is reported).

    Args:
        env: Environment object exposing the interface described above.
        episodes: Number of episodes to run.
        alpha: Learning rate applied to the temporal-difference error.
        gamma: Discount factor applied to the value of the next
            state-action pair.
        epsilon: Probability of picking a uniformly random next action
            instead of the greedy one.
        max_steps: Upper bound on the number of moves per episode. Values
            ``<= 0`` produce episodes with zero steps.

    Returns:
        A list with one dict per episode holding the keys ``"Episode"``
        (1-based), ``"Total Reward"``, ``"Steps"``, ``"Visited Animals"`` and
        ``"Path"``. ``"Path"`` lists the state the agent was in before each
        move; the state reached by the last move is not appended.
    """
    q_table = {}
    results = []
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        path = []
        # NOTE(review): the first action of every episode is drawn uniformly
        # at random, independent of epsilon and of the Q-table. Kept as-is;
        # confirm this exploring start is intended.
        action = random.choice(env.get_actions())
        # Counted explicitly: reading the loop variable after the loop fails
        # with UnboundLocalError when the loop body never runs.
        steps_taken = 0
        for _ in range(max_steps):
            steps_taken += 1
            if state not in q_table:
                q_table[state] = {a: 0 for a in env.get_actions()}

            reward = env.move(action)
            total_reward += reward
            next_state = env.state
            done = env.is_terminal_state()

            if next_state not in q_table:
                q_table[next_state] = {a: 0 for a in env.get_actions()}
            next_values = q_table[next_state]

            # Epsilon-greedy choice of the action to take from next_state.
            # It is sampled even on a terminal step so the sequence of random
            # draws is the same as before; ties in the greedy branch go to the
            # first action in the table.
            if random.random() < epsilon:
                next_action = random.choice(list(next_values))
            else:
                next_action = max(next_values, key=next_values.get)
            path.append(state)

            # A terminal transition has no future return, so the target is the
            # reward alone. Bootstrapping from the table here would leak values
            # learned for the same state key on other visits into the target.
            future_rewards = 0 if done else next_values[next_action]

            old_value = q_table[state][action]
            q_table[state][action] = old_value + alpha * (
                reward + gamma * future_rewards - old_value
            )

            state = next_state
            action = next_action
            if done:
                break
        results.append({
            "Episode": episode + 1,
            "Total Reward": total_reward,
            "Steps": steps_taken,
            "Visited Animals": len(env.visited_animals),
            "Path": path,
        })
    return results


ModuleNotFoundError: No module named 'zoo_environment'