In [2]:
!pip install pettingzoo


Collecting pettingzoo
  Downloading pettingzoo-1.24.3-py3-none-any.whl.metadata (8.5 kB)
Using cached pettingzoo-1.24.3-py3-none-any.whl (847 kB)
Installing collected packages: pettingzoo
Successfully installed pettingzoo-1.24.3



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\ilhui\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [2]:
import numpy as np
import random
from pettingzoo.mpe import simple_tag_v3

# Seed every random source used below: `random` and `np.random` both drive the
# epsilon-greedy exploration in get_action, so seeding only the environment
# is not enough to make a run repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Initialise the environment
env = simple_tag_v3.env(render_mode="human", max_cycles=100)
env.reset(seed=SEED)

# Learning hyper-parameters
learning_rate = 0.01    # step size of each Q-value update
discount_factor = 0.9   # weight given to the estimated future return
action_space = 5        # number of discrete actions (also the Q-table width)
epsilon = 0.5           # probability of picking a random action while exploring

# Number of discretised states and the Q-tables.
# The state count is taken from the observation length of the first listed
# agent; calc_state folds every observation into that many buckets.
total_states = (env.observation_space(env.agents[0]).shape[0], action_space)
model_params_prey = np.zeros((total_states[0], action_space))
model_params_predator = np.zeros((total_states[0], action_space))

# Internal state of the prey (a single dict shared by every prey agent)
prey_state = {
    "energy": 100,
    "thirst": 0
}

# Funciones de ayuda
def calc_state(observation):
    """Map an observation to a discrete state index.

    The observation's elements are summed, the sum is truncated to an
    integer, and the result is reduced modulo ``total_states[0]`` so it can
    be used as a row index into the Q-tables.
    """
    truncated_sum = int(observation.sum())
    bucket_count = total_states[0]
    return truncated_sum % bucket_count

def get_action(state, model_params, exploration=True):
    """Choose an action for ``state`` with an epsilon-greedy policy.

    When ``exploration`` is true, a uniformly random action is returned with
    probability ``epsilon``; otherwise the action with the highest value in
    ``model_params[state]`` is returned. No random number is drawn when
    ``exploration`` is false.
    """
    explore_now = exploration and random.uniform(0, 1) < epsilon
    if not explore_now:
        # Greedy choice: index of the largest Q-value in this state's row.
        return model_params[state].argmax()
    return np.random.choice(action_space)

def train(experience, model_params, lr=learning_rate, df=discount_factor):
    """Update a Q-table in place from recorded transitions using Q-learning.

    Args:
        experience: sequence of ``(prev_state, action_taken, state, reward,
            final)`` entries. It is replayed from the newest entry to the
            oldest.
        model_params: Q-table indexed as ``model_params[state][action]``;
            modified in place.
        lr: learning rate applied to each temporal-difference error.
        df: discount factor applied to the value of the successor state.

    Returns:
        None.
    """
    for prev_state, action_taken, state, reward, final in reversed(experience):
        # A terminal transition has no successor to bootstrap from, so its
        # target is the immediate reward alone.
        future_value = 0.0 if final else model_params[state].max()
        target = reward + df * future_value
        model_params[prev_state][action_taken] += lr * (target - model_params[prev_state][action_taken])

# Lógica para actualizar el estado interno de las presas
def update_prey_state(prey_state, action):
    """Apply the effect of ``action`` to the prey's internal state in place.

    Energy is kept within [0, 100] by every branch that changes it, and
    thirst never drops below 0.

    Args:
        prey_state: dict with numeric ``"energy"`` and ``"thirst"`` entries;
            mutated in place.
        action: action index chosen by the prey. 1, 2 and 0 have dedicated
            effects; any other value is treated as resting.

    Returns:
        None.
    """
    if action == 1:  # Seek water
        prey_state["thirst"] = max(0, prey_state["thirst"] - 10)
    elif action == 2:  # Seek food
        prey_state["energy"] = min(100, prey_state["energy"] + 10)
    elif action == 0:  # Flee
        # Floor at 0 so energy cannot go negative, mirroring the thirst floor.
        prey_state["energy"] = max(0, prey_state["energy"] - 5)
        prey_state["thirst"] += 2
    else:  # Rest
        # Same 100 cap as eating, so resting cannot push energy past the cap.
        prey_state["energy"] = min(100, prey_state["energy"] + 1)

# Episode loop
num_episodes = 10
prey_survival_times = {}
agent_actions = {}

# try/finally guarantees the environment (and its render window) is closed
# even when the run is interrupted or an error is raised mid-episode.
try:
    for episode in range(num_episodes):
        env.reset()
        experience_prey = {agent: [] for agent in env.agents if "agent" in agent}
        experience_predator = {agent: [] for agent in env.agents if "adversary" in agent}
        survival_time = {agent: 0 for agent in env.agents if "agent" in agent}
        actions_log = {agent: [] for agent in env.agents}
        total_rewards = {agent: 0 for agent in env.agents}
        # (state, action) picked on each agent's previous turn. The reward and
        # successor state for that choice only become available the next time
        # the same agent is selected, so the transition is completed then.
        pending = {}

        print(f"\nInicio del Episodio {episode + 1}")

        # agent_iter() keeps yielding until every agent has been removed, so a
        # single pass over it covers the whole episode.
        for agent in env.agent_iter():
            observation, reward, termination, truncation, info = env.last()
            done = termination or truncation
            is_prey = "agent" in agent

            # Record the reward obtained
            total_rewards[agent] += reward

            # The fresh observation is both the successor state of the agent's
            # previous action and the state its next action is chosen from.
            state = calc_state(observation)

            if agent in pending:
                prev_state, prev_action = pending.pop(agent)
                experience = experience_prey if is_prey else experience_predator
                experience[agent].append([prev_state, prev_action, state, reward, done])

            if done:
                action = None
                print(f"{agent} ha terminado y no toma acción.")
            else:
                if is_prey:  # Prey
                    action = get_action(state, model_params_prey)
                    update_prey_state(prey_state, action)
                    survival_time[agent] += 1  # One more step survived by this prey
                else:  # Predator
                    action = get_action(state, model_params_predator)

                # Log the action and remember it until its outcome is known
                actions_log[agent].append(action)
                pending[agent] = (state, action)

            # Always call env.step(), passing None for an agent that has finished
            env.step(action)

        # Train at the end of the episode
        for agent_experience in experience_prey.values():
            train(agent_experience, model_params_prey)
        for agent_experience in experience_predator.values():
            train(agent_experience, model_params_predator)

        # Store the survival time of each prey
        for agent, steps in survival_time.items():
            prey_survival_times.setdefault(agent, []).append(steps)

        # Show the accumulated rewards and the actions taken
        print(f"\nResumen del Episodio {episode + 1}")
        for agent, actions in actions_log.items():
            print(f"Acciones de {agent}: {actions}")

        print(f"Recompensas acumuladas: {total_rewards}")
        print(f"Tiempo de supervivencia de las presas: {survival_time}")
        print(model_params_predator.shape)
        print(model_params_prey.shape)

    print("\nResultados Finales:")
    for agent, times in prey_survival_times.items():
        promedio = np.mean(times)
        print(f"Presa {agent} sobrevivió en promedio {promedio:.2f} pasos.")
finally:
    env.close()



Inicio del Episodio 1
adversary_0 ha terminado y no toma acción.
adversary_1 ha terminado y no toma acción.
adversary_2 ha terminado y no toma acción.
agent_0 ha terminado y no toma acción.

Resumen del Episodio 1
Acciones de adversary_0: [0, 4, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 1, 0, 1, 0, 0, 3, 0, 0, 2, 1, 1, 0, 3, 0, 4, 2, 4, 0, 1, 0, 2, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 4, 4, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 4, 3, 0, 0, 3, 3, 0, 0, 4, 0, 0, 0, 0, 3, 0, 0, 3, 0, 4, 0, 2, 0, 0, 0, 0, 0, 4, 4, 4, 0, 4]
Acciones de adversary_1: [1, 0, 2, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 2, 0, 0, 4, 0, 4, 1, 0, 3, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 2, 4, 4, 3, 1, 3, 0, 1, 0, 0, 4, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, 1, 0, 1, 3, 4, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 1, 0, 0, 3, 0, 2, 4, 2, 0, 1, 2, 0, 0, 3, 0, 1]
Acciones de adversary_2: [0, 2, 0, 0, 4, 0, 0, 0, 2, 0, 3, 0, 2, 0, 2, 0, 0, 2, 3, 2, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 1, 0, 2, 0, 0, 0,

KeyboardInterrupt: 