In [14]:
import numpy as np
import torch
import torch.nn as nn
import torch_geometric.nn as pyg_nn
from pettingzoo.mpe import simple_tag_v2
import os

# Initialize the environment
env = simple_tag_v2.parallel_env(render_mode=None, num_adversaries=4, num_good=1, num_obstacles=2)
env.reset()

# Parameters
num_class_a = 3
num_class_b = 1
num_adversaries = 4
num_agents = 1  # Only one agent being chased by adversaries
num_obstacles = 2
adversary_agents = [agent for agent in env.agents if 'adversary' in agent]
good_agents = [agent for agent in env.agents if 'agent' in agent]

print("Adversary Agents:", adversary_agents)
print("Good Agents:", good_agents)


Adversary Agents: ['adversary_0', 'adversary_1', 'adversary_2', 'adversary_3']
Good Agents: ['agent_0']


### Define Embedding and CGN layer

In [15]:
# Define linear embedding layer
class LinearEmbedding(nn.Module):
    def __init__(self, input_dim, embed_dim):
        super(LinearEmbedding, self).__init__()
        self.linear = nn.Linear(input_dim, embed_dim)
    
    def forward(self, x):
        return self.linear(x)

# Define GCN layer
class GCNLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GCNLayer, self).__init__()
        self.conv1 = pyg_nn.GCNConv(input_dim, output_dim)
    
    def forward(self, x, edge_index):
        return self.conv1(x, edge_index)

# Parameters for embedding and GCN
communication_range = 1.5  # Define the communication range
embedding_dim = 8          # Dimension for linear embedding
gcn_output_dim = 16        # Dimension of GCN output

# Initialize embedding and GCN layers
embedding_layer = LinearEmbedding(input_dim=5, embed_dim=embedding_dim)  # input_dim includes the class identifier
gcn_layer = GCNLayer(input_dim=embedding_dim, output_dim=gcn_output_dim)


### Encoder-Decoder

In [16]:
#output_decoder_dim = 18
input_actor_network_max_dim = 40 # Temporary solution with padding

### Observation Wrapper

In [17]:
def adversary_observation_wrapper(observations, num_class_A, num_class_B, adversary_agents, num_adversaries, num_agents, num_obstacles):
    assert num_class_A + num_class_B == num_adversaries, "Number of agents assigned to Class A and Class B must match the total number of adversaries."
    
    class_a_agents = adversary_agents[:num_class_A]
    class_b_agents = adversary_agents[num_class_A:num_class_A + num_class_B]
    
    updated_observations = {}
    adversary_positions = {}  # Store adversary positions for communication
    
    # Step 1: Add agent class identifier into observation
    for agent, obs in observations.items():
        # If the agent is an adversary (either Class A or Class B)
        if agent in class_a_agents or agent in class_b_agents:
            agent_class = 0 if agent in class_a_agents else 1  # Class A: 0, Class B: 1
            updated_obs = np.concatenate([obs, [agent_class]])  # Add class identifier
        else:
            updated_obs = obs  # Non-adversary agents keep their observation
        updated_observations[agent] = updated_obs
    
    # Step 2: Gather positions for communication
    for agent in adversary_agents:
        position = updated_observations[agent][2:4]  # Assume position is at index 2:4
        adversary_positions[agent] = position
    
    # Step 3: Apply linear embedding and handle communication within range
    embedded_information = {}
    for agent, obs in updated_observations.items():
        if agent in adversary_agents:
            # Convert observation to torch tensor
            obs_tensor = torch.tensor(obs[:5], dtype=torch.float32)  # Include the class identifier
            embedded_obs = embedding_layer(obs_tensor)  # Apply embedding to observation
            embedded_information[agent] = embedded_obs
    
    # Step 4: Communication and gather data for GCN input
    node_features = []
    edge_index = []
    num_agents_in_graph = len(adversary_agents)
    
    agent_to_idx = {agent: idx for idx, agent in enumerate(adversary_agents)}
    
    for agent in adversary_agents:
        own_position = adversary_positions[agent]
        node_features.append(embedded_information[agent])
        
        for other_agent in adversary_agents:
            if agent != other_agent:
                other_position = adversary_positions[other_agent]
                distance = np.linalg.norm(own_position - other_position)
                
                if distance <= communication_range:  # Check if within communication range
                    edge_index.append([agent_to_idx[agent], agent_to_idx[other_agent]])  # Add edge

    # Convert to torch tensors
    if edge_index:
        edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    else:
        edge_index = torch.empty((2, 0), dtype=torch.long)
    
    node_features = torch.stack(node_features)
    
    # Step 5: Pass the gathered information into a GCN
    gcn_output = gcn_layer(node_features, edge_index)
    
    # Step 6: Concatenate GCN output with agent's own observation
    final_inputs = {}
    for agent_idx, agent in enumerate(adversary_agents):
        own_obs = torch.tensor(updated_observations[agent], dtype=torch.float32)
        final_input = torch.cat([own_obs, gcn_output[agent_idx]])
        final_inputs[agent] = final_input  # This will be used as input to the Actor network
    
    return updated_observations, final_inputs  # Return the updated observations and inputs for Actor network


### Initialize the Environment and Run a Step

In [18]:
# Initialize environment
env.reset()

# Sample action spaces for all agents
actions = {agent: env.action_space(agent).sample() for agent in env.agents}

# Step through the environment
observations, rewards, terminations, truncations, infos = env.step(actions)

# Apply the observation wrapper for adversaries
observations, final_inputs = adversary_observation_wrapper(
    observations, num_class_a, num_class_b, adversary_agents, num_adversaries, num_agents, num_obstacles)

print("Updated Observations:")
for agent, obs in observations.items():
    print(f"{agent}: {obs}")

print("\nFinal Inputs for Actor Network:")
for agent, inp in final_inputs.items():
    print(f"{agent}: {inp}")

Updated Observations:
adversary_0: [-0.00000000e+00  0.00000000e+00 -9.47399199e-01 -1.57172814e-01
  1.32227802e+00  2.75671691e-01  1.49862707e+00 -1.52103484e-01
  3.07478279e-01  9.60103750e-01  6.39038503e-01 -6.16788864e-01
  1.35898805e+00 -5.90383589e-01  1.23976541e+00  6.27690792e-01
 -6.53049388e-35  4.00000006e-01  0.00000000e+00]
adversary_1: [ 0.00000000e+00  0.00000000e+00 -6.39920890e-01  8.02930892e-01
  1.01479983e+00 -6.84432030e-01  1.19114876e+00 -1.11220717e+00
 -3.07478279e-01 -9.60103750e-01  3.31560224e-01 -1.57689261e+00
  1.05150974e+00 -1.55048728e+00  9.32287097e-01 -3.32412928e-01
 -6.53049388e-35  4.00000006e-01  0.00000000e+00]
adversary_2: [-0.00000000e+00 -0.00000000e+00 -3.08360696e-01 -7.73961663e-01
  6.83239579e-01  8.92460525e-01  8.59588563e-01  4.64685380e-01
 -6.39038503e-01  6.16788864e-01 -3.31560224e-01  1.57689261e+00
  7.19949543e-01  2.64052637e-02  6.00726843e-01  1.24447966e+00
 -6.53049388e-35  4.00000006e-01  0.00000000e+00]
adversary

### Actor Network


In [19]:
class ActorNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(ActorNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, output_dim)  # Output dimension should match the action space
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Initialize Actor networks for adversaries
actor_networks = {}
for agent in adversary_agents:
    input_dim = len(env.observation_space(agent).low) + 1 + gcn_output_dim  # Observation length plus class id plus GCN output
    output_dim = env.action_space(agent).n  # Assuming discrete action space
    print(f'old agent:{agent} input_dim:{input_dim} output_dim:{output_dim}')
    input_dim = max(input_dim, input_actor_network_max_dim)
    print(f'new agent:{agent} input_dim:{input_dim} output_dim:{output_dim}')
    actor_networks[agent] = ActorNetwork(input_dim, output_dim)


old agent:adversary_0 input_dim:35 output_dim:5
new agent:adversary_0 input_dim:40 output_dim:5
old agent:adversary_1 input_dim:35 output_dim:5
new agent:adversary_1 input_dim:40 output_dim:5
old agent:adversary_2 input_dim:35 output_dim:5
new agent:adversary_2 input_dim:40 output_dim:5
old agent:adversary_3 input_dim:35 output_dim:5
new agent:adversary_3 input_dim:40 output_dim:5


### Training Loop

In [20]:
# Training parameters
num_episodes = 50  # Total number of episodes to run
print_interval = 10  # Print rewards every 10 episodes

# Initialize reward tracking
episode_rewards = []
cumulative_reward = 0

# Optimizers for Actor networks and GCN (assuming we are training them)
learning_rate = 0.001
actor_optimizers = {agent: torch.optim.Adam(actor_networks[agent].parameters(), lr=learning_rate) for agent in adversary_agents}
gcn_optimizer = torch.optim.Adam(gcn_layer.parameters(), lr=learning_rate)

# Loss function (placeholder, you need to define based on your RL algorithm)
loss_fn = nn.MSELoss()

# Training loop
for episode in range(1, num_episodes + 1):
    observations = env.reset()
    done = False
    cumulative_reward = 0  # Reset cumulative reward for the episode
    
    while not done:
        # Apply the observation wrapper for adversaries
        observations, final_inputs = adversary_observation_wrapper(
            observations, num_class_a, num_class_b, adversary_agents, num_adversaries, num_agents, num_obstacles)
        actions = {}
        print(f'env.agents:{env.agents}')
        for agent in env.agents:
            print(f'agent:{agent}')
            if agent in adversary_agents:
                print(f'final_inputs:{final_inputs[agent].shape}')
                m = nn.ConstantPad1d((0, input_actor_network_max_dim - final_inputs[agent].shape[0]), 0)
                final_inputs_pad = m(final_inputs[agent])
                print(f'final_inputs_pad:{final_inputs_pad.shape}')

                print('A')
                # Get the input for the Actor network
                actor_input = final_inputs_pad #final_inputs[agent]
                # Get action probabilities (assuming discrete action space)
                action_probs = actor_networks[agent](actor_input)
                # Sample an action (for simplicity, we take the action with the highest probability)
                action = torch.argmax(action_probs).item()
                actions[agent] = action
                print('/A')
            else:
                print('B')
                # For non-adversary agents, sample random actions
                actions[agent] = env.action_space(agent).sample()
                print('/B')

        print('next')            
        
        # Step the environment
        next_observations, rewards, terminations, truncations, infos = env.step(actions)
        
        # Update cumulative reward
        cumulative_reward += sum(rewards.values())
        
        # Placeholder for training step (you need to implement your RL algorithm here)
        # For example, compute loss and update networks
        
        # For simplicity, let's assume we have a target value (dummy value here)
        target = torch.zeros(1)
        loss = 0
        for agent in adversary_agents:
            print(f'final_inputs2:{final_inputs[agent].shape}')
            m = nn.ConstantPad1d((0, input_actor_network_max_dim - final_inputs[agent].shape[0]), 0)
            final_inputs_pad = m(final_inputs[agent])
            print(f'final_inputs_pad2:{final_inputs_pad.shape}')
            # Get the predicted value
            actor_input = final_inputs_pad # final_inputs[agent]
            prediction = actor_networks[agent](actor_input)
            # Compute loss (this is a placeholder)
            loss += loss_fn(prediction.unsqueeze(0), target)
        
        # Backpropagation
        gcn_optimizer.zero_grad()
        for optimizer in actor_optimizers.values():
            optimizer.zero_grad()
        
        loss.backward()
        
        gcn_optimizer.step()
        for optimizer in actor_optimizers.values():
            optimizer.step()
        
        # Update observations
        observations = next_observations
        
        # Check if all agents are done
        done = all(terminations.values()) or all(truncations.values())
    
    # Append cumulative reward for the episode
    episode_rewards.append(cumulative_reward)
    
    # Print rewards every 'print_interval' episodes
    if episode % print_interval == 0:
        avg_reward = sum(episode_rewards[-print_interval:]) / print_interval
        print(f"Episode {episode}: Average Reward: {avg_reward}")


env.agents:['adversary_0', 'adversary_1', 'adversary_2', 'adversary_3', 'agent_0']
agent:adversary_0
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:adversary_1
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:adversary_2
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:adversary_3
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:agent_0
B
/B
next
final_inputs2:torch.Size([35])
final_inputs_pad2:torch.Size([40])
final_inputs2:torch.Size([35])
final_inputs_pad2:torch.Size([40])
final_inputs2:torch.Size([35])
final_inputs_pad2:torch.Size([40])
final_inputs2:torch.Size([35])
final_inputs_pad2:torch.Size([40])
env.agents:['adversary_0', 'adversary_1', 'adversary_2', 'adversary_3', 'agent_0']
agent:adversary_0
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:adversary_1
final_inputs:torch.Size([35])
final_inputs_pad:torch.Size([40])
A
/A
agent:adversary_2
final_

### Save the Models

In [21]:
# Create a directory to save models
model_dir = 'saved_models'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)


def save_models(num_class_A, num_class_B, adversary_agents):
    class_a_agents = adversary_agents[:num_class_A]
    class_b_agents = adversary_agents[num_class_A:num_class_A + num_class_B]
    print(f'ctype:{class_a_agents}')
    print(f'ctype:{class_b_agents}')
    for agent in class_a_agents:
        if agent in class_a_agents:
            print(f"agent A:{agent}")
            torch.save(actor_networks[agent].state_dict(), os.path.join(model_dir, f"actor_class_A.pth"))
            break
    for agent in class_b_agents:
        if agent in class_b_agents:
            print(f"agent B:{agent}")
            torch.save(actor_networks[agent].state_dict(), os.path.join(model_dir, f"actor_class_B.pth"))
            break

    #torch.save(class_a_agents[0].state_dict(), os.path.join(model_dir, f"actor_class_A.pth"))
    #torch.save(class_b_agents[0].state_dict(), os.path.join(model_dir, f"actor_class_B.pth"))


    # # Step 1: Add agent class identifier into observation
    # for agent, obs in observations.items():
    #     # If the agent is an adversary (either Class A or Class B)
    #     if agent in class_a_agents or agent in class_b_agents:
    #         agent_class = 0 if agent in class_a_agents else 1  # Class A: 0, Class B: 1
    #         updated_obs = np.concatenate([obs, [agent_class]])  # Add class identifier
    #     else:
    #         updated_obs = obs  # Non-adversary agents keep their observation
    #     updated_observations[agent] = updated_obs

save_models(num_class_a, num_class_b, adversary_agents)

# Save Actor networks
#for i in range(1):
#    torch.save(actor_networks[agent].state_dict(), os.path.join(model_dir, f"actor_{agent}.pth"))

# Save GCN model
torch.save(gcn_layer.state_dict(), os.path.join(model_dir, "gcn_model.pth"))

print("Models saved successfully.")


ctype:['adversary_0', 'adversary_1', 'adversary_2']
ctype:['adversary_3']
agent A:adversary_0
agent B:adversary_3
Models saved successfully.


### Load the Models and Test with Different Number of Adversaries

In [22]:
# Set up the environment with a different number of adversaries
new_num_adversaries = 6  # Change the number of adversaries
env = simple_tag_v2.parallel_env(render_mode=None, num_adversaries=new_num_adversaries, num_good=1, num_obstacles=2)
env.reset()

# Update adversary agents list
adversary_agents = [agent for agent in env.agents if 'adversary' in agent]
good_agents = [agent for agent in env.agents if 'agent' in agent]

# Re-initialize embedding and GCN layers
embedding_layer = LinearEmbedding(input_dim=5, embed_dim=embedding_dim)  # Same as before
gcn_layer = GCNLayer(input_dim=embedding_dim, output_dim=gcn_output_dim)
gcn_layer.load_state_dict(torch.load(os.path.join(model_dir, "gcn_model.pth")))

class_a_agents = ['adversary_0', 'adversary_1', 'adversary_2', 'adversary_3']
class_b_agents = ['adversary_4', 'adversary_5']

# Re-initialize Actor networks for new agents and load the saved models
actor_networks = {}
for agent in adversary_agents:
    print(f"Agent: {agent}")
    input_dim = input_actor_network_max_dim #len(env.observation_space(agent).low) + 1 + gcn_output_dim  # Adjust if observation space changes
    output_dim = env.action_space(agent).n  # Assuming discrete action space
    print(f'agent:{agent} input_dim:{input_dim} output_dim:{output_dim}')
    actor_net = ActorNetwork(input_dim, output_dim)
    # Load the saved model (using the first saved model for simplicity)
    if agent in class_a_agents:
        print(f"Agent: {agent} class A")
        actor_net.load_state_dict(torch.load(os.path.join(model_dir, f"actor_class_A.pth")))
        actor_networks[agent] = actor_net
    if agent in class_b_agents:
        print(f"Agent: {agent} class B")
        actor_net.load_state_dict(torch.load(os.path.join(model_dir, f"actor_class_B.pth")))
        actor_networks[agent] = actor_net

    # Adjust the logic as per your agent classes
    #saved_agent = 'adversary_0' if 'adversary_0' in agent or 'adversary_1' in agent or 'adversary_2' in agent else 'adversary_3'
    #actor_net.load_state_dict(torch.load(os.path.join(model_dir, f"actor_{saved_agent}.pth")))
    #actor_networks[agent] = actor_net

print("Models loaded successfully.")


Agent: adversary_0
agent:adversary_0 input_dim:40 output_dim:5
Agent: adversary_0 class A
Agent: adversary_1
agent:adversary_1 input_dim:40 output_dim:5
Agent: adversary_1 class A
Agent: adversary_2
agent:adversary_2 input_dim:40 output_dim:5
Agent: adversary_2 class A
Agent: adversary_3
agent:adversary_3 input_dim:40 output_dim:5
Agent: adversary_3 class A
Agent: adversary_4
agent:adversary_4 input_dim:40 output_dim:5
Agent: adversary_4 class B
Agent: adversary_5
agent:adversary_5 input_dim:40 output_dim:5
Agent: adversary_5 class B
Models loaded successfully.
