In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class SwarmAgent:
    """One member of the swarm: an identifier plus its position."""

    def __init__(self, agent_id, position):
        """Keep both arguments unchanged as public attributes.

        Args:
            agent_id: Identifier for this agent.
            position: Where the agent is located; stored as given.
        """
        self.agent_id, self.position = agent_id, position

class Discriminator(nn.Module):
    """Binary classifier that scores each input row with a sigmoid output.

    Architecture: ``Linear(input_size, 64) -> ReLU -> Linear(64, 1) -> Sigmoid``.
    """

    def __init__(self, input_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of inputs.

        Args:
            x: Tensor whose last dimension has ``input_size`` features.

        Returns:
            Tensor with a last dimension of size 1 holding the sigmoid output.
        """
        # A non-linearity is required between the two Linear layers: without
        # it they compose into a single affine map and the hidden layer adds
        # no modelling capacity. The functional form keeps the parameter
        # names (fc1.*, fc2.*) unchanged.
        hidden = torch.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

class Generator(nn.Module):
    """Two-layer network mapping an input vector to an output vector.

    Architecture: ``Linear(input_size, 64) -> ReLU -> Linear(64, output_size)``.
    The output layer is left linear, so outputs are unbounded.
    """

    def __init__(self, input_size, output_size):
        """Create the layers.

        Args:
            input_size: Number of features in each input row.
            output_size: Number of features in each output row.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, output_size)

    def forward(self, x):
        """Transform a batch of inputs.

        Args:
            x: Tensor whose last dimension has ``input_size`` features.

        Returns:
            Tensor whose last dimension has ``output_size`` features.
        """
        # Two Linear layers with nothing in between are equivalent to one
        # affine map; the ReLU makes the hidden layer actually contribute.
        # The functional form keeps the parameter names (fc1.*, fc2.*).
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

class GAIL:
    """Adversarial trainer pitting a state generator against a discriminator.

    The discriminator is trained to output 1 for states built from the expert
    trajectories and 0 for states produced by the generator from Gaussian
    noise; the generator is trained so the discriminator outputs 1 for its
    states. Both losses are binary cross-entropy.
    """

    def __init__(self, num_agents, environment_size, expert_trajectories, num_epochs, learning_rate):
        """Store the configuration and build the networks and optimizers.

        Args:
            num_agents: Number of agents; stored but not otherwise used by
                this class.
            environment_size: Grid shape, indexed as ``(size0, size1)``; the
                flattened state has ``size0 * size1`` entries.
            expert_trajectories: Iterable of trajectories, each an iterable of
                objects exposing a ``position`` attribute.
            num_epochs: Number of iterations run by ``train``.
            learning_rate: Learning rate shared by both Adam optimizers.
        """
        self.num_agents = num_agents
        self.environment_size = environment_size
        self.expert_trajectories = expert_trajectories
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate

        # Discriminator input, generator input (noise) and generator output
        # all share the flattened grid size.
        state_dim = environment_size[0] * environment_size[1]
        self.discriminator = Discriminator(state_dim)
        self.generator = Generator(state_dim, state_dim)
        self.optimizer_d = optim.Adam(self.discriminator.parameters(), lr=learning_rate)
        self.optimizer_g = optim.Adam(self.generator.parameters(), lr=learning_rate)
        self.criterion = nn.BCELoss()

    def train(self):
        """Run ``num_epochs`` alternating discriminator/generator updates.

        Prints both losses every 10 epochs.

        Raises:
            ValueError: If there are no expert trajectories to learn from.
        """
        expert_states = self.get_state_trajectories(self.expert_trajectories)
        batch_size = len(expert_states)
        if not batch_size:
            # BCELoss over an empty batch would only yield NaN losses.
            raise ValueError("expert_trajectories must contain at least one trajectory")

        # Targets are identical every epoch, so build them once.
        expert_labels = torch.ones(batch_size, 1)
        generated_labels = torch.zeros(batch_size, 1)

        for epoch in range(self.num_epochs):
            # Also clears the discriminator gradients accumulated by the
            # previous epoch's generator step.
            self.optimizer_d.zero_grad()

            # Train discriminator with expert data
            expert_output = self.discriminator(expert_states)
            loss_expert = self.criterion(expert_output, expert_labels)
            loss_expert.backward()

            # Train discriminator with generated data; detach so this pass
            # does not push gradients into the generator.
            generated_states = self.generate_states(batch_size)
            generated_output = self.discriminator(generated_states.detach())
            loss_generated = self.criterion(generated_output, generated_labels)
            loss_generated.backward()

            self.optimizer_d.step()

            # Train generator: it wants its states to be labelled as expert.
            self.optimizer_g.zero_grad()
            generated_output = self.discriminator(generated_states)
            loss_generator = self.criterion(generated_output, expert_labels)
            loss_generator.backward()
            self.optimizer_g.step()

            if epoch % 10 == 0:
                print(f"Epoch: {epoch}, Discriminator Loss: {loss_expert.item() + loss_generated.item()}, Generator Loss: {loss_generator.item()}")

    def get_state_trajectories(self, trajectories):
        """Convert trajectories into flattened per-cell agent-count grids.

        Each agent's ``position`` is used directly as a NumPy index into a
        grid of shape ``environment_size``, so out-of-range coordinates raise
        ``IndexError`` and negative coordinates wrap around.

        Args:
            trajectories: Iterable of trajectories, each an iterable of
                objects exposing a ``position`` attribute.

        Returns:
            Float32 tensor of shape ``(num_trajectories, prod(environment_size))``;
            an empty input yields zero rows.
        """
        trajectories = list(trajectories)
        state_dim = int(np.prod(self.environment_size))

        # Fill one contiguous array instead of handing torch a list of
        # ndarrays, which is slow and triggers a UserWarning.
        grid = np.zeros((len(trajectories), *self.environment_size), dtype=np.float32)
        for row, trajectory in enumerate(trajectories):
            cell_counts = grid[row]  # view: writes land in ``grid``
            for agent in trajectory:
                cell_counts[tuple(agent.position)] += 1

        return torch.from_numpy(grid.reshape(len(trajectories), state_dim))

    def generate_states(self, num_states):
        """Sample ``num_states`` states by feeding Gaussian noise to the generator.

        Args:
            num_states: Number of rows to generate.

        Returns:
            The generator's output for a ``(num_states, size0 * size1)`` noise
            batch, still attached to the autograd graph.
        """
        noise = torch.randn(num_states, self.environment_size[0] * self.environment_size[1])
        return self.generator(noise)

# Usage example: train on a single snapshot of randomly placed agents.
num_agents = 10
environment_size = (10, 10)
num_epochs = 1000
learning_rate = 0.001

# Give every agent a random grid cell; the first coordinate is drawn before
# the second for each agent.
swarm_agents = []
for i in range(num_agents):
    position = (
        np.random.randint(0, environment_size[0]),
        np.random.randint(0, environment_size[1]),
    )
    agent = SwarmAgent(agent_id=i, position=position)
    swarm_agents.append(agent)

# List of expert agent trajectories; here it holds just the one swarm above.
expert_trajectories = [swarm_agents]

gail = GAIL(
    num_agents=num_agents,
    environment_size=environment_size,
    expert_trajectories=expert_trajectories,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)
gail.train()


  return torch.tensor(states, dtype=torch.float)


Epoch: 0, Discriminator Loss: 1.2777687311172485, Generator Loss: 0.8048244118690491
Epoch: 10, Discriminator Loss: 1.0445780754089355, Generator Loss: 0.8288130760192871
Epoch: 20, Discriminator Loss: 0.9223431646823883, Generator Loss: 0.8192118406295776
Epoch: 30, Discriminator Loss: 0.7722482830286026, Generator Loss: 0.8933328986167908
Epoch: 40, Discriminator Loss: 0.5683342814445496, Generator Loss: 1.1289241313934326
Epoch: 50, Discriminator Loss: 0.9674858897924423, Generator Loss: 0.572892427444458
Epoch: 60, Discriminator Loss: 0.6183156743645668, Generator Loss: 0.9334321618080139
Epoch: 70, Discriminator Loss: 0.7887430712580681, Generator Loss: 0.7161604762077332
Epoch: 80, Discriminator Loss: 0.9035866782069206, Generator Loss: 0.6199588775634766
Epoch: 90, Discriminator Loss: 0.5732090547680855, Generator Loss: 1.0050736665725708
Epoch: 100, Discriminator Loss: 0.8497932851314545, Generator Loss: 0.7129592299461365
Epoch: 110, Discriminator Loss: 0.5511702746152878, Gen

The code starts by defining a SwarmAgent class representing an agent in the swarm. Each agent has an agent ID and a position in the environment.

The Discriminator class is defined as a neural network with two fully connected layers and a sigmoid activation function.

The Generator class is defined as a neural network with two fully connected layers.

The GAIL class represents the Generative Adversarial Imitation Learning algorithm. It takes the number of agents, environment size, expert trajectories, number of training epochs, and learning rate as input.

The GAIL class contains a discriminator and a generator. The discriminator estimates the probability that a given state came from the expert data, and the generator produces synthetic states from random noise.

The train method trains the discriminator and generator using expert trajectories. It iterates for the specified number of epochs.

The discriminator is trained using both expert and generated data. Expert trajectories are labeled as 1, and generated trajectories are labeled as 0.

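The generator is trained to fool the discriminator: its loss is the binary cross-entropy between the discriminator's output on generated states and the expert label (1), so minimizing it pushes generated states toward being classified as expert.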

The get_state_trajectories method converts each trajectory (a list of agents) into a state: a grid of the environment's size in which each cell holds the number of agents at that position, flattened into a one-dimensional array.

The generate_states method generates states using the generator model.

In the usage example, a swarm of agents is created with random positions in the environment.

Expert trajectories are defined as a list of agent trajectories. In this example, only a single expert trajectory is used.

An instance of GAIL is created, passing the necessary parameters.

The train method is called to train the discriminator and generator.