In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class SwarmAgent:
    """A single member of the swarm.

    Attributes:
        agent_id: Identifier supplied by the caller; stored unchanged.
        position: Location supplied by the caller; stored unchanged
            (no copy and no validation is performed).
    """

    def __init__(self, agent_id, position):
        # Both fields are kept exactly as passed in.
        self.agent_id, self.position = agent_id, position

class Classifier(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input vector.
        num_classes: Number of output logits produced per input.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 64  # width of the single hidden layer
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) logits for the input batch ``x``."""
        return self.fc2(self.relu(self.fc1(x)))

class ClassicalIL:
    """Supervised classifier trained on one-hot encodings of agent positions.

    Args:
        num_agents: Number of agents; also used as the number of output
            classes of the classifier.
        environment_size: Shape of the grid, indexed as
            ``environment_size[0]`` by ``environment_size[1]``.
        num_epochs: Number of full-batch optimisation steps run by ``train``.
        learning_rate: Learning rate handed to the Adam optimiser.
    """

    def __init__(self, num_agents, environment_size, num_epochs, learning_rate):
        self.num_agents = num_agents
        self.environment_size = environment_size
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate

        # One input feature per grid cell, one output logit per agent.
        self.classifier = Classifier(environment_size[0] * environment_size[1], num_agents)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.classifier.parameters(), lr=learning_rate)

    def _encode_state(self, position):
        """Return a flattened grid of zeros with a single 1 at ``position``."""
        grid = np.zeros(self.environment_size)
        grid[tuple(position)] = 1
        return grid.flatten()

    def train(self, expert_trajectories, expert_actions):
        """Fit the classifier on the converted expert data.

        Runs ``num_epochs`` full-batch gradient steps (no mini-batching).

        Args:
            expert_trajectories: Iterable of trajectories, each an iterable of
                agents exposing ``position`` and ``agent_id``.
            expert_actions: Iterable of action sequences paired with the
                trajectories (see ``convert_expert_data`` for how it is used).
        """
        states, actions = self.convert_expert_data(expert_trajectories, expert_actions)

        # Collapse the list of per-sample arrays into one contiguous float32
        # array first: building a tensor straight from a list of ndarrays is
        # the slow path that PyTorch warns about.
        states = torch.from_numpy(np.asarray(states, dtype=np.float32))
        actions = torch.tensor(actions, dtype=torch.long)

        for _ in range(self.num_epochs):
            self.optimizer.zero_grad()
            logits = self.classifier(states)
            loss = self.criterion(logits, actions)
            loss.backward()
            self.optimizer.step()

    def convert_expert_data(self, expert_trajectories, expert_actions):
        """Turn expert data into parallel lists of states and labels.

        Args:
            expert_trajectories: Iterable of trajectories (iterables of agents).
            expert_actions: Iterable of action sequences. Only the *length* of
                each sequence matters: it bounds how many agents of the paired
                trajectory are consumed.

        Returns:
            ``(states, actions)`` where ``states`` is a list of flattened
            one-hot arrays and ``actions`` is the list of the corresponding
            agents' ``agent_id`` values.
        """
        states = []
        actions = []

        for trajectory, action_sequence in zip(expert_trajectories, expert_actions):
            # zip() stops at the shorter of the two sequences, so the action
            # sequence is kept in the iteration even though its values are
            # not read.
            for agent, _ in zip(trajectory, action_sequence):
                states.append(self._encode_state(agent.position))
                # NOTE(review): the label is the agent's ID, not the expert
                # action paired with it. This looks unintended for imitation
                # learning, but it is the currently documented behaviour, so
                # it is preserved here -- confirm before changing.
                actions.append(agent.agent_id)

        return states, actions

    def get_action(self, agent):
        """Return the index of the largest logit for ``agent``'s position.

        Args:
            agent: Object exposing a ``position`` usable as a grid index.

        Returns:
            The arg-max class index as a Python ``int``.
        """
        state = torch.tensor(self._encode_state(agent.position), dtype=torch.float).unsqueeze(0)
        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            logits = self.classifier(state)
        return torch.argmax(logits).item()

# Usage example: build a random swarm, fit the model on it, then query it once.
num_agents = 10
environment_size = (10, 10)
num_epochs = 100
learning_rate = 0.001

# Drop each agent on a random grid cell (row is drawn before column).
swarm_agents = []
for i in range(num_agents):
    row = np.random.randint(0, environment_size[0])
    col = np.random.randint(0, environment_size[1])
    swarm_agents.append(SwarmAgent(i, (row, col)))

# A single trajectory containing every agent, paired with one sequence of
# randomly drawn expert actions (one entry per agent).
expert_trajectories = [swarm_agents]
expert_actions = [np.random.randint(0, num_agents, size=num_agents)]

cil = ClassicalIL(num_agents, environment_size, num_epochs, learning_rate)
cil.train(expert_trajectories, expert_actions)

# Query the trained model for one particular agent and report the result.
agent_id = 0
action = cil.get_action(swarm_agents[agent_id])
print(f"Action for Agent {agent_id}: {action}")


  states = torch.tensor(states, dtype=torch.float)


Action for Agent 0: 0


The code defines a SwarmAgent class that represents an agent in the swarm. Each agent has an agent ID and a position in the environment.

The Classifier class is a neural network model that consists of two fully connected layers with a ReLU activation function. It takes the input size and the number of output classes as parameters.

The ClassicalIL class represents the Classical Imitation Learning algorithm. It takes the number of agents, environment size, number of epochs, and learning rate as input.

The ClassicalIL class contains a classifier model, a loss criterion (CrossEntropyLoss), and an optimizer (Adam optimizer).

The train method trains the classifier model using expert trajectories and expert actions. It iterates for the specified number of epochs.

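The convert_expert_data method converts the expert trajectories and actions into two parallel lists: states and labels. Each state is a flattened, environment-sized array of zeros in which the cell at the agent's position is set to 1 (a one-hot encoding of the position). The agent's ID is used as the corresponding action label; note that the values in expert_actions are not themselves used as labels — each action sequence only determines how many agents of the paired trajectory are processed.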

The get_action method retrieves the action for a specific agent based on its position. It constructs a state representation by creating an environment-sized array and setting the position of the agent to 1. The state is then converted to a tensor and passed through the classifier model. The action with the highest logit value is extracted using torch.argmax.

In the usage example, a swarm of agents is created with random positions in the environment.

Expert trajectories are defined as a list containing agent trajectories, and expert actions are defined as a list of action sequences corresponding to each agent.

An instance of ClassicalIL is created, passing the necessary parameters.

The train method is called to train the classifier model using the expert trajectories and actions.

The get_action method is called to retrieve the action for a specific agent.

The action for the agent is printed.

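In summary, the code implements Classical Imitation Learning (CIL) in swarm robotics. It trains a neural network classifier, intended to imitate expert behavior, on the agents' positions in the environment. As written, however, the training labels are the agents' IDs rather than the supplied expert actions, so the classifier learns to map a position to an agent ID. The trained classifier can then be used, via get_action, to select actions for the swarm agents.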