---
title: "Lewis Signaling Game for PettingZoo"
subtitle: "Paper Review"
date: 2025-01-01
categories: [review,compositionality,neural networks,signaling systems,language evolution]
keywords: 
    compositionality
    naive compositionality
    language emergence
    deep learning
    neural networks
    signaling systems 
    emergent languages
    topographic similarity
    positional disentanglement
    bag-of-symbols disentanglement
    information gap disentanglement    
bibliography: ./bibliography.bib
---

In [1]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from pettingzoo.utils.env import AECEnv
from pettingzoo.utils import wrappers
from pettingzoo.utils.agent_selector import agent_selector
import pettingzoo

class Sender:
    def __init__(self, num_signals):
        self.num_signals = num_signals
        self.action_space = spaces.Discrete(num_signals)

    def act(self, observation):
        return self.action_space.sample()  # Random action

class Receiver:
    def __init__(self, num_states):
        self.num_states = num_states
        self.action_space = spaces.Discrete(num_states)

    def act(self, observation):
        # If no signal has been received yet, choose a random action
        if observation == self.num_states: 
            return self.action_space.sample() 
        else:
            # If a signal has been received, choose the corresponding state 
            # (this is a simple, non-learning policy)
            return observation

class LewisSignalingEnv(AECEnv):
    metadata = {"render_modes": ["human"], "name": "lewis_signaling_v0"}

    def __init__(self, num_signals=3, num_states=3, max_cycles=100):
        super().__init__()
        self.possible_agents = ["sender", "receiver"]
        self.agent_name_mapping = dict(zip(self.possible_agents, list(range(len(self.possible_agents)))))
        self.num_signals = num_signals
        self.num_states = num_states
        self.max_cycles = max_cycles
        self.sender = Sender(num_signals)
        self.receiver = Receiver(num_states)
        self.state = None
        self.signal = None
        self.cycles = 0

        self.observation_spaces = {
            "sender": spaces.Discrete(1),  # Sender observes no specific information
            "receiver": spaces.Discrete(self.num_signals + 1)  # Receiver observes signal or no signal
        }
        self.action_spaces = {
            "sender": self.sender.action_space,
            "receiver": self.receiver.action_space,
        }

    def observe(self, agent):
        if agent == "sender":
            return 0  # Sender observes no specific information
        elif agent == "receiver":
            if self.signal is None:
                return self.num_signals  # Indicate no signal has been received yet
            else:
                return self.signal  # Return the sent signal for the receiver to observe
        else:
            raise ValueError(f"Unknown agent: {agent}")

    def reset(self, seed=None, options=None):
        self.agents = self.possible_agents[:]
        self.rewards = {agent: 0 for agent in self.agents}
        self._cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.truncations = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}
        self.state = np.random.randint(self.num_states)
        self.signal = None
        self.cycles = 0
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.next()
        return {agent: self.observe(agent) for agent in self.agents}

    def step(self, action):
        if self.terminations["sender"] or self.terminations["receiver"]:
            return

        current_agent = self.agent_selection

        if current_agent == "sender":
            self.signal = action
            print(f"Sender sent signal: {self.signal}, State: {self.state}") # Debug print
            self.agent_selection = self._agent_selector.next()
        elif current_agent == "receiver":
            guess = action
            print(f"Receiver guessed: {guess}, State: {self.state}, Signal: {self.signal}") # Debug print
            if guess == self.state:
                reward = 1
            else:
                reward = 0
            self.rewards["sender"] = reward
            self.rewards["receiver"] = reward

            if self._agent_selector.is_last():
                self.cycles += 1
                if self.cycles >= self.max_cycles:
                    for agent in self.agents:
                        self.truncations[agent] = True
                self.state = np.random.randint(self.num_states)
                self._agent_selector.reinit(self.agents)
            else:
                self.agent_selection = self._agent_selector.next()

        self._clear_rewards()

    def action_space(self, agent):
        if agent == "sender":
            return self.sender.action_space
        elif agent == "receiver":
            return self.receiver.action_space
        else:
            return None #Or raise an exception if you prefer



def env(**kwargs):
    env = LewisSignalingEnv(**kwargs)
    if pettingzoo.__version__ >= "1.18.1":
        env = wrappers.OrderEnforcingWrapper(env)
    else:
        env = wrappers.order_enforcing(env)
    env = wrappers.AssertOutOfBoundsWrapper(env)
    return env

# --- Main execution in the notebook ---
num_episodes = 10
total_rewards = {"sender": 0, "receiver": 0}

env_instance = env(num_signals=2, num_states=2)

for episode in range(num_episodes):
    observations = env_instance.reset()
    print(f"Starting episode {episode+1}, New State: {env_instance.state}")
    for agent in env_instance.agent_iter():
        observation, reward, termination, truncation, info = env_instance.last()
        if termination or truncation:
            break
        if agent == "sender":
            action = env_instance.sender.act(observation)
        elif agent == "receiver":
            action = env_instance.receiver.act(observation)
        env_instance.step(action)
        observation, reward, termination, truncation, info = env_instance.last() # Get reward AFTER step
        print(f"*** -- Agent: {agent}, Reward: {reward}")
        total_rewards[agent] += reward # Accumulate reward from the step
print(total_rewards)
mean_rewards = {agent: total_rewards[agent] / num_episodes for agent in total_rewards}
print(f"Mean rewards over {num_episodes} episodes:")
print(f"Sender: {mean_rewards['sender']}")
print(f"Receiver: {mean_rewards['receiver']}")

Starting episode 1, New State: <bound method BaseWrapper.state of <pettingzoo.utils.wrappers.assert_out_of_bounds.AssertOutOfBoundsWrapper object at 0x762a71892fb0>>
Sender sent signal: 0, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Sender sent signal: 0, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Receiver guessed: 0, State: 1, Signal: 0
*** -- Agent: receiver, Reward: 0
Sender sent signal: 0, State: 1
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 1, Signal: 0
*** -- Agent: receiver, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Sender sent signal: 1, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 1, State: 0, Signal: 1
*** -- Agent: receiver, Reward: 0
Receiver guessed: 1, State: 1, Signal: 1
*** -- Agent

Sender sent signal: 0, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Receiver guessed: 0, State: 1, Signal: 0
*** -- Agent: receiver, Reward: 0
Sender sent signal: 1, State: 1
*** -- Agent: sender, Reward: 0
Receiver guessed: 1, State: 1, Signal: 1
*** -- Agent: receiver, Reward: 0
Receiver guessed: 1, State: 0, Signal: 1
*** -- Agent: receiver, Reward: 0
Sender sent signal: 1, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 1, State: 0, Signal: 1
*** -- Agent: receiver, Reward: 0
Receiver guessed: 1, State: 0, Signal: 1
*** -- Agent: receiver, Reward: 0
Sender sent signal: 0, State: 0
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 0, Signal: 0
*** -- Agent: receiver, Reward: 0
Receiver guessed: 0, State: 1, Signal: 0
*** -- Agent: receiver, Reward: 0
Sender sent signal: 0, State: 1
*** -- Agent: sender, Reward: 0
Receiver guessed: 0, State: 1, Signal: 0
*** -- Agent: receiver, Reward: 0
Recei