# Setup

In [1]:
!git clone https://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@github.com/IgnacioOQ/RL_Signaling

Cloning into 'RL_Signaling'...
remote: Enumerating objects: 639, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 639 (delta 10), reused 23 (delta 6), pack-reused 608 (from 1)[K
Receiving objects: 100% (639/639), 35.67 MiB | 20.78 MiB/s, done.
Resolving deltas: 100% (380/380), done.


In [2]:
# Switch the working directory to the cloned repository
# (presumably so the project-local imports in the next cell resolve — confirm).
%cd RL_Signaling

/content/RL_Signaling


In [3]:
from imports import *
from utils import *
from agents import UrnAgent, QLearningAgent, TDLearningAgent
from environment import NetMultiAgentEnv, TempNetMultiAgentEnv
from simulation_function import simulation_function, temp_simulation_function

from joblib import Parallel, delayed, cpu_count
import multiprocessing
from datetime import datetime


In [4]:
# Mount Google Drive and choose the folder that receives the results files.
from google.colab import drive
drive.mount('/content/drive')

# Kept as a plain string ending in '/': the simulation cells below build their
# output file names with `dump_path + '<name>.csv'`.
dump_path = '/content/drive/My Drive/Colab Projects/Python ABMs/Communication/'
print("Current Directory:", dump_path)

Mounted at /content/drive
Current Directory: /content/drive/My Drive/Colab Projects/Python ABMs/Communication/


# Canonical Model

- World States: Two binary variables X, Y
- agents_observed_variables = {0:[0],1:[1]}
- Random Canonical Games
- n_features = 2
- n_signaling_actions = 2
- n_final_actions = 4

In [None]:
# Shared defaults read by the simulation cells below; the cells guarded by
# `simulate` assign their own values to both names when they run.
#   add_data     -> False writes a fresh results CSV, True appends to the existing one
#   n_iterations -> number of iterations (one random game draw each) per simulation cell
add_data, n_iterations = False, 10

## Urn Agent

In [None]:
# Set to True to (re)run the canonical-model UrnAgent experiment; the recorded
# run below took several hours on 8 cores.
simulate = False
if simulate:
    # Run configuration for this cell.
    add_data = False
    n_iterations = 10000

    # Simulation parameters
    n_episodes = 10000
    n_agents = 2
    n_features = 2
    n_signaling_actions = 2
    n_final_actions = 4

    # Report available CPU cores
    n_cores = cpu_count()
    print(f"Using all available CPU cores: {n_cores}")

    # One output row = five run descriptors followed by four metrics per agent.
    column_names = [
        'iteration',
        'n_signaling_actions',
        'n_final_actions',
        'full_information',
        'with_signals',
        'Agent_0_Initial_NMI',
        'Agent_0_NMI',
        'Agent_0_avg_reward',
        'Agent_0_final_reward',
        'Agent_1_Initial_NMI',
        'Agent_1_NMI',
        'Agent_1_avg_reward',
        'Agent_1_final_reward',
    ]

    def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
        """Simulate one (full_info, with_signals) condition and return its result row.

        The row starts with the run descriptors and then holds, for each agent,
        the mean of the first 10 and of the last 100 entries of its
        signal-information history, followed by the mean of its whole reward
        history and of its last 100 rewards.
        """
        shared_dims = dict(
            n_agents=n_agents,
            n_features=n_features,
            n_signaling_actions=n_signaling_actions,
            n_final_actions=n_final_actions,
        )
        urn_env = NetMultiAgentEnv(
            full_information=full_info,
            game_dicts=game_dicts,
            observed_variables=obs_vars,
            agent_type=UrnAgent,
            initialize=False,
            graph=graph,
            **shared_dims,
        )

        # Only the reward and signal-information histories are used below.
        _, rewards_history, signal_information_history, _, _ = simulation_function(
            n_episodes=n_episodes,
            with_signals=with_signals,
            plot=False,
            env=urn_env,
            verbose=False,
            **shared_dims,
        )

        row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
        for agent_id in range(n_agents):
            info_series = signal_information_history[agent_id]
            reward_series = rewards_history[agent_id]
            row += [
                np.mean(info_series[:10]),
                np.mean(info_series[-100:]),
                np.mean(reward_series),
                np.mean(reward_series[-100:]),
            ]
        return row

    def run_all_cases_for_iteration(iteration):
        """Draw one random game per agent and run the four information/signal conditions on it."""
        game_dicts = {}
        for agent_id in range(n_agents):
            game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions, n=1, m=0)
        obs_vars = {0: [0], 1: [1]}

        # Two nodes with a directed edge in each direction.
        G = nx.DiGraph()
        G.add_edges_from([(0, 1), (1, 0)])

        rows = []
        for full_info in (False, True):
            for with_signals in (False, True):
                rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
        return rows

    # Run in parallel: one task per iteration, each returning four rows.
    all_results = Parallel(n_jobs=n_cores)(
        delayed(run_all_cases_for_iteration)(iteration_id)
        for iteration_id in tqdm(range(n_iterations), desc="Running UrnAgent simulations")
    )

    # Flatten the per-iteration groups into a single table.
    flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
    results_df = pd.DataFrame(flat_results, columns=column_names)

    # Append to the existing CSV when requested, otherwise write only the new rows.
    output_file = dump_path+'urnagent_results_cannonical.csv'
    total_results_df = results_df
    if add_data:
        old_results_df = pd.read_csv(output_file)
        total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

    total_results_df.to_csv(output_file, index=False)
    print(f"Total rows in saved file: {len(total_results_df)}")

Using all available CPU cores: 8


Running UrnAgent simulations: 100%|██████████| 10000/10000 [8:24:54<00:00,  3.03s/it]


Total rows in saved file: 40000


## Q-Learning

In [None]:
# Set to True to (re)run the canonical-model Q-learning experiment; the recorded
# run below took several hours on 8 cores.
simulate = False
if simulate:
    # Run configuration for this cell.
    add_data = False
    n_iterations = 10000

    n_episodes = 10000
    n_agents = 2
    n_features = 2
    n_signaling_actions = 2
    n_final_actions = 4

    # Print number of available CPU cores
    n_cores = cpu_count()
    print(f"Using all available CPU cores: {n_cores}")

    # One output row = five run descriptors followed by four metrics per agent.
    column_names = [
        'iteration',
        'n_signaling_actions',
        'n_final_actions',
        'full_information',
        'with_signals',
        'Agent_0_Initial_NMI',
        'Agent_0_NMI',
        'Agent_0_avg_reward',
        'Agent_0_final_reward',
        'Agent_1_Initial_NMI',
        'Agent_1_NMI',
        'Agent_1_avg_reward',
        'Agent_1_final_reward',
    ]

    def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
        """Simulate one (full_info, with_signals) condition with Q-learning agents.

        Returns a row made of the run descriptors followed, for each agent, by
        the mean of the first 10 and of the last 100 entries of its
        signal-information history, the mean of its whole reward history and
        the mean of its last 100 rewards.
        """
        shared_dims = dict(
            n_agents=n_agents,
            n_features=n_features,
            n_signaling_actions=n_signaling_actions,
            n_final_actions=n_final_actions,
        )
        q_env = NetMultiAgentEnv(
            full_information=full_info,
            game_dicts=game_dicts,
            observed_variables=obs_vars,
            agent_type=QLearningAgent,
            initialize=False,
            graph=graph,
            **shared_dims,
        )

        # Swap in agents built with explicit exploration hyper-parameters.
        tuned_agents = []
        for _ in range(n_agents):
            tuned_agents.append(
                QLearningAgent(
                    n_signaling_actions=n_signaling_actions,
                    n_final_actions=n_final_actions,
                    exploration_rate=0.9976461429984532,
                    exploration_decay=0.9805828477324336,
                    min_exploration_rate=0.002583111144750034,
                    initialize=False,
                )
            )
        q_env.agents = tuned_agents

        # Only the second and third outputs are used; any further ones are ignored.
        _, rewards_history, signal_information_history, *_ = simulation_function(
            n_episodes=n_episodes,
            with_signals=with_signals,
            plot=False,
            env=q_env,
            verbose=False,
            **shared_dims,
        )

        row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
        for agent_id in range(n_agents):
            info_series = signal_information_history[agent_id]
            reward_series = rewards_history[agent_id]
            row += [
                np.mean(info_series[:10]),
                np.mean(info_series[-100:]),
                np.mean(reward_series),
                np.mean(reward_series[-100:]),
            ]
        return row

    def run_all_cases_for_iteration(iteration):
        """Build the game and graph shared by the four conditions, then run each condition."""
        # NOTE(review): the Urn and TD cells call this with n=1, m=0; here the
        # helper's defaults are used — confirm they are equivalent.
        game_dicts = {}
        for agent_id in range(n_agents):
            game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions)
        obs_vars = {0: [0], 1: [1]}

        # Two nodes with a directed edge in each direction.
        G = nx.DiGraph()
        G.add_edges_from([(0, 1), (1, 0)])

        rows = []
        for full_info in (False, True):
            for with_signals in (False, True):
                rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
        return rows

    # Run simulations in parallel: one task per iteration, each returning four rows.
    all_results = Parallel(n_jobs=n_cores)(
        delayed(run_all_cases_for_iteration)(iteration_id)
        for iteration_id in tqdm(range(n_iterations), desc="Running in parallel")
    )

    # Flatten the per-iteration groups into a single table.
    flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
    results_df = pd.DataFrame(flat_results, columns=column_names)

    # Append to the existing CSV when requested, otherwise write only the new rows.
    output_file = dump_path+'qlearning_results_cannonical.csv'
    total_results_df = results_df
    if add_data:
        old_results_df = pd.read_csv(output_file)
        total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

    total_results_df.to_csv(output_file, index=False)
    print(f"Total rows: {len(total_results_df)}")

Using all available CPU cores: 8


Running in parallel: 100%|██████████| 10000/10000 [5:39:38<00:00,  2.04s/it]


Total rows: 40000


## TD Agent

In [None]:
# Run configuration for the canonical-model TD experiment.
# `add_data` is not set here; the save step at the end of this cell reads the
# value left by an earlier cell.
n_iterations = 100

# Global params
n_episodes, n_agents = 10000, 2
n_features, n_signaling_actions, n_final_actions = 2, 2, 4

# Report CPU info
n_cores = cpu_count()
print(f"Using all available CPU cores: {n_cores}")

# One output row = five run descriptors followed by four metrics per agent.
column_names = [
    'iteration',
    'n_signaling_actions',
    'n_final_actions',
    'full_information',
    'with_signals',
    'Agent_0_Initial_NMI',
    'Agent_0_NMI',
    'Agent_0_avg_reward',
    'Agent_0_final_reward',
    'Agent_1_Initial_NMI',
    'Agent_1_NMI',
    'Agent_1_avg_reward',
    'Agent_1_final_reward',
]

def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
    """Simulate one (full_info, with_signals) condition with TD-learning agents.

    Returns a row made of the run descriptors followed, for each agent, by the
    mean of the first 10 and of the last 100 entries of its signal-information
    history, the mean of its whole reward history and the mean of its last
    100 rewards.
    """
    shared_dims = dict(
        n_agents=n_agents,
        n_features=n_features,
        n_signaling_actions=n_signaling_actions,
        n_final_actions=n_final_actions,
    )
    td_env = TempNetMultiAgentEnv(
        learning_rate=0.1,
        exploration_rate=1.0,
        exploration_decay=0.995,
        min_exploration_rate=0.001,
        full_information=full_info,
        game_dicts=game_dicts,
        observed_variables=obs_vars,
        agent_type=TDLearningAgent,
        graph=graph,
        **shared_dims,
    )

    # Swap in agents built with explicit hyper-parameters.
    # NOTE(review): these differ from the exploration settings passed to the
    # environment above; presumably the agents' own values apply — confirm.
    tuned_agents = []
    for _ in range(n_agents):
        tuned_agents.append(
            TDLearningAgent(
                n_actions=td_env.max_actions,
                learning_rate=0.1,  # Fixed learning rate
                exploration_rate=0.6733441159316643,
                exploration_decay=0.9865577543877726,
                min_exploration_rate=0.0013741277073265228,
                gamma=0.9729821735549989,
            )
        )
    td_env.agents = tuned_agents

    # Only the reward and signal-information histories are used below.
    _, rewards_history, signal_information_history, _, _ = temp_simulation_function(
        n_episodes=n_episodes,
        with_signals=with_signals,
        plot=False,
        env=td_env,
        verbose=False,
        **shared_dims,
    )

    row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
    for agent_id in range(n_agents):
        info_series = signal_information_history[agent_id]
        reward_series = rewards_history[agent_id]
        row += [
            np.mean(info_series[:10]),
            np.mean(info_series[-100:]),
            np.mean(reward_series),
            np.mean(reward_series[-100:]),
        ]
    return row

def run_all_cases_for_iteration(iteration):
    """Create a fresh game and graph for this iteration and run the four conditions on them."""
    game_dicts = {}
    for agent_id in range(n_agents):
        game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions, n=1, m=0)
    obs_vars = {0: [0], 1: [1]}

    # Two nodes with a directed edge in each direction.
    G = nx.DiGraph()
    G.add_edges_from([(0, 1), (1, 0)])

    # Conditions run in the order (False, False), (False, True), (True, False), (True, True).
    rows = []
    for full_info in (False, True):
        for with_signals in (False, True):
            rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
    return rows

# Run every iteration in parallel; each task returns the four rows of one iteration.
all_results = Parallel(n_jobs=n_cores)(
    delayed(run_all_cases_for_iteration)(iteration_id)
    for iteration_id in tqdm(range(n_iterations), desc="Running parallel simulations")
)

# Flatten the per-iteration groups into a single table.
flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
results_df = pd.DataFrame(flat_results, columns=column_names)

# Append to the existing CSV when `add_data` is set, otherwise write only the new rows.
output_file = dump_path+'td_learning_results_cannonical.csv'
total_results_df = results_df
if add_data:
    old_results_df = pd.read_csv(output_file)
    total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

total_results_df.to_csv(output_file, index=False)
print(f"Total rows in saved file: {len(total_results_df)}")

# More Complex Model

- World States: Three binary variables X, Y, Z
- agents_observed_variables = {0:[0,1],1:[1,2]}
- n_features = 3 #parameters['n_features']
- n_signaling_actions = 4 #parameters['n_signaling_actions']
- n_final_actions = 8 #parameters['n_final_actions']
- Random Games (possibly non-canonical)

## Urn Agent

In [None]:
# Set to False to skip this long-running UrnAgent experiment on the complex model.
simulate = True
if simulate:
    # Run configuration for this cell.
    add_data = False
    n_iterations = 10000

    # Simulation parameters
    n_episodes = 10000
    n_agents = 2
    n_features = 3
    n_signaling_actions = 4
    n_final_actions = 8

    # Report available CPU cores
    n_cores = cpu_count()
    print(f"Using all available CPU cores: {n_cores}")

    # One output row = five run descriptors followed by four metrics per agent.
    column_names = [
        'iteration',
        'n_signaling_actions',
        'n_final_actions',
        'full_information',
        'with_signals',
        'Agent_0_Initial_NMI',
        'Agent_0_NMI',
        'Agent_0_avg_reward',
        'Agent_0_final_reward',
        'Agent_1_Initial_NMI',
        'Agent_1_NMI',
        'Agent_1_avg_reward',
        'Agent_1_final_reward',
    ]

    def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
        """Simulate one (full_info, with_signals) condition and return its result row.

        The row starts with the run descriptors and then holds, for each agent,
        the mean of the first 10 and of the last 100 entries of its
        signal-information history, followed by the mean of its whole reward
        history and of its last 100 rewards.
        """
        shared_dims = dict(
            n_agents=n_agents,
            n_features=n_features,
            n_signaling_actions=n_signaling_actions,
            n_final_actions=n_final_actions,
        )
        urn_env = NetMultiAgentEnv(
            full_information=full_info,
            game_dicts=game_dicts,
            observed_variables=obs_vars,
            agent_type=UrnAgent,
            initialize=False,
            graph=graph,
            **shared_dims,
        )

        # Only the reward and signal-information histories are used below.
        _, rewards_history, signal_information_history, _, _ = simulation_function(
            n_episodes=n_episodes,
            with_signals=with_signals,
            plot=False,
            env=urn_env,
            verbose=False,
            **shared_dims,
        )

        row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
        for agent_id in range(n_agents):
            info_series = signal_information_history[agent_id]
            reward_series = rewards_history[agent_id]
            row += [
                np.mean(info_series[:10]),
                np.mean(info_series[-100:]),
                np.mean(reward_series),
                np.mean(reward_series[-100:]),
            ]
        return row

    def run_all_cases_for_iteration(iteration):
        """Draw one random game per agent and run the four information/signal conditions on it."""
        # NOTE(review): the section notes mention possibly non-canonical games,
        # but the canonical generator is called here — confirm this is intended.
        game_dicts = {}
        for agent_id in range(n_agents):
            game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions, n=1, m=0)
        obs_vars = {0:[0,1],1:[1,2]}

        # Two nodes with a directed edge in each direction.
        G = nx.DiGraph()
        G.add_edges_from([(0, 1), (1, 0)])

        rows = []
        for full_info in (False, True):
            for with_signals in (False, True):
                rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
        return rows

    # Run in parallel: one task per iteration, each returning four rows.
    all_results = Parallel(n_jobs=n_cores)(
        delayed(run_all_cases_for_iteration)(iteration_id)
        for iteration_id in tqdm(range(n_iterations), desc="Running UrnAgent simulations")
    )

    # Flatten the per-iteration groups into a single table.
    flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
    results_df = pd.DataFrame(flat_results, columns=column_names)

    # Append to the existing CSV when requested, otherwise write only the new rows.
    output_file = dump_path+'urnagent_results_complex.csv'
    total_results_df = results_df
    if add_data:
        old_results_df = pd.read_csv(output_file)
        total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

    total_results_df.to_csv(output_file, index=False)
    print(f"Total rows in saved file: {len(total_results_df)}")

Using all available CPU cores: 8


Running UrnAgent simulations:   2%|▏         | 152/10000 [14:17<15:59:21,  5.84s/it]

## Q-Learning

In [None]:
# Run configuration for the complex-model Q-learning experiment.
# `n_iterations` is set explicitly so the cell no longer inherits whatever value
# the previously executed cell left behind (the preceding Urn cell sets 10000).
# 10 matches the recorded output of this cell (10 iterations, 40 rows).
# `add_data` is still read from an earlier cell by the save step below.
n_iterations = 10

# Print number of available CPU cores
n_cores = cpu_count()
print(f"Using all available CPU cores: {n_cores}")

# Define column names: five run descriptors followed by four metrics per agent.
column_names = [
    'iteration', 'n_signaling_actions', 'n_final_actions', 'full_information', 'with_signals',
    'Agent_0_Initial_NMI', 'Agent_0_NMI', 'Agent_0_avg_reward', 'Agent_0_final_reward',
    'Agent_1_Initial_NMI', 'Agent_1_NMI', 'Agent_1_avg_reward', 'Agent_1_final_reward'
]

# Model dimensions for the three-feature setting.
n_episodes = 10000
n_agents = 2
n_features = 3
n_signaling_actions = 4
n_final_actions = 8

def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
    """Simulate one (full_info, with_signals) condition with Q-learning agents.

    Returns a row made of the run descriptors followed, for each agent, by the
    mean of the first 10 and of the last 100 entries of its signal-information
    history, the mean of its whole reward history and the mean of its last
    100 rewards.
    """
    shared_dims = dict(
        n_agents=n_agents,
        n_features=n_features,
        n_signaling_actions=n_signaling_actions,
        n_final_actions=n_final_actions,
    )
    q_env = NetMultiAgentEnv(
        full_information=full_info,
        game_dicts=game_dicts,
        observed_variables=obs_vars,
        agent_type=QLearningAgent,
        initialize=False,
        graph=graph,
        **shared_dims,
    )

    # Swap in agents built with explicit exploration hyper-parameters.
    tuned_agents = []
    for _ in range(n_agents):
        tuned_agents.append(
            QLearningAgent(
                n_signaling_actions=n_signaling_actions,
                n_final_actions=n_final_actions,
                exploration_rate=1,
                exploration_decay=0.995,
                min_exploration_rate=0.0001,
                initialize=False,
            )
        )
    q_env.agents = tuned_agents

    # Only the second and third outputs are used; any further ones are ignored.
    _, rewards_history, signal_information_history, *_ = simulation_function(
        n_episodes=n_episodes,
        with_signals=with_signals,
        plot=False,
        env=q_env,
        verbose=False,
        **shared_dims,
    )

    row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
    for agent_id in range(n_agents):
        info_series = signal_information_history[agent_id]
        reward_series = rewards_history[agent_id]
        row += [
            np.mean(info_series[:10]),
            np.mean(info_series[-100:]),
            np.mean(reward_series),
            np.mean(reward_series[-100:]),
        ]
    return row

def run_all_cases_for_iteration(iteration):
    """Build the game and graph shared by the four conditions, then run each condition."""
    # NOTE(review): the Urn and TD cells call this with n=1, m=0; here the
    # helper's defaults are used — confirm they are equivalent.
    game_dicts = {}
    for agent_id in range(n_agents):
        game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions)
    obs_vars = {0:[0,1],1:[1,2]}

    # Two nodes with a directed edge in each direction.
    G = nx.DiGraph()
    G.add_edges_from([(0, 1), (1, 0)])

    # Conditions run in the order (False, False), (False, True), (True, False), (True, True).
    rows = []
    for full_info in (False, True):
        for with_signals in (False, True):
            rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
    return rows

# Run every iteration in parallel; each task returns the four rows of one iteration.
all_results = Parallel(n_jobs=n_cores)(
    delayed(run_all_cases_for_iteration)(iteration_id)
    for iteration_id in tqdm(range(n_iterations), desc="Running in parallel")
)

# Flatten the per-iteration groups into a single table.
flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
results_df = pd.DataFrame(flat_results, columns=column_names)

# Append to the existing CSV when `add_data` is set, otherwise write only the new rows.
output_file = dump_path+'qlearning_results_complex.csv'
total_results_df = results_df
if add_data:
    old_results_df = pd.read_csv(output_file)
    total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

total_results_df.to_csv(output_file, index=False)
print(f"Total rows: {len(total_results_df)}")

Using all available CPU cores: 8


Running in parallel: 100%|██████████| 10/10 [00:00<00:00, 7113.81it/s]


Total rows: 40


## TD Agent

In [None]:
# Run configuration for the complex-model TD experiment.
# `n_iterations` is set explicitly so the cell no longer inherits whatever value
# the previously executed cell left behind.
# 10 matches the recorded output of this cell (10 iterations, 40 rows).
# `add_data` is still read from an earlier cell by the save step below.
n_iterations = 10

# Report CPU info
n_cores = cpu_count()
print(f"Using all available CPU cores: {n_cores}")

# Define output columns: five run descriptors followed by four metrics per agent.
column_names = [
    'iteration', 'n_signaling_actions', 'n_final_actions', 'full_information', 'with_signals',
    'Agent_0_Initial_NMI', 'Agent_0_NMI', 'Agent_0_avg_reward', 'Agent_0_final_reward',
    'Agent_1_Initial_NMI', 'Agent_1_NMI', 'Agent_1_avg_reward', 'Agent_1_final_reward'
]

# Global params for the three-feature setting.
n_episodes = 10000
n_agents = 2
n_features = 3
n_signaling_actions = 4
n_final_actions = 8

def run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, graph):
    """Simulate one (full_info, with_signals) condition with TD-learning agents.

    Returns a row made of the run descriptors followed, for each agent, by the
    mean of the first 10 and of the last 100 entries of its signal-information
    history, the mean of its whole reward history and the mean of its last
    100 rewards.
    """
    shared_dims = dict(
        n_agents=n_agents,
        n_features=n_features,
        n_signaling_actions=n_signaling_actions,
        n_final_actions=n_final_actions,
    )
    td_env = TempNetMultiAgentEnv(
        learning_rate=0.1,
        exploration_rate=1.0,
        exploration_decay=0.995,
        min_exploration_rate=0.001,
        full_information=full_info,
        game_dicts=game_dicts,
        observed_variables=obs_vars,
        agent_type=TDLearningAgent,
        graph=graph,
        **shared_dims,
    )

    # Swap in agents built with explicit hyper-parameters.
    # NOTE(review): unlike the canonical TD cell, no `gamma` is passed here, so
    # the agent's default applies — confirm that is intended.
    tuned_agents = []
    for _ in range(n_agents):
        tuned_agents.append(
            TDLearningAgent(
                n_actions=td_env.max_actions,
                learning_rate=0.1,  # Fixed learning rate
                exploration_rate=1,
                exploration_decay=0.995,
                min_exploration_rate=0.0001,
            )
        )
    td_env.agents = tuned_agents

    # Only the reward and signal-information histories are used below.
    _, rewards_history, signal_information_history, _, _ = temp_simulation_function(
        n_episodes=n_episodes,
        with_signals=with_signals,
        plot=False,
        env=td_env,
        verbose=False,
        **shared_dims,
    )

    row = [iteration, n_signaling_actions, n_final_actions, full_info, with_signals]
    for agent_id in range(n_agents):
        info_series = signal_information_history[agent_id]
        reward_series = rewards_history[agent_id]
        row += [
            np.mean(info_series[:10]),
            np.mean(info_series[-100:]),
            np.mean(reward_series),
            np.mean(reward_series[-100:]),
        ]
    return row

def run_all_cases_for_iteration(iteration):
    """Create a fresh game and graph for this iteration and run the four conditions on them."""
    game_dicts = {}
    for agent_id in range(n_agents):
        game_dicts[agent_id] = create_random_canonical_game(n_features, n_final_actions, n=1, m=0)
    obs_vars = {0:[0,1],1:[1,2]}

    # Two nodes with a directed edge in each direction.
    G = nx.DiGraph()
    G.add_edges_from([(0, 1), (1, 0)])

    # Conditions run in the order (False, False), (False, True), (True, False), (True, True).
    rows = []
    for full_info in (False, True):
        for with_signals in (False, True):
            rows.append(run_single_case(iteration, full_info, with_signals, game_dicts, obs_vars, G))
    return rows

# Run every iteration in parallel; each task returns the four rows of one iteration.
all_results = Parallel(n_jobs=n_cores)(
    delayed(run_all_cases_for_iteration)(iteration_id)
    for iteration_id in tqdm(range(n_iterations), desc="Running parallel simulations")
)

# Flatten the per-iteration groups into a single table.
flat_results = [case_row for iteration_rows in all_results for case_row in iteration_rows]
results_df = pd.DataFrame(flat_results, columns=column_names)

# Append to the existing CSV when `add_data` is set, otherwise write only the new rows.
output_file = dump_path+'td_learning_results_complex.csv'
total_results_df = results_df
if add_data:
    old_results_df = pd.read_csv(output_file)
    total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)

total_results_df.to_csv(output_file, index=False)
print(f"Total rows in saved file: {len(total_results_df)}")

Using all available CPU cores: 8


Running parallel simulations: 100%|██████████| 10/10 [00:00<00:00, 6440.88it/s]


Total rows in saved file: 40
