# Setup

In [None]:
from imports import *
from utils import *
from agents import QLearningAgent
from environment import NetMultiAgentEnv
from simulation_function import simulation_function

# Canonical Model

- World States: Two binary variables X, Y
- agents_observed_variables = {0:[0],1:[1]}
- Random Canonical Games
- n_features = 2 
- n_signaling_actions = 2 
- n_final_actions = 4 

In [None]:
# Canonical-model experiment.
#
# For every iteration a fresh random canonical game is drawn per agent, and the
# same games are then simulated under four setups (with/without signals,
# with/without full information). Each (iteration, setup) pair contributes one
# row to `results_df`.

# Column layout of a result row: run identifiers first, then four summary
# statistics per agent.
column_names = ['iteration', 'n_signaling_actions', 'n_final_actions', 'full_information', 'with_signals',
                'Agent_0_Initial_NMI', 'Agent_0_NMI', 'Agent_0_avg_reward', 'Agent_0_final_reward',
                'Agent_1_Initial_NMI', 'Agent_1_NMI', 'Agent_1_avg_reward', 'Agent_1_final_reward']

# Start from an empty frame; rows are appended inside the loop.
results_df = pd.DataFrame(columns=column_names)

n_episodes = 10000
n_iterations = 980

# Model dimensions. They never change between iterations, so they are set once
# here instead of being re-assigned on every pass of the loop.
n_agents = 2
n_features = 2
n_signaling_actions = 2
n_final_actions = 4

# The four setups compared on every iteration, as (with_signals, full_information)
# pairs, listed in the order CASE 1 .. CASE 4.
case_settings = [(False, False), (True, False), (False, True), (True, True)]

for iteration in tqdm(range(n_iterations), desc="Processing processing simulations..."):
    # One independently drawn game per agent. The same dictionary of games is
    # shared by the four setups of this iteration, so the setups are compared
    # on identical games; a new set of games is drawn at the next iteration.
    randomcannonical_game = {
        agent: create_random_canonical_game(n_features, n_final_actions)
        for agent in range(n_agents)
    }

    # Which world-state variables (by index) each agent observes; kept fixed
    # across the four setups.
    agents_observed_variables = {0: [0], 1: [1]}

    # Two-node directed graph with an edge in each direction (0 -> 1 and 1 -> 0).
    G = nx.DiGraph()
    G.add_nodes_from([0, 1])
    G.add_edges_from([(0, 1), (1, 0)])

    for with_signals, full_information in case_settings:
        # A new environment is built for every setup so that no state is
        # carried over from the previous one.
        env = NetMultiAgentEnv(n_agents=n_agents, n_features=n_features,
                               n_signaling_actions=n_signaling_actions,
                               n_final_actions=n_final_actions,
                               full_information=full_information,
                               game_dicts=randomcannonical_game,
                               observed_variables=agents_observed_variables,
                               agent_type=QLearningAgent,
                               initialize=False,
                               graph=G)

        (signal_usage, rewards_history, signal_information_history,
         urn_histories, nature_history) = simulation_function(
            n_agents=n_agents, n_features=n_features,
            n_signaling_actions=n_signaling_actions, n_final_actions=n_final_actions,
            n_episodes=n_episodes, with_signals=with_signals,
            plot=False, env=env, verbose=False)

        # Row prefix: identifiers of this run.
        results = [iteration, n_signaling_actions, n_final_actions, full_information, with_signals]

        # Per-agent summaries, in column order: mean of the first 10 and of the
        # last 100 entries of the agent's signal-information history (stored in
        # the *_Initial_NMI / *_NMI columns), then mean reward over the whole
        # history and over its last 100 entries.
        output = []
        for agent in (0, 1):  # matches the Agent_0_* / Agent_1_* columns
            information = signal_information_history[agent]
            rewards = rewards_history[agent]
            output += [np.mean(information[:10]), np.mean(information[-100:]),
                       np.mean(rewards), np.mean(rewards[-100:])]
        results += output

        # Append the row at the next integer index.
        results_df.loc[len(results_df)] = results

# Persist the results of this run.
#
# add_data=True  -> append the new rows to those already stored in the CSV.
# add_data=False -> write only the rows produced by this run (overwrites the file).
add_data = True
results_csv = 'basic_qlearning_results_cannonical_bigexplore.csv'
if add_data:
    old_results_df = pd.read_csv(results_csv)
    total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)
else:
    # Without this branch `total_results_df` would be undefined (NameError) or,
    # in a live kernel, a stale frame left behind by a different cell would be
    # written to this file.
    total_results_df = results_df
total_results_df.to_csv(results_csv, index=False)
# Displayed as the cell output: total number of stored rows.
len(total_results_df)

# More Complex Model

- World States: Three binary variables X, Y, Z
- agents_observed_variables = {0:[0,1],1:[1,2]}
- n_features = 3 #parameters['n_features']
- n_signaling_actions = 4 #parameters['n_signaling_actions']
- n_final_actions = 8 #parameters['n_final_actions']
- Random Games (possibly non-canonical)

In [None]:
# Three-variable experiment.
#
# For every iteration a fresh random game is drawn per agent, and the same
# games are then simulated under four setups (with/without signals,
# with/without full information). Each (iteration, setup) pair contributes one
# row to `results_df`.

# Column layout of a result row: run identifiers first, then four summary
# statistics per agent.
column_names = ['iteration', 'n_signaling_actions', 'n_final_actions', 'full_information', 'with_signals',
                'Agent_0_Initial_NMI', 'Agent_0_NMI', 'Agent_0_avg_reward', 'Agent_0_final_reward',
                'Agent_1_Initial_NMI', 'Agent_1_NMI', 'Agent_1_avg_reward', 'Agent_1_final_reward']

# Start from an empty frame; rows are appended inside the loop.
results_df = pd.DataFrame(columns=column_names)

n_episodes = 10000
n_iterations = 980

# Model dimensions, hard-coded. They never change between iterations, so they
# are set once here instead of being re-assigned on every pass of the loop.
n_agents = 2 #parameters['n_agents']
n_features = 3 #parameters['n_features']
n_signaling_actions = 4 #parameters['n_signaling_actions']
n_final_actions = 8 #parameters['n_final_actions']

# The four setups compared on every iteration, as (with_signals, full_information)
# pairs, listed in the order CASE 1 .. CASE 4.
case_settings = [(False, False), (True, False), (False, True), (True, True)]

for iteration in tqdm(range(n_iterations), desc="Processing processing simulations"):
    # One independently drawn game per agent. The same dictionary of games is
    # shared by the four setups of this iteration, so the setups are compared
    # on identical games; a new set of games is drawn at the next iteration.
    random_game = {
        agent: create_random_game(n_features, n_final_actions)
        for agent in range(n_agents)
    }

    # Which world-state variables (by index) each agent observes; kept fixed
    # across the four setups. Variable 1 is observed by both agents.
    agents_observed_variables = {0: [0, 1], 1: [1, 2]}

    # Two-node directed graph with an edge in each direction (0 -> 1 and 1 -> 0).
    G = nx.DiGraph()
    G.add_nodes_from([0, 1])
    G.add_edges_from([(0, 1), (1, 0)])

    for with_signals, full_information in case_settings:
        # A new environment is built for every setup so that no state is
        # carried over from the previous one.
        env = NetMultiAgentEnv(n_agents=n_agents, n_features=n_features,
                               n_signaling_actions=n_signaling_actions,
                               n_final_actions=n_final_actions,
                               full_information=full_information,
                               game_dicts=random_game,
                               observed_variables=agents_observed_variables,
                               agent_type=QLearningAgent,
                               initialize=False,
                               graph=G)

        (signal_usage, rewards_history, signal_information_history,
         urn_histories, nature_history) = simulation_function(
            n_agents=n_agents, n_features=n_features,
            n_signaling_actions=n_signaling_actions, n_final_actions=n_final_actions,
            n_episodes=n_episodes, with_signals=with_signals,
            plot=False, env=env, verbose=False)

        # Row prefix: identifiers of this run.
        results = [iteration, n_signaling_actions, n_final_actions, full_information, with_signals]

        # Per-agent summaries, in column order: mean of the first 10 and of the
        # last 100 entries of the agent's signal-information history (stored in
        # the *_Initial_NMI / *_NMI columns), then mean reward over the whole
        # history and over its last 100 entries.
        output = []
        for agent in (0, 1):  # matches the Agent_0_* / Agent_1_* columns
            information = signal_information_history[agent]
            rewards = rewards_history[agent]
            output += [np.mean(information[:10]), np.mean(information[-100:]),
                       np.mean(rewards), np.mean(rewards[-100:])]
        results += output

        # Append the row at the next integer index.
        results_df.loc[len(results_df)] = results

# Persist the results of this run.
#
# add_data=True  -> append the new rows to those already stored in the CSV.
# add_data=False -> write only the rows produced by this run (overwrites the file).
add_data = True
results_csv = 'qlearning_results_bigexplore.csv'
if add_data:
    old_results_df = pd.read_csv(results_csv)
    total_results_df = pd.concat([old_results_df, results_df], ignore_index=True)
else:
    # Without this branch `total_results_df` would be undefined (NameError) or,
    # in a live kernel, a stale frame left behind by a different cell would be
    # written to this file.
    total_results_df = results_df
total_results_df.to_csv(results_csv, index=False)
# Displayed as the cell output: total number of stored rows.
len(total_results_df)