In [None]:
import os, sys
import pandas as pd
import torch


# Walk two directory levels up from the notebook's working directory; the
# resulting path is put on sys.path ahead of the `src.*` imports that follow.
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
dir0 = os.path.dirname(dir1)

# Test the very path that gets appended, so re-running this cell never adds a
# duplicate entry and an unrelated entry on sys.path cannot suppress the append.
if dir0 not in sys.path:
    sys.path.append(dir0)

from src.config import PPOConfig, EmbeddingStrategy
from src.experiments import ExperimentSuite
from src.utils import ExperimentUtils

import logging

# Quieten torchrl: anything below WARNING is dropped from the notebook output.
logger = logging.getLogger('torchrl')
logger.setLevel(level=logging.WARNING)

def rewards_of_suite(suite, agents):
    """Roll out ``suite`` once per agent count and stack the resulting rewards.

    Args:
        suite: Object exposing ``rollout_all_get_rewards(change_to_config)``.
            Each call must return a DataFrame that has an ``"experiment"``
            column and carries the reward values in its *second* column.
        agents: Iterable of agent counts. Each one is handed to the rollout as
            ``{"n_agents": count}``.

    Returns:
        pandas.DataFrame with the columns ``strategy`` (the rollout's
        ``experiment`` column, renamed), ``agents`` and ``rewards``, indexed
        0..n-1. When ``agents`` is empty the frame has those columns and no
        rows.
    """
    columns = ["experiment", "agents", "rewards"]

    # Collect the per-agent-count frames and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows on every pass.
    frames = []
    for n_agents in agents:
        df = suite.rollout_all_get_rewards({"n_agents": n_agents})

        # The reward column's name varies, so it is located by position
        # (second column) and normalised to a common name.
        reward_col = df.columns[1]
        df = df.rename(columns={reward_col: "rewards"})
        df["agents"] = n_agents

        frames.append(df[columns])

    if frames:
        all_df = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; return an empty frame that still
        # carries the expected columns.
        all_df = pd.DataFrame(columns=columns)

    return all_df.rename(columns={"experiment": "strategy"})


def run_generalizability(strategies, scenario, file_name, training_agent, testing_agents, steps=200):
    """Run one experiment suite per strategy and record rewards at other team sizes.

    For every entry of ``strategies`` a single-strategy ``ExperimentSuite`` is
    built with ``n_agents=training_agent``, executed through
    ``run_all_confidence(k=10, profile_once=False, update=True)`` and then
    rolled out once per entry of ``testing_agents`` via ``rewards_of_suite``.

    After each strategy the results accumulated so far are (re)written to
    ``saved_experiments/<file_name>_rollout.csv`` (rollout rewards) and
    ``saved_experiments/<file_name>.csv`` (the ``df`` of ``ExperimentUtils``),
    so an interrupted run keeps the strategies that already finished.

    Args:
        strategies: Iterable of strategy values; each one becomes the sole
            entry of the suite's ``"strategy"`` parameter grid.
        scenario: Scenario name forwarded to ``PPOConfig(scenario_name=...)``.
        file_name: Base name (without extension) of the two CSV files.
        training_agent: Agent count forwarded as ``PPOConfig(n_agents=...)``.
        testing_agents: Non-empty sequence of agent counts used for the
            rollouts; its maximum is forwarded as ``PPOConfig(max_agents=...)``.
        steps: Forwarded as ``PPOConfig(max_steps=...)``.

    Returns:
        None. The results are only written to disk.

    Raises:
        ValueError: If ``testing_agents`` is empty while ``strategies`` is not
            (raised by ``max``).
    """
    my_device = torch.device("cpu")

    url = "saved_experiments" + "/" + file_name + '.csv'
    url_rollout = "saved_experiments" + "/" + file_name + '_rollout' + '.csv'

    # Make sure the destination exists before any work starts, so a missing
    # directory cannot discard the results of a finished run.
    os.makedirs(os.path.dirname(url), exist_ok=True)

    rollout_frames = []
    train_frames = []

    for strategy in strategies:
        # NOTE(review): max_agents only considers testing_agents; if
        # training_agent may exceed every testing size this probably needs to
        # include it as well - confirm against PPOConfig.
        base_config = PPOConfig(
            scenario_name=scenario,
            max_agents=max(testing_agents),
            use_strategy_defaults=True,
            max_steps=steps,
            n_agents=training_agent,
        )

        param_grid = {
            "strategy": [strategy],
        }
        suite = ExperimentSuite(base_config=base_config, param_grid=param_grid, name="test_all", device=my_device)
        suite.run_all_confidence(k=10, profile_once=False, update=True)

        # Roll the suite out at every team size in testing_agents.
        rollout_frames.append(rewards_of_suite(suite, testing_agents))

        suite_utils = ExperimentUtils(experiment_suite=suite)
        train_frames.append(suite_utils.df)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to stack the frames. Both files are rewritten after
        # every strategy so partial results survive an interruption.
        pd.concat(rollout_frames).to_csv(url_rollout, index=False)
        pd.concat(train_frames).to_csv(url, index=False)

In [None]:
# Balance scenario: run with 5 agents, roll out with 30 and 40 agents.
file_name = '2_balance_train_low_roll_higher'
training_agent = 5

# Strategies to compare, grouped by name prefix.
strategies = [
    EmbeddingStrategy.CONCAT,
    # MLP variants
    EmbeddingStrategy.MLP,
    EmbeddingStrategy.MLP_LOCAL,
    EmbeddingStrategy.MLP_GLOBAL,
    # GRAPH_* variants
    EmbeddingStrategy.GRAPH_SAGE,
    EmbeddingStrategy.GRAPH_GAT,
    EmbeddingStrategy.GRAPH_GAT_v2,
    # *_TRANSFORMER* variants
    EmbeddingStrategy.SET_TRANSFORMER_INV,
    EmbeddingStrategy.SAB_TRANSFORMER,
    EmbeddingStrategy.ISAB_TRANSFORMER,
]

run_generalizability(
    strategies=strategies,
    scenario='balance',
    file_name=file_name,
    training_agent=training_agent,
    testing_agents=[30, 40],
)

In [None]:
# Navigation scenario: run with 5 agents, roll out with 30 and 40 agents,
# using 100 steps instead of the function's default of 200.
file_name = '2_navigation_train_low_roll_higher'
training_agent = 5

# Strategies to compare, grouped by name prefix.
strategies = [
    EmbeddingStrategy.CONCAT,
    # MLP variants
    EmbeddingStrategy.MLP,
    EmbeddingStrategy.MLP_LOCAL,
    EmbeddingStrategy.MLP_GLOBAL,
    # GRAPH_* variants
    EmbeddingStrategy.GRAPH_SAGE,
    EmbeddingStrategy.GRAPH_GAT,
    EmbeddingStrategy.GRAPH_GAT_v2,
    # *_TRANSFORMER* variants
    EmbeddingStrategy.SET_TRANSFORMER_INV,
    EmbeddingStrategy.SAB_TRANSFORMER,
    EmbeddingStrategy.ISAB_TRANSFORMER,
]

run_generalizability(
    strategies=strategies,
    scenario='navigation',
    file_name=file_name,
    training_agent=training_agent,
    testing_agents=[30, 40],
    steps=100,
)