# POSGGym.agents Pairwise Agent Comparison

This notebook visualizes the pairwise performance of the implemented policies.

The actual CSV results files are stored in the `posggym/notebooks/results/pairwise_agent_comparison` directory.

In [None]:
import os
import os.path as osp
import sys
from typing import Tuple

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import numpy as np
import pandas as pd

from posggym.config import REPO_DIR

sys.path.append(osp.join(REPO_DIR, "notebooks"))
import plot_utils

# Root directory of the pairwise comparison results; each entry inside it is
# treated as an environment ID by the cells below.
results_dir = osp.join(REPO_DIR, "notebooks", "results", "pairwise_agent_comparison")

# Alphabetically ordered names of everything found in the results directory.
available_env_result_dirs = sorted(os.listdir(results_dir))

print("Available Env Results", "---------------------", sep="\n")
for env_dir_name in available_env_result_dirs:
    print(env_dir_name)


## The environment

In [None]:
# CHANGE THIS
# Select one of the env IDs listed above
env_id = "Driving-v0"
# End CHANGE THIS

# Fail early, and say why, if there is no results directory for the chosen env.
assert env_id in os.listdir(results_dir), (
    f"No results found for env_id='{env_id}' in '{results_dir}'"
)
env_result_dir = osp.join(results_dir, env_id)

print("The different environment arguments that have results")
print("-----------------------------------------------------")
# Sorted so the printed index is stable and can be used to pick a file below.
env_result_files = sorted(os.listdir(env_result_dir))
for i, file_name in enumerate(env_result_files):
    # Strip only a trailing ".csv"; a ".csv" elsewhere in the name is kept.
    if file_name.endswith(".csv"):
        display_name = file_name[: -len(".csv")]
    else:
        display_name = file_name
    print(i, display_name)


In [None]:
# CHANGE THIS
# Pick a results file by its index in the list printed above
env_result_file_name = env_result_files[0]
# End CHANGE THIS

# Full path of the selected results file inside the chosen env's directory.
env_results_file = osp.join(env_result_dir, env_result_file_name)

print(
    "Looking at results for file",
    "---------------------------",
    f"Env ID = {env_id}",
    f"Env Args File = {env_result_file_name}",
    f"Full path = {env_results_file}",
    sep="\n",
)

## Data Loading

In [None]:
def display_df_info(df: pd.DataFrame) -> None:
    """Print a plain-text summary of a pairwise comparison results table.

    The summary consists of:

    * the first unique value of the ``env_id``, ``env_args_id``, ``symmetric``,
      ``seed``, ``num_agents`` and ``num_episodes`` columns,
    * the sorted unique agent IDs,
    * the name of every column in ``df``,
    * the unique values of the policy/co-team columns. If the first value of
      the ``symmetric`` column is truthy, only the values for the first agent
      ID are listed; otherwise the values are listed per agent ID.

    Args:
        df: results table; must contain all of the columns named above plus
            ``agent_id``, ``policy_name``, ``policy_type``, ``policy_seed``,
            ``co_team_id``, ``co_team_type`` and ``co_team_seed``.

    Raises:
        KeyError: if one of the required columns is missing.
        IndexError: if ``df`` has no rows.
    """
    header_fields = [
        ("Env ID", "env_id"),
        ("Env Args ID", "env_args_id"),
        ("Env symmetric", "symmetric"),
        ("Seed", "seed"),
        ("Num Agents", "num_agents"),
        ("Num Episodes", "num_episodes"),
    ]
    # Read every header value before printing so a missing column fails early.
    header_values = [
        (label, df[column].unique().tolist()[0]) for label, column in header_fields
    ]
    env_symmetric = df["symmetric"].unique().tolist()[0]
    for label, value in header_values:
        print(f"{label}: {value}")

    print("\nAgent IDs")
    print("---------")
    agent_ids = sorted(df["agent_id"].unique().tolist())
    for i in agent_ids:
        print(i)

    print("\nColumns")
    print("--------")
    # Only list the columns. ``agent_ids`` must not be extended here: entries
    # that are not values of the ``agent_id`` column would show up as empty
    # "AgentID = ..." sections in the per-agent listings below.
    for c in df.columns:
        print(c)

    def display_column(column_name):
        """Print the unique values of ``column_name``, per agent if asymmetric."""
        for i in agent_ids:
            values = df[df["agent_id"] == i][column_name].unique().tolist()
            try:
                values.sort()
            except TypeError:
                # Unorderable mix (e.g. strings alongside None/NaN): fall back
                # to ordering by string representation instead of crashing.
                values.sort(key=str)
            if env_symmetric:
                # Only the first agent's values are shown for symmetric envs.
                for v in values:
                    print(v)
                break
            else:
                print(f"AgentID = {i}")
                for v in values:
                    print(f"  {v}")

    for c in ["policy_name", "policy_type", "policy_seed", "co_team_id", "co_team_type", "co_team_seed"]:
        print(f"\n{c}")
        print("-" * len(c))
        display_column(c)

# Load the selected results file through the notebook's plot_utils helper and
# print a summary of it. `df` is the table used by all plotting cells below.
df = plot_utils.import_results(env_results_file)
display_df_info(df)

## Pairwise Performance

Here we look at the performance for each possible pairing of policies (or policies and teams if N>2).

In [None]:
# Pairwise comparison of individual policies (`policy_name`) against co-player
# teams (`co_team_name`), using the mean episode reward.
env_symmetric = df["symmetric"].unique().tolist()[0]

if env_symmetric:
    # symmetric env: a single plot over the whole table
    plot_frames = [(None, df)]
else:
    # asymmetric env: one plot per agent ID, in sorted order
    plot_frames = [
        (agent_id, df[df["agent_id"] == agent_id])
        for agent_id in sorted(df["agent_id"].unique().tolist())
    ]

for agent_id, df_agent in plot_frames:
    # Figure size grows with the number of co-teams / policies. `n // 1.5` is 0
    # when there is only one entry, so clamp to 1 to avoid a zero-sized figure.
    fig_width = max(1, len(df_agent["co_team_name"].unique()) // 1.5)
    fig_height = max(1, len(df_agent["policy_name"].unique()) // 1.5)

    fig, axs = plot_utils.plot_pairwise_comparison(
        df_agent,
        y_key="episode_reward_mean",
        policy_key="policy_name",
        coplayer_policy_key="co_team_name",
        vrange=None,
        figsize=(fig_width, fig_height),
        valfmt="{x:.2f}",
    )
    if agent_id is not None:
        # label per-agent plots so they can be told apart
        axs[0][0].set_xlabel(f"AgentID={agent_id}")
        

## Pairwise Performance by Policy Type

Here we look at the performance for each pairing of policy type averaged over the random seeds.

Each plot shows the *mean performance* for the *row* policy against the *column* policy.

In [None]:
# Pairwise comparison aggregated by policy type (`policy_type`) against
# co-player team type (`co_team_type`), using the mean episode reward.
env_symmetric = df["symmetric"].unique().tolist()[0]

if env_symmetric:
    # symmetric env: a single plot over the whole table
    plot_frames = [(None, df)]
else:
    # asymmetric env: one plot per agent ID, in sorted order
    plot_frames = [
        (agent_id, df[df["agent_id"] == agent_id])
        for agent_id in sorted(df["agent_id"].unique().tolist())
    ]

for agent_id, df_agent in plot_frames:
    # Figure size grows with the number of types. `n // 1.5` is 0 when there is
    # only one type, so clamp to 1 to avoid a zero-sized figure.
    fig_width = max(1, len(df_agent["co_team_type"].unique()) // 1.5)
    fig_height = max(1, len(df_agent["policy_type"].unique()) // 1.5)

    fig, axs = plot_utils.plot_pairwise_comparison(
        df_agent,
        y_key="episode_reward_mean",
        policy_key="policy_type",
        coplayer_policy_key="co_team_type",
        vrange=None,
        figsize=(fig_width, fig_height),
        valfmt="{x:.2f}",
    )
    if agent_id is not None:
        # label per-agent plots so they can be told apart
        axs[0][0].set_xlabel(f"AgentID={agent_id}")

## Pairwise Performance by Policy Type (Same-Play vs Cross-Play)

Here we look at the performance for each pairing of policy type averaged over the random seeds.

Each plot shows the *mean performance* for the *row* policy against the *column* policy.

The **left-hand plot** shows *same-play* performance: the performance of policies against policies from the same training population (i.e. the same random seed). The values shown are the mean over all populations.

The **middle plot** shows *cross-play* performance: the performance of policies against policies from different training populations. The values shown are the mean over all populations.

The **right-hand plot** shows the generalization gap which is just the difference between the *Same-Play* and *Cross-Play* performance.

In [None]:
# Same-play / cross-play comparison by policy type, one figure per metric.
env_symmetric = df["symmetric"].unique().tolist()[0]

if env_symmetric:
    # symmetric env: a single set of plots over the whole table
    plot_frames = [(None, df)]
else:
    # asymmetric env: one set of plots per agent ID, in sorted order
    plot_frames = [
        (agent_id, df[df["agent_id"] == agent_id])
        for agent_id in sorted(df["agent_id"].unique().tolist())
    ]

for agent_id, df_agent in plot_frames:
    # Size the figure from the *type* columns that are actually plotted, with
    # the width tripled for the three side-by-side panels. The same formula is
    # used for symmetric and asymmetric envs so both get a usable minimum size.
    fig_width = max(12, (len(df_agent["co_team_type"].unique()) // 1.25) * 3)
    fig_height = max(6, len(df_agent["policy_type"].unique()) // 1.25)

    for y_key in ["episode_reward_mean", "prop_WIN", "prop_LOSS"]:
        fig, axs = plot_utils.plot_mean_pairwise_comparison(
            df_agent,
            y_key=y_key,
            policy_key="policy_type",
            pop_key="policy_seed",
            coplayer_pop_key="co_team_seed",
            coplayer_policy_key="co_team_type",
            vrange=None,
            figsize=(fig_width, fig_height),
            valfmt="{x:.2f}"
        )
        if agent_id is not None:
            # label per-agent plots so they can be told apart
            axs[0][0].set_xlabel(f"AgentID={agent_id}")