# POSGGym.agents Pairwise Agent Comparison

This notebook can be used to visualize the pairwise performance of the implemented policies.

The actual CSV results files are stored in the `posggym/notebooks/results/pairwise_agent_comparison` directory.

In [None]:
import os
import os.path as osp
import sys 

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import posggym

from posggym.agents.registration import parse_policy_id
from posggym.config import REPO_DIR

sys.path.append(osp.join(REPO_DIR, "notebooks"))
import plot_utils

# Root directory of the pairwise comparison results; its entries are named
# after environment IDs (one entry per environment that has results).
results_dir = osp.join(REPO_DIR, "notebooks", "results", "pairwise_agent_comparison")

# Alphabetically ordered so the listing below is stable between runs.
available_env_result_dirs = sorted(os.listdir(results_dir))

print("Available Env Results", "---------------------", sep="\n")
for dir_name in available_env_result_dirs:
    print(dir_name)


## The environment

In [None]:
# CHANGE THIS
env_id = "DrivingContinuous-v0"
# env_id = "PursuitEvasionContinuous-v0"
# CHANGE THIS

# The selected environment must have an entry in the results directory.
assert env_id in os.listdir(results_dir)
env_result_dir = osp.join(results_dir, env_id)

print(
    "The different environment arguments that have results",
    "-----------------------------------------------------",
    sep="\n",
)
# Sorted so that positional indexing into this list is reproducible.
env_result_files = sorted(os.listdir(env_result_dir))
for file_name in env_result_files:
    # Show the result name without its ".csv" extension.
    print(file_name.replace(".csv", ""))


## Data Loading

In [None]:
def display_df_info(df: pd.DataFrame):
    """Print a human-readable summary of a results dataframe.

    The summary consists of:

    - the first unique value of the ``env_id``, ``env_args_id``,
      ``symmetric``, ``seed``, ``num_agents`` and ``num_episodes`` columns,
    - the sorted unique values of the ``agent_id`` column,
    - the name of every column,
    - the sorted unique ``policy_name`` values, and
    - the sorted unique ``co_team_id`` values.

    When the ``symmetric`` value is truthy the policies and co-team IDs are
    listed once, using the rows of the first agent ID only. Otherwise they are
    listed separately under an ``AgentID = <id>`` heading for each agent ID.

    Parameters
    ----------
    df
        Results dataframe containing all of the columns named above.

    """

    def first_value(column: str):
        # First unique value, converted to a plain Python object by tolist().
        return df[column].unique().tolist()[0]

    # Read every summary value before printing anything, so a missing column
    # fails before partial output is produced.
    summary = [
        (label, first_value(column))
        for label, column in (
            ("Env ID", "env_id"),
            ("Env Args ID", "env_args_id"),
            ("Env symmetric", "symmetric"),
            ("Seed", "seed"),
            ("Num Agents", "num_agents"),
            ("Num Episodes", "num_episodes"),
        )
    ]
    env_symmetric = first_value("symmetric")
    for label, value in summary:
        print(f"{label}: {value}")

    print("\nAgent IDs")
    print("---------")
    agent_ids = sorted(df["agent_id"].unique().tolist())
    for agent_id in agent_ids:
        print(agent_id)

    print("\nColumns")
    print("--------")
    # Only list the columns: the agent IDs come solely from the "agent_id"
    # column and must not be extended with column-name suffixes.
    for column in df.columns:
        print(column)

    def print_values_per_agent(column: str):
        # Print the sorted unique values of `column`, once for symmetric
        # environments or grouped by agent ID otherwise.
        for agent_id in agent_ids:
            agent_rows = df[df["agent_id"] == agent_id]
            values = sorted(agent_rows[column].unique().tolist())
            if env_symmetric:
                for value in values:
                    print(value)
                # Symmetric: the first agent's values are the only ones shown.
                break
            print(f"AgentID = {agent_id}")
            for value in values:
                print(f"  {value}")

    print("\nPolicies")
    print("--------")
    print_values_per_agent("policy_name")

    print("\nCo-Team IDs")
    print("-----------")
    print_values_per_agent("co_team_id")

                
def add_95CI(df: pd.DataFrame) -> pd.DataFrame:
    """Add 95% CI columns to dataframe.

    For every column named ``<prefix>_std`` a column ``<prefix>_CI`` is added,
    computed as ``1.96 * std / sqrt(num_episodes)``.

    Parameters
    ----------
    df
        Dataframe to extend. It is modified in place. A ``num_episodes``
        column is required whenever at least one ``*_std`` column exists.

    Returns
    -------
    pd.DataFrame
        The same dataframe object, with the CI columns added.

    """
    std_suffix = "_std"
    # Snapshot the matching names first since columns are added in the loop.
    std_columns = [col for col in df.columns if col.endswith(std_suffix)]
    if not std_columns:
        return df

    # Computed column-wise (instead of row by row) so it is fast and also
    # well-defined for a dataframe with no rows.
    sqrt_n = np.sqrt(df["num_episodes"])
    for col in std_columns:
        # Strip only the trailing suffix; "_std" may also occur mid-name.
        prefix = col[: -len(std_suffix)]
        df[f"{prefix}_CI"] = 1.96 * (df[col] / sqrt_n)
    return df


def add_outcome_proportions(df: pd.DataFrame) -> pd.DataFrame:
    """Add proportion columns to dataframe.

    For each of the outcome count columns ``num_LOSS``, ``num_DRAW``,
    ``num_WIN`` and ``num_NA`` that is present, the matching ``prop_*`` column
    is added as the count divided by ``num_episodes``. Missing count columns
    are skipped.

    Parameters
    ----------
    df
        Dataframe to extend. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe object, with the proportion columns added.

    """
    count_to_proportion = {
        "num_LOSS": "prop_LOSS",
        "num_DRAW": "prop_DRAW",
        "num_WIN": "prop_WIN",
        "num_NA": "prop_NA",
    }
    for count_col, prop_col in count_to_proportion.items():
        if count_col in df.columns:
            # Column-wise division: fast, and also valid when df has no rows.
            df[prop_col] = df[count_col] / df["num_episodes"]
    return df


def add_co_team_name(df: pd.DataFrame) -> pd.DataFrame:
    """Add co team name to dataframe.

    Also removes unwanted rows.

    Each ``co_team_id`` is expected to be a string of comma separated policy
    names wrapped in one leading and one trailing character (the original
    comments describe these as ``(`` and ``)``).

    - Symmetric environments (first unique value of ``symmetric`` is truthy):
      only rows whose co-team consists of a single repeated policy name are
      kept, and ``co_team_name`` is that policy name. A filtered copy is
      returned; the input dataframe is left untouched.
    - Asymmetric environments: ordering matters, so no rows are removed and
      ``co_team_name`` is the team ID without its enclosing characters. The
      column is added to the input dataframe in place.

    Parameters
    ----------
    df
        Results dataframe with ``symmetric`` and ``co_team_id`` columns.

    Returns
    -------
    pd.DataFrame
        Dataframe with the ``co_team_name`` column added.

    """

    def member_names(team_id):
        # Ignore "(" and ")" at the start and end, then split into members.
        return team_id[1:-1].split(",")

    env_symmetric = df["symmetric"].unique().tolist()[0]
    if not env_symmetric:
        # For asymmetric environments ordering matters so team IDs can't be
        # reduced; just drop the enclosing characters.
        df["co_team_name"] = df["co_team_id"].map(lambda team_id: team_id[1:-1])
        return df

    # For symmetric environments only keep the rows where every co-player
    # uses the same policy, so the team is identified by one policy name.
    def is_single_policy_team(team_id):
        names = member_names(team_id)
        return all(name == names[0] for name in names)

    keep_row = df["co_team_id"].map(is_single_policy_team).astype(bool)
    # Copy so the new column is written to an independent frame rather than
    # to a slice of the input.
    df = df[keep_row].copy()
    df["co_team_name"] = df["co_team_id"].map(
        lambda team_id: member_names(team_id)[0]
    )
    return df

    

def import_results(result_file: str) -> pd.DataFrame:
    """Import experiment results.

    Reads the CSV file and post-processes it with ``add_95CI``,
    ``add_outcome_proportions`` and ``add_co_team_name`` (in that order).

    Parameters
    ----------
    result_file
        Path of the results CSV file to load.

    Returns
    -------
    pd.DataFrame
        The post-processed results.

    """
    df = pd.read_csv(result_file)

    # Silence the chained-assignment warning only while post-processing. The
    # context manager restores whatever value the option had before, even if
    # one of the steps raises.
    with pd.option_context("mode.chained_assignment", None):
        df = add_95CI(df)
        df = add_outcome_proportions(df)
        df = add_co_team_name(df)

    return df

# Load the results file at index 1 of the sorted file listing and print a
# summary of its contents.
df = import_results(osp.join(env_result_dir, env_result_files[1]))
display_df_info(df)

## Pairwise Performance

Here we look at the performance for each possible pairing of policies.

For each performance metric we have a grid of (grid)-plots:
    
- Outer-grid: train seed X train seed
- Inner-grid: policy_id X policy_id

In [None]:
# Figure dimensions scale with the number of co-teams (width) and policies
# (height). They are clamped to at least 1.0 because `n // 1.5` floors to 0.0
# when there is a single entry, and a zero-sized figure dimension is presumably
# rejected downstream (matplotlib requires positive figure sizes) - confirm
# against plot_utils.
env_symmetric = df["symmetric"].unique().tolist()[0]
if env_symmetric:
    # can do a single plot
    fig_width = max(1.0, len(df["co_team_name"].unique()) // 1.5)
    fig_height = max(1.0, len(df["policy_name"].unique()) // 1.5)

    plot_utils.plot_pairwise_comparison(
        df,
        y_key="episode_reward_mean",
        policy_key="policy_name",
        coplayer_policy_key="co_team_name",
        vrange=None,
        figsize=(fig_width, fig_height),
        valfmt="{x:.2f}",
    )
else:
    # for asymmetric envs we do one plot per agent
    agent_ids = sorted(df["agent_id"].unique().tolist())
    for i in agent_ids:
        df_i = df[df["agent_id"] == i]
        fig_width = max(1.0, len(df_i["co_team_name"].unique()) // 1.5)
        fig_height = max(1.0, len(df_i["policy_name"].unique()) // 1.5)

        fig, axs = plot_utils.plot_pairwise_comparison(
            df_i,
            y_key="episode_reward_mean",
            policy_key="policy_name",
            coplayer_policy_key="co_team_name",
            vrange=None,
            figsize=(fig_width, fig_height),
            valfmt="{x:.2f}",
        )
        # Label the first axes with the agent this plot belongs to.
        axs[0][0].set_xlabel(f"AgentID={i}")
        