# Analysis of Population Diversity

In this notebook we explore the diversity of the two policy populations, `P0` and `P1`, for each environment.

In [None]:
import os
import os.path as osp
import yaml
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from posggym_baselines.config import REPO_DIR

sys.path.insert(0, osp.join(REPO_DIR, "baseline_exps"))
import exp_utils

# Global plot styling. The order of these calls matters: set_theme() applies
# seaborn's default theme first, then the context (element/font scaling) and
# the colour palette are overridden on top of it.
sns.set_theme()
sns.set_context("paper", font_scale=1.5)
sns.set_palette("colorblind")

# NOTE(review): not referenced by any cell visible here — presumably toggles
# writing results/figures to disk; confirm before relying on it.
SAVE_RESULTS = False

In [None]:
# Load the experiment data for every environment and list the available keys.
ALL_ENV_DATA = exp_utils.load_all_env_data()
for env_key in ALL_ENV_DATA.keys():
    print(env_key)

NUM_ENVS = len(ALL_ENV_DATA)

# Figure layout: at most three columns, and as many rows as are needed to
# fit one panel per environment.
FIGSIZE = (10, 10)
N_COLS = min(3, NUM_ENVS)
N_ROWS = -(-NUM_ENVS // N_COLS)  # ceiling division

## Population Diversity

The diversity of a population of policies is calculated as the mean of the pairwise distances between the policies in the population. Each policy is represented by the vector of its mean returns against each co-player team from the same population, and the distance between two policies is the Euclidean (L2) distance between their return vectors, as it captures the magnitude of the difference between the returns of each policy against each other policy.

In [None]:
def policy_pop_id(row, env_id):
    """Return the population label of the policy named in ``row``.

    ``row["policy_name"]`` is looked up in
    ``ALL_ENV_DATA[env_id].pop_policy_names``, checking "P0" first and then
    "P1". A policy found in neither population is labelled "P2".
    """
    names_by_pop = ALL_ENV_DATA[env_id].pop_policy_names
    name = row["policy_name"]
    for pop_label in ("P0", "P1"):
        if name in names_by_pop[pop_label]:
            return pop_label
    # not used in exps
    return "P2"

def co_team_name(row):
    """Return ``row["co_team_id"]`` with every "(" and ")" character removed."""
    return row["co_team_id"].translate(str.maketrans("", "", "()"))

def co_team_pop_id(row, env_id):
    """Return the population label of the co-player team named in ``row``.

    ``row["co_team_name"]`` is looked up in
    ``ALL_ENV_DATA[env_id].pop_co_team_names``, checking "P0" first and then
    "P1". A team found in neither population is labelled "P2".
    """
    teams_by_pop = ALL_ENV_DATA[env_id].pop_co_team_names
    team = row["co_team_name"]
    for pop_label in ("P0", "P1"):
        if team in teams_by_pop[pop_label]:
            return pop_label
    # not used in exps
    return "P2"


# Build one tidy frame of pairwise results (policy vs. co-player team) with
# one block of rows per environment in ALL_ENV_DATA.
env_div_frames = []
for env_id, env_data in ALL_ENV_DATA.items():
    print(env_id)
    env_div_results_df = pd.read_csv(env_data.pop_div_results_file)
    env_div_results_df["env_agent_id"] = env_id
    # agent ids are compared as strings by the plotting cells
    env_div_results_df["agent_id"] = env_div_results_df["agent_id"].astype(str)

    # Drop the "Random-v0" policy and the "(Random)" co-team. The explicit
    # .copy() makes the filtered frame independent of the frame read from disk,
    # so the column assignments below cannot trigger SettingWithCopyWarning.
    not_random = (
        (env_div_results_df["policy_id"] != "Random-v0")
        & (env_div_results_df["co_team_id"] != "(Random)")
    )
    env_div_results_df = env_div_results_df.loc[not_random].copy()

    env_div_results_df["co_team_name"] = env_div_results_df.apply(
        co_team_name, axis=1
    )
    env_div_results_df["policy_pop_id"] = env_div_results_df.apply(
        policy_pop_id, axis=1, env_id=env_id
    )
    env_div_results_df["co_team_pop_id"] = env_div_results_df.apply(
        co_team_pop_id, axis=1, env_id=env_id
    )

    if env_id == "Driving-v1":
        # Shorten the display names. This happens after the population lookup
        # above, which therefore sees the un-shortened names.
        for name_col in ("policy_name", "co_team_name"):
            env_div_results_df[name_col] = env_div_results_df[name_col].apply(
                lambda x: x.replace("Shortestpath", "")
            )

    # drop unused rows (anything not in population P0 or P1)
    in_exps = (
        (env_div_results_df["policy_pop_id"] != "P2")
        & (env_div_results_df["co_team_pop_id"] != "P2")
    )
    env_div_results_df = env_div_results_df.loc[in_exps]

    # average over any duplicate rows
    env_div_results_df = env_div_results_df.groupby([
        "env_id",
        "env_agent_id",
        "policy_name",
        "co_team_name",
        "policy_pop_id",
        "co_team_pop_id",
        "agent_id",
    ]).agg(
        {"episode_reward_mean": "mean"}
    ).reset_index()

    env_div_frames.append(env_div_results_df)

div_results_df = pd.concat(env_div_frames)

In [None]:
# Per-environment heatmap over the combined population: the mean episode
# reward of every policy (rows) against every co-player team (columns).
fig, axes = plt.subplots(
    nrows=NUM_ENVS,
    ncols=1,
    figsize=(6, (NUM_ENVS * 4) + 2),
    squeeze=False,  # keep `axes` 2-D even for a single environment
)

for ax, (env_id, env_data) in zip(axes[:, 0], ALL_ENV_DATA.items()):
    agent_id = env_data.agent_id
    print(env_id, agent_id)

    # rows of this environment, from the perspective of its designated agent
    is_env = div_results_df["env_agent_id"] == env_id
    is_agent = div_results_df["agent_id"] == agent_id
    env_df = div_results_df[is_env & is_agent]

    pw_returns = env_df.pivot(
        index="policy_name",
        columns="co_team_name",
        values="episode_reward_mean",
    )
    sns.heatmap(
        data=pw_returns,
        ax=ax,
        annot=True,
        cmap="YlGnBu",
        fmt=".2f",
        square=True,
        annot_kws={"fontsize": 6},
    )
    ax.set_title(f"Env: {env_id}")

fig.tight_layout()

In [None]:
# Per-population diversity: one heatmap per (environment, population) showing
# the mean episode reward of each policy (rows) against each co-player team
# (columns), restricted to policies and teams from the same population.
#
# squeeze=False keeps `axes` two-dimensional, so `axes[row, col]` also works
# when there is only a single environment (otherwise matplotlib returns a 1-D
# array and the 2-D indexing below raises IndexError).
fig, axes = plt.subplots(
    nrows=NUM_ENVS,
    ncols=2,
    figsize=(10, (NUM_ENVS * 4) + 2),
    squeeze=False,
)

for row, (env_id, env_data) in enumerate(ALL_ENV_DATA.items()):
    agent_id = env_data.agent_id
    print(env_id, agent_id)
    for col, pop_id in enumerate(["P0", "P1"]):
        # NOTE(review): the "agent_id" column was cast to str when the results
        # were loaded; this comparison assumes env_data.agent_id is a str too.
        env_df = div_results_df[
            (div_results_df["env_agent_id"] == env_id) &
            (div_results_df["agent_id"] == agent_id) &
            (div_results_df["policy_pop_id"] == pop_id) &
            (div_results_df["co_team_pop_id"] == pop_id)
        ]

        pw_returns = env_df.pivot(
            index="policy_name",
            columns="co_team_name",
            values="episode_reward_mean"
        )
        sns.heatmap(
            data=pw_returns,
            ax=axes[row, col],
            annot=True,
            cmap="YlGnBu",
            fmt=".2f",
            square=True,
            annot_kws={"fontsize": 6}
        )
        axes[row, col].set_title(f"Env: {env_id}, Pop: {pop_id}")

fig.tight_layout()


In [None]:
# Quantify the diversity of each population in each environment.
#
# For every (environment, population) the within-population results are
# pivoted into a matrix with one column per policy and one row per co-player
# team. Each policy is thus described by its vector of mean returns, and the
# diversity is the mean Euclidean distance between these vectors.
cols = ["Env ID", "Population", "Mean Pairwise L2 Distance", "Mean Pairwise Return"]
pop_div_records = []
for env_id, env_data in ALL_ENV_DATA.items():
    agent_id = env_data.agent_id
    for pop_id in ["P0", "P1"]:
        pop_pw_df = div_results_df[
            (div_results_df["env_agent_id"] == env_id) &
            (div_results_df["agent_id"] == agent_id) &
            (div_results_df["policy_pop_id"] == pop_id) &
            (div_results_df["co_team_pop_id"] == pop_id)
        ].pivot(index="co_team_name", columns="policy_name", values="episode_reward_mean")

        # One row per policy, one column per co-team. Sizing everything from
        # this array (rather than from the number of co-teams) keeps the
        # result correct even when the pivot is not square.
        pw_returns = pop_pw_df.to_numpy(dtype=float).T

        # pw_ed[i, j] = L2 distance between the return vectors of policies i
        # and j. nansum skips missing entries, matching pandas' default
        # skipna behaviour when summing a Series.
        diffs = pw_returns[:, np.newaxis, :] - pw_returns[np.newaxis, :, :]
        pw_ed = np.sqrt(np.nansum(diffs ** 2, axis=-1))

        # The mean is taken over the full distance matrix, i.e. it includes
        # the zero self-distances on the diagonal and counts each pair twice.
        pop_div_records.append([env_id, pop_id, np.mean(pw_ed), np.mean(pw_returns)])

pop_div_results_df = pd.DataFrame(columns=cols, data=pop_div_records)
div_plot = sns.catplot(
    data=pop_div_results_df,
    x="Population",
    y="Mean Pairwise L2 Distance",
    col="Env ID",
    col_wrap=N_COLS,
    kind="bar",
    sharey=True,
)