# Analysis of all methods results

Here we explore the performance of RL, Planning, and RL+Planning across the different environments.

We will look at their mean performance (i.e. episode returns) across the different environments, in both in-distribution (planning population matches the test population) and out-of-distribution (planning population does not match the test population) settings.

**Note** each experiment run was repeated 5 times (once for each RL policy seed), so we average the results across these 5 runs.

In [None]:
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from posggym_baselines.config import REPO_DIR

sys.path.insert(0, str(REPO_DIR / "baseline_exps"))
import exp_utils

# Plot styling: apply seaborn's default theme, then switch to the "paper"
# context with 1.5x font scaling and the "colorblind" colour palette.
sns.set_theme()
sns.set_context("paper", font_scale=1.5)
sns.set_palette("colorblind")

# Flag for saving results; it is not read anywhere in the visible cells.
# NOTE(review): presumably meant to gate writing figures to disk - confirm.
SAVE_RESULTS = False

In [None]:
# Load the data for every environment; the keys are the environment ids used
# to label results in the cells below.
ALL_ENV_DATA = exp_utils.load_all_env_data()
NUM_ENVS = len(ALL_ENV_DATA)

# Show which environments were found.
for k in ALL_ENV_DATA:
    print(k)

# Figure parameters: at most three columns, and enough rows to fit every
# environment (ceiling division written as a negated floor division).
FIGSIZE = (10, 10)
N_COLS = min(3, NUM_ENVS)
N_ROWS = -(-NUM_ENVS // N_COLS)

## Load Results

In [None]:
# Add In/Out of Distribution labels
def get_in_out_dist_label(row):
    """Label a results row as in-distribution or out-of-distribution.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) that has
            "Planning Population" and "Test Population" entries.

    Returns:
        The result of comparing the two entries for equality: truthy when
        the planning population matches the test population
        (in-distribution), falsy otherwise (out-of-distribution).
    """
    planning_pop = row["Planning Population"]
    test_pop = row["Test Population"]
    return planning_pop == test_pop

### Planning Results

In [None]:
# Read the planning results of every environment, tagging each frame with the
# id of the environment it came from so rows stay identifiable once stacked.
planning_results_df = []
for env_id, env_data in ALL_ENV_DATA.items():
    env_planning_results = pd.read_csv(env_data.planning_results_file).assign(
        full_env_id=env_id
    )
    planning_results_df.append(env_planning_results)

# Stack into a single frame and switch to the display column names used by
# the plots.
planning_results_df = pd.concat(planning_results_df, ignore_index=True)
planning_results_df = planning_results_df.rename(
    columns={
        "alg": "Algorithm",
        "planning_pop_id": "Planning Population",
        "test_pop_id": "Test Population",
        "return": "Return",
    }
)

# True where the planning population matches the test population.
planning_results_df["In Distribution"] = planning_results_df.apply(
    get_in_out_dist_label, axis=1
)

planning_results_df = planning_results_df.sort_values(
    by=[
        "Algorithm",
        "full_env_id",
        "Planning Population",
        "Test Population",
        "search_time_limit",
    ]
)

# Largest search time limit in the planning results, taken before the filter
# below is applied.
max_search_time = planning_results_df["search_time_limit"].max()

# Keep only the rows whose search time limit is one of the values below.
# Can remove this once we have all the data.
search_time_mask = planning_results_df["search_time_limit"].isin(
    [0.05, 0.1, 0.5, 1, 5, 10, 20]
)
planning_results_df = planning_results_df[search_time_mask]
print(planning_results_df["search_time_limit"].unique())

### RL Results

In [None]:
# Read the RL results of every environment, tagging each frame with the id of
# the environment it came from so rows stay identifiable once stacked.
br_results_df = []
for full_env_id, env_data in ALL_ENV_DATA.items():
    env_br_results_df = pd.read_csv(env_data.rl_br_results_file)
    env_br_results_df["full_env_id"] = full_env_id
    br_results_df.append(env_br_results_df)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# every per-environment frame keeps its own row labels, so the same label is
# repeated once per environment. The planning and combined results are already
# concatenated this way.
br_results_df = pd.concat(br_results_df, ignore_index=True)

# Use the same display column names as the planning results so the frames can
# be combined and plotted together.
br_results_df.rename(
    columns={
        "train_pop": "Planning Population",
        "eval_pop": "Test Population",
        "mean_returns": "Return",
    },
    inplace=True,
)

# True where the training ("Planning") population matches the test population.
br_results_df["In Distribution"] = br_results_df.apply(
    get_in_out_dist_label, axis=1
)
# The RL results file is not read for an algorithm column, so label every row
# explicitly.
br_results_df["Algorithm"] = "RL-BR"

br_results_df.sort_values(
    by=[
        "full_env_id",
        "Planning Population",
        "Test Population",
    ],
    inplace=True,
)

### Combined (RL+Planning) Results

In [None]:
# Read the combined (RL+Planning) results of every environment, tagging each
# frame with the id of the environment it came from.
combined_results_df = []
for env_id, env_data in ALL_ENV_DATA.items():
    env_combined_results = pd.read_csv(env_data.combined_results_file).assign(
        full_env_id=env_id
    )
    combined_results_df.append(env_combined_results)

# Stack into a single frame and switch to the display column names used by
# the plots.
combined_results_df = pd.concat(combined_results_df, ignore_index=True)
combined_results_df = combined_results_df.rename(
    columns={
        "alg": "Algorithm",
        "planning_pop_id": "Planning Population",
        "test_pop_id": "Test Population",
        "return": "Return",
    }
)

# True where the planning population matches the test population.
combined_results_df["In Distribution"] = combined_results_df.apply(
    get_in_out_dist_label, axis=1
)

combined_results_df = combined_results_df.sort_values(
    by=[
        "Algorithm",
        "full_env_id",
        "Planning Population",
        "Test Population",
        "search_time_limit",
    ]
)

# Search-time filter is currently disabled for the combined results:
# combined_results_df = combined_results_df[combined_results_df["search_time_limit"].isin([0.05, 0.1, 0.5, 1, 5, 10, 20])]
print(combined_results_df["search_time_limit"].unique())

### All results

In [None]:
# Need to add search_time_limit to br_results_df
# We duplicate the DF and set the search_time_limit to the max and min values
# This will produce a horizontal line in the plots :)
min_search_time = min(
    planning_results_df["search_time_limit"].min(),
    combined_results_df["search_time_limit"].min(),
)
max_search_time = max(
    planning_results_df["search_time_limit"].max(),
    combined_results_df["search_time_limit"].max(),
)
# One copy of the RL results sits at the largest search time ...
br_results_df["search_time_limit"] = max_search_time
# ... and an independent deep copy sits at the smallest, giving each RL result
# two x positions with the same return.
min_br_results_df = br_results_df.copy(deep=True)
min_br_results_df["search_time_limit"] = min_search_time

# combine planning, combined, and rl results together
# ignore_index=True gives the stacked frame a fresh 0..n-1 index. The four
# inputs each carry their own row labels (and the two RL frames share
# identical ones), so keeping them would leave the same label on several
# unrelated rows.
all_results_df = pd.concat(
    [
        planning_results_df,
        combined_results_df,
        br_results_df,
        min_br_results_df,
    ],
    ignore_index=True,
)

## In-Distribution Performance

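Here we look at the performance of all methods as a function of search time. Each row of the figure is an environment, and the columns separate the in-distribution results (planning and test populations are the same) from the out-of-distribution results. The RL results do not come with a search time, so they are drawn as a horizontal line spanning the minimum and maximum search times.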

Dimensions:

- `y-axis`: mean episode return
- `x-axis`: search time
- `z-axis/hue`: algorithm

In [None]:
# Line plot of return against search time limit: one line per algorithm, one
# row of panels per environment and one column per "In Distribution" value.
# The y-axis is shared within a row; the x-axis is shared across all panels.
g = sns.relplot(
    data=all_results_df,
    x="search_time_limit",
    y="Return",
    hue="Algorithm",
    row="full_env_id",
    col="In Distribution",
    legend="full",
    markers=True,
    dashes=False,
    kind="line",
    facet_kws={"sharey": "row", "sharex": True},
)

# Replace the default panel titles with "<environment> | <setting>".
# axes_dict is keyed by (row value, column value); the column value is the
# "In Distribution" flag, so a falsy key marks the out-of-distribution panel.
for (row_key, col_key), ax in g.axes_dict.items():
    dist_label = "In Dist." if col_key else "OOD"
    ax.set_title(f"{row_key} | {dist_label}")

del g

## In vs Out of Distribution Performance

Here we look at the in-distribution vs out-of-distribution performance of each algorithm.

- `x-axis`: Environment
- `y-axis`: Mean episode return
- `hue/z-axis`: In (True) vs Out (False) of Distribution
- `col/figures`: Algorithm

## Search Statistics

Here we look at various statistics of the search process for each algorithm.

Each figure is a different statistic, and each line is a different environment, since we expect some differences between environments based on things like the average number of steps to a terminal state. In- and out-of-distribution results are grouped together since we expect, and see, no difference in search statistics between the two settings.

- `x-axis`: Search time
- `y-axis`: Search Statistic Values
- `col/figures`: Environment

In [None]:
# The RL-BR rows are excluded from the search statistic plots. The filter does
# not depend on the statistic being plotted, so it is computed once up front
# instead of on every loop iteration.
search_stats_df = all_results_df[all_results_df["Algorithm"] != "RL-BR"]

# One figure per statistic: statistic value against search time limit, one
# line per algorithm, one row of panels per environment and one column per
# planning population.
for stat_key in [
    "update_time",
    "reinvigoration_time",
    "evaluation_time",
    "policy_calls",
    "inference_time",
    "search_depth",
    "num_sims",
    "mem_usage",
    "min_value",
    "max_value",
]:
    plot = sns.relplot(
        data=search_stats_df,
        x="search_time_limit",
        y=stat_key,
        hue="Algorithm",
        col="Planning Population",
        row="full_env_id",
        kind="line",
    )

    del plot