In [None]:
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator
import pandas as pd
import numpy as np
import warnings

# Ignore every warning category from here on (presumably to keep cell output
# uncluttered). NOTE(review): this also hides deprecation warnings raised by
# the libraries imported above; consider narrowing the filter.
warnings.filterwarnings("ignore")

def parse_tensorboard(path, scalars):
    """Load the requested scalar series from TensorBoard event data.

    Args:
        path: location handed unchanged to ``EventAccumulator``.
        scalars: iterable of scalar tag names to extract.

    Returns:
        A dict mapping every requested tag to a ``pandas.DataFrame`` built
        from the events returned by ``EventAccumulator.Scalars(tag)``.

    Raises:
        AssertionError: if at least one requested tag is not present; the
            message lists the missing tags.
    """
    # Materialise once so a one-shot iterable is not exhausted by the check.
    scalars = list(scalars)
    ea = event_accumulator.EventAccumulator(
        path,
        # a size guidance of 0 keeps every scalar event instead of sampling
        size_guidance={event_accumulator.SCALARS: 0},
    )
    ea.Reload()
    # make sure the scalars are in the event accumulator tags
    available = set(ea.Tags()["scalars"])
    missing = [s for s in scalars if s not in available]
    assert not missing, (
        f"some scalars were not found in the event accumulator: {missing}"
    )
    return {k: pd.DataFrame(ea.Scalars(k)) for k in scalars}

def make_plot(dfs, labels, value='value', window=20):
    """Plot raw and smoothed curves for several runs on a single axes.

    For each frame the raw ``value`` column is drawn semi-transparent against
    ``step``. On top of it, in the same colour, the trailing rolling mean over
    ``window`` rows is drawn and labelled for the legend. The ``step`` column
    is averaged with the same window, so each smoothed point sits at the mean
    step of its window.

    Args:
        dfs: iterable of DataFrames containing a ``step`` column and the
            column named by ``value``.
        labels: legend labels, indexed in the same order as ``dfs``.
        value: name of the column to plot on the y-axis.
        window: number of rows in the rolling-mean window.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots()``.
    """
    fig, ax = plt.subplots()
    for index, df in enumerate(dfs):
        line = ax.plot(
            df['step'], df[value], alpha=0.5, zorder=0,
        )
        # Smooth only the two plotted columns: a rolling mean over the whole
        # frame would fail on any non-numeric column it happens to contain.
        smooth_step = df['step'].rolling(window, center=False).mean()
        smooth_value = df[value].rolling(window, center=False).mean()
        ax.plot(
            smooth_step, smooth_value,
            color=line[0].get_color(), zorder=1, label=labels[index],
        )
    return fig, ax


In [None]:
# Load the training-soup curve of the centralized-critic and the
# decentralized-critic runs.
scalers = ['charts/train_soup']

# Forward slashes keep these relative paths valid on Linux/macOS as well as
# on Windows (backslash separators only resolve on Windows).
with_centralized_critic = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs/events.out.tfevents.1713584415.overcooked-training.5534.0'

with_decentralized_critic = './runs/fcnn/cramped_room/fcnn_v1_3_num_envs/events.out.tfevents.1713551795.overcooked-training.5519.0'

with_centralized_critic_res = parse_tensorboard(with_centralized_critic, scalers)
with_decentralized_critic_res = parse_tensorboard(with_decentralized_critic, scalers)

In [None]:
# Centralized vs. decentralized critic: raw + smoothed soup curves, saved to
# critic.png.
dfs = [
    run['charts/train_soup']
    for run in (with_centralized_critic_res, with_decentralized_critic_res)
]
fig, ax = make_plot(
    dfs,
    ['centralized_critic (default)', 'decentralized_critic'],
    value='value',
)
ax.set_xlim([0, 2000])
ax.set_ylim([0, 2])
ax.set_xlabel('num_update')
ax.set_ylabel('mean number of soup per episode')
ax.legend()
fig.savefig('critic.png')

In [None]:
# Compare the default run against the run trained with the extra reward and
# save the figure to extra_rw.png.
scalers = ['charts/train_soup']

# Forward slashes keep these relative paths valid on Linux/macOS as well as
# on Windows (backslash separators only resolve on Windows).
without_rw = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs/events.out.tfevents.1713584415.overcooked-training.5534.0'

with_rw = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs_with_extra_rw/events.out.tfevents.1713594167.with-extra-reward.8822.0'

without_rw_res = parse_tensorboard(without_rw, scalers)
with_rw_res = parse_tensorboard(with_rw, scalers)

dfs = [
    without_rw_res['charts/train_soup'],
    with_rw_res['charts/train_soup'],
]
fig, ax = make_plot(dfs, ['without_rw (default)', 'with extra_rw'], value='value')
ax.set_xlim([0, 5000])
ax.set_ylim([0, 10])
ax.legend()
ax.set_xlabel('num_update')
ax.set_ylabel('mean number of soup per episode')
fig.savefig('extra_rw.png')

In [None]:
# Compare the default run against the two time-punish runs (labelled -0.1 and
# -1 in the legend) and save the figure to time_punish.png.
scalers = ['charts/train_soup']

# Forward slashes keep these relative paths valid on Linux/macOS as well as
# on Windows (backslash separators only resolve on Windows).
without_time = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs/events.out.tfevents.1713584415.overcooked-training.5534.0'

with_time_01 = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs_with_time_punish_0.1/events.out.tfevents.1713598269.overcook-runner.32077.0'

with_time_1 = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs_with_time_punish_1/events.out.tfevents.1713598303.overcook-runner.32955.0'

without_time_res = parse_tensorboard(without_time, scalers)
with_time_01_res = parse_tensorboard(with_time_01, scalers)
with_time_1_res = parse_tensorboard(with_time_1, scalers)

dfs = [
    without_time_res['charts/train_soup'],
    with_time_01_res['charts/train_soup'],
    with_time_1_res['charts/train_soup'],
]
fig, ax = make_plot(dfs, ['without_time_punish (default)', 'time_punish -0.1', 'time_punish -1'], value='value')
ax.set_xlim([0, 3000])
ax.set_ylim([0, 4])
ax.legend()
ax.set_xlabel('num_update')
ax.set_ylabel('mean number of soup per episode')
fig.savefig('time_punish.png')

In [None]:
# Compare runs trained with 3 (default), 1 and 6 environments and save the
# figure to num_envs.png.
scalers = ['charts/train_soup']

# Forward slashes keep these relative paths valid on Linux/macOS as well as
# on Windows (backslash separators only resolve on Windows).
num_envs_3 = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs/events.out.tfevents.1713584415.overcooked-training.5534.0'

num_envs_1 = './runs/fcnn_v2/cramped_room/fcnn_v2_1_num_envs/events.out.tfevents.1713631968.with-extra-reward.53789.0'

num_envs_6 = './runs/fcnn_v2/cramped_room/fcnn_v2_6_num_envs/events.out.tfevents.1713628362.overcooked-training.1933271.0'

num_envs_3_res = parse_tensorboard(num_envs_3, scalers)
num_envs_1_res = parse_tensorboard(num_envs_1, scalers)
num_envs_6_res = parse_tensorboard(num_envs_6, scalers)

dfs = [
    num_envs_3_res['charts/train_soup'],
    num_envs_1_res['charts/train_soup'],
    num_envs_6_res['charts/train_soup'],
]
fig, ax = make_plot(dfs, ['3 envs (default)', '1 envs', '6 envs'], value='value')
ax.set_xlim([0, 3000])
ax.set_ylim([0, 6])
ax.legend()
ax.set_xlabel('num_update')
ax.set_ylabel('mean number of soup per episode')
fig.savefig('num_envs.png')

In [None]:
# Compare the default run against the "all_fast" and "all_slow" runs and save
# the figure to lr.png.
scalers = ['charts/train_soup']

# Forward slashes keep these relative paths valid on Linux/macOS as well as
# on Windows (backslash separators only resolve on Windows).
normal = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs/events.out.tfevents.1713584415.overcooked-training.5534.0'

fast = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs_all_fast/events.out.tfevents.1713613616.DESKTOP-I7N4NTT.2160.0'

slow = './runs/fcnn_v2/cramped_room/fcnn_v2_3_num_envs_all_slow/events.out.tfevents.1713592644.overcooked-training.4077513.0'

normal_res = parse_tensorboard(normal, scalers)
fast_res = parse_tensorboard(fast, scalers)
slow_res = parse_tensorboard(slow, scalers)

dfs = [
    normal_res['charts/train_soup'],
    fast_res['charts/train_soup'],
    slow_res['charts/train_soup'],
]
fig, ax = make_plot(dfs, ['Normal (default)', 'Fast', 'Slow'], value='value')
ax.set_xlim([0, 5000])
ax.set_ylim([0, 8])
ax.legend()
ax.set_xlabel('num_update')
ax.set_ylabel('mean number of soup per episode')
fig.savefig('lr.png')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_data_with_std(df, ys):
    """Draw every series in ``ys`` as a line with a shaded +/- 1 std band.

    For each name ``y`` in ``ys`` the frame must provide the columns ``y`` and
    ``f'{y}_std'``; the ``num_update`` column supplies the x-axis.

    Returns:
        The ``(fig, ax)`` pair, with the legend already added to ``ax``.
    """
    fig, ax = plt.subplots()

    for series_name in ys:
        center = df[series_name]
        spread = df[f'{series_name}_std']
        x = df['num_update']

        # solid line for the mean, labelled with the column name
        ax.plot(x, center, label=series_name)

        # translucent envelope spanning mean - std to mean + std
        lower, upper = center - spread, center + spread
        ax.fill_between(x, lower, upper, alpha=0.2)

    ax.legend()
    return fig, ax


from ppo_fcnn_v2 import helper_func_obs
import torch
# NOTE(review): `device` is not referenced by any code visible in this
# notebook; the Policy further down is built with the literal 'cpu' instead.
device = 'cpu'

def evaluate_policy(ppo_policy, env, num_episode=1000, soup_reward=20, deterministic=False):
    """Roll out ``ppo_policy`` in ``env`` and count soups made per episode.

    Each episode starts with ``env.reset()`` and runs until ``env.step``
    reports done. After every step the reward is divided by ``soup_reward``,
    truncated to an integer, and added to that episode's counter.

    Args:
        ppo_policy: object exposing
            ``get_action_and_value(obs, deterministic=...)`` whose second
            return value is a tensor of actions.
        env: environment whose observations carry a ``"both_agent_obs"``
            entry and whose ``step`` returns ``(obs, reward, done, info)``.
        num_episode: number of episodes to run.
        soup_reward: reward counted as one soup. Defaults to 20, the value
            that used to be hard-coded (presumably the per-soup delivery
            reward; verify against the environment configuration).
        deterministic: forwarded to ``get_action_and_value``; the default
            ``False`` matches the previous hard-coded behaviour.

    Returns:
        ``numpy.ndarray`` of length ``num_episode`` with the per-episode soup
        counts.
    """
    num_soups_made = np.zeros(num_episode)

    for i in range(num_episode):
        next_done = False
        next_obs = env.reset()
        reshaped_obs = helper_func_obs(next_obs["both_agent_obs"])

        while not next_done:
            # inference only: no gradients are needed for evaluation
            with torch.no_grad():
                _, actions, _, _ = ppo_policy.get_action_and_value(
                    reshaped_obs,
                    deterministic=deterministic,
                )

            # flatten the action tensor into a plain list for the env
            next_obs, R, next_done, _info = env.step(actions.view(-1).tolist())
            reshaped_obs = helper_func_obs(next_obs["both_agent_obs"])
            num_soups_made[i] += int(R / soup_reward)

    return num_soups_made

from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
from overcooked_ai_py.agents.agent import NNPolicy, AgentFromPolicy, AgentPair
from overcooked_ai_py.agents.benchmarking import AgentEvaluator
from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
import gym, os
from tqdm import tqdm
def evaluate_multiple_policies(base_policy, layout_name, num_episode=2):
    """Evaluate every saved checkpoint of one layout.

    Builds an ``Overcooked-v0`` gym environment for ``layout_name``, then
    loads each file in ``./runs/fcnn_v2/<layout_name>/final`` whose name
    contains ``'model.pt'`` into ``base_policy`` and evaluates it with
    ``evaluate_policy``.

    Args:
        base_policy: policy object exposing ``load_model(path)``; its weights
            are overwritten for every checkpoint.
        layout_name: Overcooked layout name; also used as the column name of
            the result.
        num_episode: number of evaluation episodes per checkpoint.

    Returns:
        ``pandas.DataFrame`` with the columns ``num_update``, ``<layout_name>``
        (mean soups) and ``<layout_name>_std``, with rows in ascending
        ``num_update`` order.

    Raises:
        ValueError: if a checkpoint file name does not start with an integer
            followed by ``-``.
    """
    # create envs
    mdp = OvercookedGridworld.from_layout_name(
        layout_name,
        rew_shaping_params=None,
    )
    base_env = OvercookedEnv.from_mdp(
        mdp, horizon=400, info_level=0
    )

    env = gym.make(
        "Overcooked-v0",
        base_env=base_env,
        featurize_fn=base_env.featurize_state_mdp,
    )

    model_dir = rf'./runs/fcnn_v2/{layout_name}/final'
    # os.listdir returns names in arbitrary order, so sort the checkpoints by
    # the update number encoded before the first '-' to get a deterministic,
    # ordered result.
    checkpoints = sorted(
        (int(name.split('-')[0]), name)
        for name in os.listdir(model_dir)
        if 'model.pt' in name
    )

    updates, means, stds = [], [], []
    for num_update, param_file in tqdm(checkpoints, total=len(checkpoints)):
        base_policy.load_model(os.path.join(model_dir, param_file))

        num_soups_made = evaluate_policy(base_policy, env, num_episode=num_episode)

        updates.append(num_update)
        means.append(np.mean(num_soups_made))
        stds.append(np.std(num_soups_made))

    return pd.DataFrame({
        'num_update': updates,
        layout_name: means,
        layout_name + '_std': stds,
    })

In [None]:
from ppo_fcnn_v2 import Policy

# A single Policy instance is shared; evaluate_multiple_policies loads each
# checkpoint into it in turn. The per-layout frames are joined on num_update.
base_policy = Policy(100, 6, 'cpu')
final_res = None
for layout in ['cramped_room', 'asymmetric_advantages', 'coordination_ring', 'forced_coordination', 'counter_circuit_o_1order']:
    print(layout)
    res = evaluate_multiple_policies(base_policy, layout, num_episode=100)
    final_res = res if final_res is None else final_res.merge(res, on='num_update')

In [None]:
# Plot mean +/- std soups per layout over training updates and save the
# figure to final_res.png.
layout_names = ['cramped_room', 'asymmetric_advantages', 'coordination_ring', 'forced_coordination', 'counter_circuit_o_1order']
final_res = final_res.sort_values('num_update')
fig, ax = plot_data_with_std(final_res, ys=layout_names)

ax.set_xlabel('Num Update')
ax.set_ylabel('Mean Number of Soup')

# dashed black reference line at y=7 spanning updates 0 to 5000
ax.hlines(y=7, xmin=0, xmax=5000, color='black', linestyles='--')

fig.savefig('final_res.png')

In [None]:
# For the final run of every layout, print the first five logged steps at
# which the training soup count is positive.
scalers = ['charts/train_soup']

# Event file per layout. Forward slashes keep these relative paths valid on
# Linux/macOS as well as on Windows. Keeping the paths in a dict means a
# layout name is no longer bound first to a path string and then to a result.
final_event_files = {
    'cramped_room': './runs/fcnn_v2/cramped_room/final/events.out.tfevents.1713657918.overcooked-training.1423059.0',
    'asymmetric_advantages': './runs/fcnn_v2/asymmetric_advantages/final/events.out.tfevents.1713657943.overcooked-training.1433505.0',
    'coordination_ring': './runs/fcnn_v2/coordination_ring/final/events.out.tfevents.1713671226.with-extra-reward.32297.0',
    'forced_coordination': './runs/fcnn_v2/forced_coordination/final/events.out.tfevents.1713658130.overcook-runner.101915.0',
    'counter_circuit_o_1order': './runs/fcnn_v2/counter_circuit_o_1order/final/events.out.tfevents.1713658505.DESKTOP-I7N4NTT.36036.0',
}

final_runs = {
    name: parse_tensorboard(path, scalers)
    for name, path in final_event_files.items()
}

# Per-layout globals still end up bound to the parsed results, as before.
cramped_room = final_runs['cramped_room']
asymmetric_advantages = final_runs['asymmetric_advantages']
coordination_ring = final_runs['coordination_ring']
forced_coordination = final_runs['forced_coordination']
counter_circuit_o_1order = final_runs['counter_circuit_o_1order']

dfs = [run['charts/train_soup'] for run in final_runs.values()]

# Names come from the dict itself (insertion order), so the printout cannot
# drift out of sync with a list defined in another cell.
for name, df in zip(final_runs, dfs):
    print(name)
    print(df.loc[df['value'] > 0, 'step'].iloc[:5].to_list())