# Load Data

In [7]:
import os
import tensorflow as tf
from tensorflow.core.util import event_pb2
import glob
import numpy as np
import pandas as pd
from scipy.stats import f_oneway, pearsonr
from scipy.special import rel_entr
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# ---- Figure / export settings -------------------------------------------
figsize = (15, 10)
fontsize = 45
export_dir = './plots_bar/'
load_dir = './data_tournament/'   # directory holding the tournament .npz files
filetype = 'pdf'

# ---- Experiment grid ----------------------------------------------------
# Short codes are used to build file names; the *_dict mappings translate
# them into the human-readable labels stored in the data frames.
env_name = 'prisoners_dilemma'
row_name = ['S', 'M', 'L', 'O']
col_name = ['SP5', 'PP3', 'PP5']

row_name_dict = dict(S='Small', M='Medium', L='Large', O='Obstacle')

col_name_dict = dict(
    SP5='SP 5 Seeds',
    PP3='PP 3 Seeds',
    PP5='PP 5 Seeds',
    FCP3='FCP 3 Seeds',
    FCP5='FCP 5 Seeds',
)

# ---- Seaborn theme ------------------------------------------------------
# NOTE(review): the second sns.set(...) call presumably re-applies the default
# theme and may override the "paper" context set just before it -- confirm
# the intended context before reordering these calls.
sns.set()
sns.set_context("paper")
sns.set(rc={'figure.figsize': figsize})
sns.set_style("whitegrid")
palette = ["#f8ac8c", "#9ac9db", "#2878b5"]

# Tournament DF

In [8]:
# Build the long-format tournament table: one row per reward sample, labelled
# with the readable config and method names.
#
# One frame is collected per (config, method) file and everything is
# concatenated once at the end. This avoids the private `DataFrame._append`
# API and the quadratic cost of growing a DataFrame inside a loop.
tour_frames = []
for config_key in row_name:
    for method_key in col_name:
        tour_path = os.path.join(load_dir, f'{env_name}_{config_key}_{method_key}.npz')
        # `with` closes the underlying .npz archive once the array is read.
        with np.load(tour_path) as loaded:
            # Average over axis 1, then flatten to a 1-D vector of samples.
            current_reward = np.mean(loaded['rewards'], axis=1).flatten()
        tour_frames.append(pd.DataFrame({
            'rewards': current_reward,
            # Scalars are broadcast to the length of `rewards`.
            'config': row_name_dict[config_key],
            'method': col_name_dict[method_key],
        }))

# Fall back to an empty frame if the grid is empty (pd.concat rejects []).
tour_df = pd.concat(tour_frames, ignore_index=True) if tour_frames else pd.DataFrame()
tour_df

Unnamed: 0,rewards,config,method
0,59.381732,Small,SP 5 Seeds
1,64.212515,Small,SP 5 Seeds
2,59.201329,Small,SP 5 Seeds
3,60.340010,Small,SP 5 Seeds
4,59.259974,Small,SP 5 Seeds
...,...,...,...
715,52.268217,Obstacle,PP 5 Seeds
716,56.331844,Obstacle,PP 5 Seeds
717,52.823335,Obstacle,PP 5 Seeds
718,55.843384,Obstacle,PP 5 Seeds


# P-Value

In [9]:
# One-way ANOVA per config: do the three methods' reward samples share a mean?
anova_methods = ('SP 5 Seeds', 'PP 3 Seeds', 'PP 5 Seeds')
for config_key in row_name:
    config_label = row_name_dict[config_key]
    in_config = tour_df['config'] == config_label
    # One reward vector per method, in the order listed in `anova_methods`.
    samples = [
        tour_df.loc[(tour_df['method'] == method_label) & in_config]['rewards'].to_numpy()
        for method_label in anova_methods
    ]
    F, p = f_oneway(*samples)
    print(f'{config_label}:  \t F-statistic: {F:.4}\tp-value: {p:.2e}')

Small:  	 F-statistic: 72.48	p-value: 1.01e-23
Medium:  	 F-statistic: 288.7	p-value: 1.90e-56
Large:  	 F-statistic: 75.46	p-value: 1.99e-24
Obstacle:  	 F-statistic: 43.89	p-value: 3.31e-16


# Index

In [10]:
def kl_diversity_index(row_rewards, num_bins=50, value_range=(0, 100)):
    """Mean KL divergence between each row's reward histogram and the pooled one.

    Parameters
    ----------
    row_rewards : 2-D array-like
        One reward vector per row; each row is histogrammed separately.
    num_bins : int
        Number of histogram bins.
    value_range : (float, float)
        Histogram range. Values outside it are ignored by ``np.histogram``.

    Returns
    -------
    float
        Average over rows of ``sum(rel_entr(row_distribution, marginal_distribution))``.
        NOTE(review): if a row has no value inside ``value_range`` its histogram
        sums to zero and the result becomes NaN -- confirm rewards stay in range.
    """
    row_rewards = np.asarray(row_rewards)
    # Marginal distribution: histogram of all rows pooled together.
    marginal_hist = np.histogram(row_rewards.flatten(), bins=num_bins, range=value_range)[0]
    marginal_distribution = marginal_hist / np.sum(marginal_hist)
    # KL divergence of every row's distribution from the marginal.
    kl_divergence = np.zeros(len(row_rewards))
    for i, single_row in enumerate(row_rewards):
        hist = np.histogram(single_row, bins=num_bins, range=value_range)[0]
        distribution = hist / np.sum(hist)
        kl_divergence[i] = np.sum(rel_entr(distribution, marginal_distribution))
    return np.mean(kl_divergence)


env_name_list = [f'{env_name}_{size_key}' for size_key in row_name]
load_dir_list = [f'./data_index/{name}_5M_2.npz' for name in env_name_list]

index = []

# The loop variable is deliberately NOT called `load_dir`: that name holds the
# tournament data directory from the config cell and must survive this cell so
# earlier cells can be re-run.
for index_path in load_dir_list:
    with np.load(index_path) as loaded:
        eval_rewards = loaded['rewards'][0, ...]  # only have 1 evaluation seed
    # Leading axes are indexed as [eval_gen, seed]; the last axis selects the
    # row player's rewards (0).
    # NOTE(review): five seeds are hard-coded, as in the original -- confirm it
    # matches the data.
    index_list = [
        kl_diversity_index(eval_rewards[eval_gen, seed, :, :, 0])
        for seed in range(5)
        for eval_gen in range(eval_rewards.shape[0])
    ]
    print(f'{index_path} Avg: {np.mean(index_list):.3f} +/- {np.std(index_list):.2f}')
    index.append(np.mean(index_list))
print(index)

./data_index/prisoners_dilemma_S_5M_2.npz Avg: 1.377 +/- 0.11
./data_index/prisoners_dilemma_M_5M_2.npz Avg: 1.385 +/- 0.11
./data_index/prisoners_dilemma_L_5M_2.npz Avg: 1.180 +/- 0.09
./data_index/prisoners_dilemma_O_5M_2.npz Avg: 1.100 +/- 0.12
[1.3768534893524849, 1.385400101613565, 1.1799314745893448, 1.0997269286408466]


# Difference

In [11]:
def _mean_reward(method_label, config_label):
    """Mean tournament reward of one method within one config."""
    mask = (tour_df['method'] == method_label) & (tour_df['config'] == config_label)
    return np.mean(tour_df.loc[mask]['rewards'].to_numpy())


diff = []
for config_key in row_name:
    config_label = row_name_dict[config_key]
    # Per-method means for this config; only SP5 and PP5 enter the difference.
    method_means = {
        method_label: _mean_reward(method_label, config_label)
        for method_label in ('SP 5 Seeds', 'PP 3 Seeds', 'PP 5 Seeds')
    }
    # Half the gap between the PP 5 Seeds mean and the SP 5 Seeds mean.
    diff.append((method_means['PP 5 Seeds'] - method_means['SP 5 Seeds']) / 2)
print(diff)

[7.053505236390436, 9.468815777422634, 3.7930557074761815, 3.2388795821923395]


# Correlation

In [12]:
# Pearson correlation between the per-config index and the per-config
# reward difference (one point per entry of `row_name`).
res = pearsonr(x=index, y=diff)
res

PearsonRResult(statistic=0.9388776522962613, pvalue=0.06112234770373881)