In [None]:
import warnings
from itertools import combinations, permutations
from os import makedirs
from os.path import join, dirname

import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import mannwhitneyu
warnings.filterwarnings('ignore')

In [None]:
plt.rcParams.update({'font.size': 12})

pd.set_option('display.float_format', '{:.6g}'.format)
df_save_path = 'tmp/evaluation_results'

# Raw identifiers stored in the results file -> labels used in tables and figures.
ALGO_LABELS = {
    'q_learning': 'Q Learning',
    'mb_q_learning': 'Model-Based Q Learning',
    'policy_iteration': 'Policy Iteration',
    'rmin': 'R-Min',
}
MODEL_LABELS = {
    'model_final_h-20bits': 'DiscFinalH',
    'model_aeq-20bits': 'DiscLSTM-AE',
    'model_h_c-20bits': 'DiscHC',
    'untrained_model_final_h-20bits': 'Untrained DiscFinalH',
    'untrained_model_aeq-20bits': 'Untrained DiscLSTM-AE',
    'untrained_model_h_c-20bits': 'Untrained DiscHC',
}

# NOTE(security): unpickling can execute arbitrary code; only load result
# files produced by a trusted run.
results_df = pd.read_pickle(df_save_path).fillna(np.nan)
results_df['algo'] = results_df['algo'].replace(ALGO_LABELS)
results_df['model_name'] = results_df['model_name'].replace(MODEL_LABELS)

# Keep rows saved at an 'x_interval' checkpoint or with no save_type at all.
# The explicit copy makes the column assignment below act on an independent
# frame instead of on a slice of the unpickled data.
keep_save_type = (results_df['save_type'] == 'x_interval') | (results_df['save_type'].isna())
results_df = results_df[keep_save_type].copy()

algos = results_df['algo'].unique().tolist()
results_df['log_model_loss'] = np.log10(results_df['model_loss'])

# Derived views reused by the hypothesis tests below.
trained_results_df = results_df[results_df['trained_model']==True]
# Rows with the fixed dataset size of 10000, plus rows that have no dataset size.
const_dataset_results_df = results_df[(results_df['dataset_size']==10000)|(results_df['dataset_size'].isnull())]
const_dataset_trained_results_df = const_dataset_results_df[const_dataset_results_df['trained_model']==True]
results_df

In [None]:
def save_fig(plot_obj, fig_save_path):
    """Save a figure-like object to ``fig_save_path``, creating parent folders first.

    Args:
        plot_obj: Any object exposing ``savefig(path, **kwargs)``.
        fig_save_path: Destination path, handed to ``savefig`` unchanged.
    """
    parent_dir = dirname(fig_save_path)
    # dirname() is '' for a bare file name and makedirs('') raises
    # FileNotFoundError, so only create a directory when the path has one.
    if parent_dir:
        makedirs(parent_dir, exist_ok=True)
    plot_obj.savefig(fig_save_path, bbox_inches='tight')

In [None]:
def plot_bar_graph(df,x,y='mean',yerr_idx='std',ylim = None, figsize=None, save_path=None,**kwargs):
    """Draw a seaborn bar plot of ``df`` and overlay error bars taken from ``df[yerr_idx]``.

    Args:
        df: Frame holding the ``x``, ``y`` and ``yerr_idx`` columns.
        x: Column placed on the x axis.
        y: Column giving the bar heights.
        yerr_idx: Column giving the error-bar half-lengths.
        ylim: Optional y-axis limits.
        figsize: Figure size in inches; (12, 7) when omitted.
        save_path: When given, the figure is written there via ``save_fig``.
        **kwargs: Forwarded to ``sns.barplot``.
    """
    plt.figure(figsize=figsize or (12, 7))
    bar_ax = sns.barplot(x=x, y=y, data=df, **kwargs)
    if ylim:
        bar_ax.set_ylim(ylim)

    # One error bar per drawn patch, anchored at the top centre of the bar.
    bar_centres = []
    bar_tops = []
    for bar in bar_ax.patches:
        bar_centres.append(bar.get_x() + 0.5 * bar.get_width())
        bar_tops.append(bar.get_height())
    bar_ax.errorbar(x=bar_centres, y=bar_tops, yerr=df[yerr_idx], fmt="none", c="k", capsize=8)

    if save_path:
        save_fig(bar_ax.get_figure(), save_path)

def box_plot(df,x,y='mean',ylim = None, xlim=None,figsize=None, save_path=None, major_ticks=np.arange(-2000, -150, 200), minor_ticks = np.arange(-2000, -150, 100),legend_title=None,**kwargs):
    """Draw a seaborn box plot with major/minor x ticks, a grid and a faded legend.

    Args:
        df: Source frame; it is copied, so the caller's frame is never modified.
        x: Column placed on the x axis (the tick arrays are applied to this axis).
        y: Column placed on the y axis; boolean values are shown as 'True'/'False'.
        ylim: Optional y-axis limits.
        xlim: Optional x-axis limits.
        figsize: Figure size in inches; (12, 7) when omitted.
        save_path: When given, the figure is written there via ``save_fig``.
        major_ticks: Positions of the major x ticks.
        minor_ticks: Positions of the minor x ticks.
        legend_title: Optional title for the legend (ignored when there is no legend).
        **kwargs: Forwarded to ``sns.boxplot`` (e.g. ``hue``, ``boxprops``).
    """
    df = df.copy()
    if df[y].dtypes.name == 'bool':
        # Map booleans to strings so they are drawn as two labelled categories.
        df[y] = df[y].map({True: 'True', False: 'False'})
    figsize = figsize if figsize else (12, 7)
    plt.figure(figsize=figsize)
    ax = sns.boxplot(x=x, y=y, data=df,**kwargs)
    # `is not None` (rather than truthiness) so numpy arrays are accepted as limits.
    if ylim is not None:
        ax.set_ylim(ylim)
    if xlim is not None:
        ax.set_xlim(xlim)

    ax.set_xticks(major_ticks)
    ax.set_xticks(minor_ticks, minor=True)

    ax.grid(which='minor', alpha=0.2)
    ax.grid(which='major', alpha=0.5)

    # Only build a legend when there is something to label; otherwise matplotlib
    # warns and draws an empty legend.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        leg = ax.legend()
        # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and the
        # old name was later removed; support both spellings.
        legend_handles = getattr(leg, 'legend_handles', None)
        if legend_handles is None:
            legend_handles = leg.legendHandles
        for lh in legend_handles:
            lh.set_alpha(0.3)
        if legend_title:
            leg.set_title(legend_title)

    if save_path:
        save_fig(ax.get_figure(),save_path)

def dist_plot(df,x,y='mean',ylim = None, xlim=None,figsize=None, save_path=None, major_ticks=np.arange(-2000, -150, 200), minor_ticks = np.arange(-2000, -150, 100),legend_title=None,**kwargs):
    """Draw a seaborn ``displot`` with major/minor x ticks, a grid and a faded legend.

    ``sns.displot`` is figure-level: it creates its own figure and returns a
    FacetGrid rather than an Axes, so styling is applied to every facet axes
    and the figure size is controlled through ``height``/``aspect``.

    Args:
        df: Source frame.
        x: Column placed on the x axis.
        y: Column placed on the y axis.
        ylim: Optional y-axis limits, applied to every facet.
        xlim: Optional x-axis limits, applied to every facet.
        figsize: (width, height) in inches of each facet; (12, 7) when omitted.
            Ignored for whichever of ``height``/``aspect`` is passed explicitly.
        save_path: When given, the figure is written there via ``save_fig``.
        major_ticks: Positions of the major x ticks.
        minor_ticks: Positions of the minor x ticks.
        legend_title: Optional title for the legend (ignored when there is no legend).
        **kwargs: Forwarded to ``sns.displot`` (e.g. ``hue``, ``kind``).
    """
    width, height = figsize if figsize else (12, 7)
    # No plt.figure() here: displot would ignore it and leave an empty figure behind.
    kwargs.setdefault('height', height)
    kwargs.setdefault('aspect', width / height)
    grid = sns.displot(x=x, y=y, data=df, **kwargs)

    if ylim is not None:
        grid.set(ylim=ylim)
    if xlim is not None:
        grid.set(xlim=xlim)

    for facet_ax in grid.axes.flat:
        facet_ax.set_xticks(major_ticks)
        facet_ax.set_xticks(minor_ticks, minor=True)
        facet_ax.grid(which='minor', alpha=0.2)
        facet_ax.grid(which='major', alpha=0.5)

    # The grid owns the legend (None when nothing was mapped to hue).
    leg = getattr(grid, 'legend', None)
    if leg is not None:
        # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles.
        legend_handles = getattr(leg, 'legend_handles', None)
        if legend_handles is None:
            legend_handles = getattr(leg, 'legendHandles', [])
        for lh in legend_handles:
            lh.set_alpha(0.3)
        if legend_title:
            leg.set_title(legend_title)

    if save_path:
        # FacetGrid exposes savefig(), which is all save_fig needs.
        save_fig(grid, save_path)


# Test if training helps
$H_{0}$: Training the discretization model has no effect on the final rewards
$H_{a}$: Training the discretization model results in greater rewards

In [None]:
training_hypothesis_columns = ['algo','x','y','p-value']
training_hypothesis_rows = []

# Masks are built once on the frame they index, instead of chaining a second
# boolean filter whose mask has to be re-aligned to the already-filtered frame.
is_trained = const_dataset_results_df['trained_model']==True
is_untrained = const_dataset_results_df['trained_model']==False

for algo in results_df['algo'].unique():
    is_algo = const_dataset_results_df['algo']==algo
    trained_rewards = const_dataset_results_df.loc[is_algo & is_trained, 'rewards'].tolist()
    untrained_rewards = const_dataset_results_df.loc[is_algo & is_untrained, 'rewards'].tolist()

    # One-sided test: are rewards with a trained model greater than with an untrained one?
    # mannwhitneyu returns the U statistic first; only the p-value is reported.
    u_statistic, p_value = mannwhitneyu(trained_rewards,untrained_rewards, alternative='greater')
    training_hypothesis_rows.append([algo, True, False, p_value])

# Build the table once rather than concatenating one row per iteration.
training_hypothesis_df = pd.DataFrame(training_hypothesis_rows, columns=training_hypothesis_columns)
print(training_hypothesis_df)
fig_save_path = join('tmp','results','training_test')
box_plot(df=const_dataset_results_df, x="rewards", y="algo",hue='trained_model',xlim=[-2000,-100],save_path=fig_save_path,boxprops=dict(alpha=.3),figsize=(10,12),legend_title='Model Trained?')

In [None]:

# Every ordered (A, B) pair of distinct algorithms; both orders are kept
# because the comparison below is one-sided.
algo_combinations = [*permutations(algos, 2)]
algo_combinations

# Test if results from one algorithm are better than those from another
$H_{0}$: Algorithm A results in the same rewards as B
$H_{a}$: Algorithm A results in rewards greater than B

In [None]:
algo_comp_hypothesis_columns = ['algo_A','algo_B','p-value']

# Rewards of trained models only, collected once per algorithm instead of
# being re-filtered (with a chained, re-aligned mask) for every pair.
trained_only_df = const_dataset_results_df[const_dataset_results_df['trained_model']==True]
trained_rewards_by_algo = {
    algo: trained_only_df.loc[trained_only_df['algo']==algo, 'rewards'].tolist()
    for algo_combination in algo_combinations
    for algo in algo_combination
}

algo_comp_rows = []
for algo_combination in algo_combinations:
    print(algo_combination)
    algo_a, algo_b = algo_combination
    # One-sided test: are algo_A's rewards greater than algo_B's?
    _, p_value = mannwhitneyu(trained_rewards_by_algo[algo_a],trained_rewards_by_algo[algo_b], alternative='greater')
    algo_comp_rows.append([algo_a, algo_b, p_value])

# Build the table once rather than concatenating one row per iteration.
algo_comp_hypothesis_df = pd.DataFrame(algo_comp_rows, columns=algo_comp_hypothesis_columns)
print(algo_comp_hypothesis_df)
print(algo_comp_hypothesis_df.to_latex())
fig_save_path = join('tmp','results','algo_compare')
box_plot(df=trained_only_df,x="rewards",y='algo',figsize=(7,12),boxprops=dict(alpha=.3),save_path=fig_save_path,major_ticks=np.arange(-1000, -150, 100), minor_ticks = np.arange(-1000, -150, 50))

In [None]:
def calculate_corr(df,x,y):
    """Compute Kendall's tau between columns ``x`` and ``y`` separately for each algorithm.

    Args:
        df: Frame with an 'algo' column plus the ``x`` and ``y`` columns.
        x: Name of the first column.
        y: Name of the second column.

    Returns:
        DataFrame with columns ['algo', 'corr', 'p_value'], one row per distinct
        value of ``df['algo']`` in order of first appearance, indexed 0..n-1.
    """
    col = ['algo','corr','p_value']
    rows = []
    for algo in df['algo'].unique():
        algo_rows = df[df['algo']==algo]
        corr, p_value = scipy.stats.kendalltau(algo_rows[x].tolist(), algo_rows[y].tolist())
        rows.append([algo, corr, p_value])
    # Built in one go: avoids re-copying the table on every iteration and the
    # all-zero index that row-by-row concatenation produces.
    return pd.DataFrame(rows, columns=col)

# Correlation between rewards and dataset size
$H_{0}$: Rewards are independent from the dataset size used for the offline algorithms
$H_{a}$: Rewards are dependent on the dataset size used for the offline algorithms

In [None]:
# Only the offline algorithms are analysed for a dataset-size effect.
offline_algos = ['R-Min', 'Policy Iteration']
offline_trained_df = trained_results_df[trained_results_df['algo'].isin(offline_algos)]

# Kendall's tau between dataset size and rewards, per offline algorithm.
dataset_size_corr_df = calculate_corr(offline_trained_df,'dataset_size','rewards')
fig_save_path = join('tmp','results','dataset_compare')
box_plot(df=offline_trained_df,x="rewards",y='algo',hue='dataset_size',figsize=(7,12),boxprops=dict(alpha=.3),save_path=fig_save_path,major_ticks=np.arange(-1000, -150, 100), minor_ticks = np.arange(-1000, -150, 50),legend_title='Dataset Size (Episodes)')
print(dataset_size_corr_df.reset_index(drop=True).to_latex())

In [None]:
# Algorithm identifiers were replaced by display labels when the results were
# loaded, so the rows carry 'R-Min' (the raw 'rmin' no longer matches anything).
offline_trained_df[offline_trained_df['algo']=='R-Min']

# Correlation between rewards and model loss
$H_{0}$: Rewards are independent from the loss of the discretizing model
$H_{a}$: Rewards are dependent on the loss of the discretizing model

In [None]:

# Trained-model rows whose dataset size is either missing or the fixed 10000.
loss_test_df = const_dataset_trained_results_df.loc[
    const_dataset_trained_results_df['dataset_size'].isin([np.nan, 10000])
]
# Kendall's tau between model loss and rewards, per algorithm.
model_loss_corr_df = calculate_corr(df=loss_test_df, x='model_loss', y='rewards')

print(model_loss_corr_df)
print(model_loss_corr_df.to_latex())

# Correlation between rewards and log model loss
$H_{0}$: Rewards are independent from the log loss of the discretizing model
$H_{a}$: Rewards are dependent on the log loss of the discretizing model

In [None]:
log_model_loss_corr_df = calculate_corr(loss_test_df,'log_model_loss','rewards')
# Print the table computed in this cell, not the raw-loss table from the previous one.
print(log_model_loss_corr_df)

# Correlation between rewards and total_states
$H_{0}$: Rewards are independent from the total states in the policy
$H_{a}$: Rewards are dependent on the total states in the policy

In [None]:
# One correlation table (total states vs. rewards, per algorithm) for each model.
for model in loss_test_df['model_name'].unique():
    print(model)
    model_rows = loss_test_df[loss_test_df['model_name'] == model]
    total_states_corr_df = calculate_corr(df=model_rows, x='total_states', y='rewards')
    print(total_states_corr_df.to_latex())

box_plot(
    loss_test_df,
    x='total_states',
    y='model_name',
    major_ticks=np.arange(0, 8000, 500),
    minor_ticks=np.arange(0, 8000, 250),
    save_path=join('tmp', 'results', 'total-states-dist'),
    boxprops={'alpha': .3},
)

# Correlation between rewards and observed states
$H_{0}$: Rewards are independent from the observed states during evaluation
$H_{a}$: Rewards are dependent on the observed states during evaluation

In [None]:
# Kendall's tau between the 'unique_obs' column and rewards, per algorithm.
observed_states_corr_df = calculate_corr(df=loss_test_df, x='unique_obs', y='rewards')
print(observed_states_corr_df)

In [None]:
# Mean and standard deviation of the model loss for every model.
# A single groupby already covers all models, so no per-model loop is needed.
mean_loss_df = results_df.groupby('model_name')['model_loss'].agg(['mean','std']).reset_index()
mean_loss_df

In [None]:
# Distribution of 'new_unique_obs' per algorithm, split by model.
box_plot(
    df=const_dataset_results_df,
    x='new_unique_obs',
    y='algo',
    hue='model_name',
    major_ticks=np.arange(0, 200, 10),
    minor_ticks=np.arange(0, 200, 5),
    save_path=join('tmp', 'results', 'new-unique-obs-dist'),
    boxprops={'alpha': .3},
)


# Test if results from one model are better than those from another
$H_{0}$: Model A results in the same rewards as B
$H_{a}$: Model A results in rewards greater than B

In [None]:
model_comp_hypothesis_columns = ['Model A','Model B','p-Value']
models = const_dataset_trained_results_df['model_name'].unique()
model_permutations = list(permutations(models,2))

# Rewards collected once per model instead of being re-filtered for every pair.
trained_rewards_by_model = {
    model: const_dataset_trained_results_df.loc[const_dataset_trained_results_df['model_name']==model, 'rewards'].tolist()
    for model in models
}

model_comp_rows = []
for model_combination in model_permutations:
    model_a, model_b = model_combination
    # One-sided test: are Model A's rewards greater than Model B's?
    _, p_value = mannwhitneyu(trained_rewards_by_model[model_a],trained_rewards_by_model[model_b], alternative='greater')
    model_comp_rows.append([model_a, model_b, p_value])

# The table uses the model column names only; seeding it with the algorithm
# comparison columns would leave three extra, all-empty columns in the output.
model_comp_hypothesis_df = pd.DataFrame(model_comp_rows, columns=model_comp_hypothesis_columns)

print(model_comp_hypothesis_df.to_latex())
box_plot(const_dataset_trained_results_df,x='rewards',y='algo',hue='model_name',major_ticks=np.arange(-1000, -150, 100), minor_ticks = np.arange(-1000, -150, 50),legend_title='Discretization Models',save_path=join('tmp','results','model-reward-dist'),boxprops=dict(alpha=.3),figsize=(7,12))

In [None]:

# Average every numeric column per (experiment, model, algorithm).
# numeric_only=True keeps this working on pandas >= 2.0, where averaging
# non-numeric columns raises instead of silently dropping them.
per_experiment_mean_df = (
    const_dataset_trained_results_df
    .groupby(['exp_id','model_name','algo'])
    .mean(numeric_only=True)
    .reset_index()
)
# Column-wise maximum of those averages for each (model, algorithm) pair.
# Each column is maximised independently, so a row need not be a single experiment.
best_experiment_df = per_experiment_mean_df.groupby(['model_name','algo']).agg('max').reset_index()

print(best_experiment_df.to_latex())
best_experiment_df

# Mean Algo rewards per experiment

In [None]:


# Reward statistics for every (experiment, algorithm, model, dataset size) group;
# dropna=False keeps groups whose key contains NaN.
exp_algo_grouped_mean_rewards_df = (
    trained_results_df
    .groupby(['exp_id','algo','model_name','dataset_size'],dropna=False)['rewards']
    .agg(['mean','std','max','count'])
    .reset_index()
)

# After sorting by mean, keep='last' retains the highest-mean group per algorithm.
exp_algo_grouped_mean_rewards_red_df = exp_algo_grouped_mean_rewards_df.sort_values('mean').drop_duplicates(['algo'],keep='last')

# Hard-coded PPO row (mean -185.24, std 0.88) added for comparison.
# NOTE(review): the source of these numbers is not recorded in this notebook.
ppo_df = pd.DataFrame([[np.nan,'PPO',np.nan,np.nan,-185.24,0.88, np.nan,np.nan]],columns=['exp_id', 'algo', 'model_name', 'dataset_size', 'mean', 'std', 'max',
       'count'])
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
exp_algo_grouped_mean_rewards_red_df = pd.concat([exp_algo_grouped_mean_rewards_red_df, ppo_df])
exp_algo_grouped_mean_rewards_red_df = exp_algo_grouped_mean_rewards_red_df.sort_values(['mean'])
plot_bar_graph(df=exp_algo_grouped_mean_rewards_red_df,x='algo', ylim=[-230, -175],save_path=join('tmp','results','algo_best_rewards'), alpha=.3)

exp_algo_grouped_mean_rewards_red_df

In [None]:
# Evaluates to a DataFrameGroupBy object; no aggregation is applied, so the
# cell only shows the object's repr.
results_df.groupby(by=['exp_id', 'model_name', 'dataset_size'])


# Box plot of training variances

In [None]:
# Display the reward statistics (mean, std, max, count) per
# (experiment, algorithm, model, dataset size) group.
exp_algo_grouped_mean_rewards_df

In [None]:
# Mean of every numeric column per (algorithm, model).
# numeric_only=True keeps this working on pandas >= 2.0, where averaging
# non-numeric columns raises instead of silently dropping them.
const_dataset_trained_results_df.groupby(['algo','model_name']).mean(numeric_only=True).reset_index()

In [None]:
tmp_df = const_dataset_trained_results_df.copy()
# Replace the experiment identifiers with consecutive integer codes starting at 1.
tmp_df['exp_id'] = tmp_df['exp_id'].astype('category').cat.codes+1
save_path = join('tmp','results','training_variations')
# save_path is now passed on, so this figure is written to disk like every
# other box plot in the notebook (it was computed but never used before).
# NOTE(review): confirm that saving this figure is intended.
box_plot(df=tmp_df,x='rewards',y='algo',hue='model_name' ,major_ticks=np.arange(-1000, -150, 100), minor_ticks = np.arange(-1000, -150, 50),boxprops=dict(alpha=.3),figsize=(8,17),legend_title='Discrete Model',save_path=save_path)



In [None]:
# Rows of the plotting frame that belong to the 'Q Learning' algorithm.
tmp_df.loc[tmp_df['algo'].eq('Q Learning')]