# Evaluation Analysis


In [None]:
import os
import sys
import logging
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
sys.path.append(os.path.join(os.getcwd(), '..'))

from rlq_scheduler.common.config_helper import GlobalConfigHelper
from rlq_scheduler.common.plot_utils.stats import get_stats_dataframe, load_agents_data,\
    print_status, get_assignment_history_df
from rlq_scheduler.common.object_handler import MinioObjectHandler

# Global seaborn theme for the notebook: dark grid background, fonts scaled 2x.
sns.set(style="darkgrid", font_scale=2)
# Matplotlib rc overrides (line, axes, tick, figure-title and legend sizes).
# Key reference: https://matplotlib.org/3.1.1/tutorials/introductory/customizing.html
rc = {
    'lines.linewidth': 1,
    'lines.markersize': 10,
    'axes.titlesize': 18,
    'axes.labelsize': 16,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'legend.fancybox': False,
    'figure.titlesize' : 18,
    'legend.fontsize': 16,
    'legend.title_fontsize': 16
}
# NOTE: `rc` is not applied anywhere in the visible code, because the only
# statement that consumes it is commented out below.
# sns.set_context("notebook", rc=rc)

%matplotlib inline

# Root logger at INFO level; every record is prefixed with the process name.
logging.basicConfig(level=logging.INFO, format="%(processName)-10s | %(message)s")
logger = logging.getLogger()

# Load the project configuration and override the object handler's default
# bucket with 'sb1' for this notebook session.
global_config = GlobalConfigHelper(config_path='config/global.yml')
global_config.config['object_handler']['default_bucket'] = 'sb1'

# Fixed seed for NumPy's global random generator.
np.random.seed(42)

In [None]:
# Root folder for every figure produced by this multi-load evaluation.
BASE_PATH = '/jupyter/evaluation-images/multi-load'
# makedirs creates missing parents ('/jupyter/evaluation-images' included) and,
# with exist_ok=True, does not fail when the folder is already there. This
# avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(BASE_PATH, exist_ok=True)

# One sub-folder per reward function; plot_assignments_data saves its figure
# under f'{BASE_PATH}/{code}', where `code` is one of these names.
rew_names = ['waiting-time', 'execution-time', 'execution-cost']
for name in rew_names:
    path = os.path.join(BASE_PATH, name)
    os.makedirs(path, exist_ok=True)

In [None]:

# Object-store connection settings. The endpoint is pinned to a fixed address
# (the environment lookup is intentionally left disabled); the credentials are
# read from the environment.
endpoint = "10.25.1.120:9000" # os.getenv('MINIO_ENDPOINT')
access_key = os.getenv('MINIO_ACCESSKEY')
secret_key = os.getenv('MINIO_SECRETKEY')
# Parse the flag explicitly: bool() of any non-empty string is True, so values
# such as "false" or "0" used to enable `secure` by accident. An unset variable
# still yields False, as before.
secure = os.getenv('MINIO_SECURE', '').strip().lower() in ('1', 'true', 'yes', 'on')
handler = MinioObjectHandler(
    endpoint=endpoint,
    access_key=access_key,
    secret_key=secret_key,
    secure=secure,
    default_bucket=global_config.object_handler_base_bucket()
)

In [None]:
def build_datatable(prop, parent_prop, agent_data=None):
    """Flatten one per-step series of every run into a long-format DataFrame.

    Parameters
    ----------
    prop : str
        Key of the series to extract; also the name of the value column.
    parent_prop : str
        Key of the section of ``stats.to_dict()`` that contains ``prop``.
    agent_data : mapping, optional
        Maps an agent name to a dict holding a ``'runs_stats'`` list. When
        omitted, the notebook-level ``agents`` variable is used (the previous
        behaviour). Passing it explicitly makes the result independent of
        which section of the notebook last rebound ``agents``.

    Returns
    -------
    pandas.DataFrame
        One row per element of the series, with columns
        ``[prop, 't', 'agent', 'seed']``; ``t`` is the element's position in
        the series and the value is converted to ``float``.
    """
    if agent_data is None:
        agent_data = agents

    columns = [prop, 't', 'agent', 'seed']
    data = []
    for agent, info in agent_data.items():
        for stats in info['runs_stats']:
            prop_values = stats.to_dict()[parent_prop][prop]
            seed = stats.agent_stats['agent_parameters']['agent_seed']
            data.extend(
                [float(value), position, agent, seed]
                for position, value in enumerate(prop_values)
            )

    return pd.DataFrame(data=data, columns=columns)


In [None]:
def _window_row(cum_window, rew_window, time_step, seed, delta, agent_name, run_code):
    """Build one output row: the two window means followed by the run metadata."""
    return [
        sum(cum_window) / len(cum_window),
        sum(rew_window) / len(rew_window),
        time_step,
        seed,
        delta,
        agent_name,
        run_code,
    ]


def create_aggregated_reward_df(agent_data, window_size=50):
    """Average the reward history of every run over consecutive windows.

    The rewards of each run are split into consecutive, non-overlapping
    windows of ``window_size`` samples (window 0 covers samples
    ``0 .. window_size - 1``). For each window one row is produced holding the
    mean reward and the mean of the running cumulative reward. A trailing
    window with fewer than ``window_size`` samples is emitted as well, so no
    sample is dropped.

    Parameters
    ----------
    agent_data : mapping
        Maps an agent name to a dict holding a ``'runs_stats'`` list. Each
        entry must expose ``agent_stats['agent_parameters']``, ``run_code``
        and ``execution_history_stats['reward']``.
    window_size : int, optional
        Number of samples per window; must be positive.

    Returns
    -------
    pandas.DataFrame
        Columns ``['cumulative_reward', 'reward', 'timestep', 'seed', 'delta',
        'agent_name', 'run_code']``. ``delta`` is read only for the agent
        named ``'LinUCB'`` and is ``None`` for every other agent.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f'window_size must be positive, got {window_size}')

    columns = ['cumulative_reward', 'reward', 'timestep', 'seed', 'delta', 'agent_name', 'run_code']
    data = []
    for agent_name, info in agent_data.items():
        for stats in info['runs_stats']:
            parameters = stats.agent_stats['agent_parameters']
            seed = int(parameters['agent_seed'])
            # `delta` is only looked up for LinUCB runs.
            delta = float(parameters['delta']) if agent_name == 'LinUCB' else None
            run_code = stats.run_code

            cum_reward = 0
            cum_window = []
            rew_window = []
            time_step = 0
            for reward in stats.execution_history_stats['reward']:
                reward = float(reward)
                cum_reward += reward
                cum_window.append(cum_reward)
                rew_window.append(reward)
                # Close the window once it holds exactly `window_size` samples.
                # The previous `i % window_size == 0` test closed a one-sample
                # window at i == 0 and shifted every later window by one.
                if len(rew_window) == window_size:
                    data.append(_window_row(cum_window, rew_window, time_step,
                                            seed, delta, agent_name, run_code))
                    time_step += 1
                    cum_window = []
                    rew_window = []

            # Flush the samples left over after the last complete window.
            if rew_window:
                data.append(_window_row(cum_window, rew_window, time_step,
                                        seed, delta, agent_name, run_code))

    return pd.DataFrame(data=data, columns=columns)

In [None]:
# Vertical markers drawn on the history plots: `value` is the step index at
# which the marker is placed and `label` the text shown for it.
# Raw strings keep the backslash of the mathtext `\lambda` literal; in a
# normal string `\l` is an invalid escape sequence that Python warns about.
default_steps = (
    {'label': r'$\lambda$ = 60', 'color': 'tab:olive', 'value': 0, 'linestyle': '--', 'linewidth': 2},
    {'label': r'$\lambda$ = 600', 'color': 'tab:cyan', 'value': 1000, 'linestyle': '--', 'linewidth': 2},
    {'label': r'$\lambda$ = 30', 'color': 'tab:pink', 'value': 2000, 'linestyle': '--', 'linewidth': 2}
)

def plot_assignments_data(df, sup_title, name, code, save_image=True,
                          figsize=(24, 16), window_size=25, steps=default_steps,
                          ytext_1=None, ytext_2=None,
                          legend_1_x=None, legend_1_y=None, legend_1_loc='best',
                          legend_2_x=None, legend_2_y=None, legend_2_loc='best'
                         ):
    """Plot windowed and cumulative reward per agent in two stacked panels.

    The top panel shows ``reward`` and the bottom one ``cumulative_reward``,
    both against ``time_step`` with one line per ``agent``. Each entry of
    ``steps`` is drawn on both panels as a labelled vertical line.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``time_step``, ``reward``, ``cumulative_reward``
        and ``agent``.
    sup_title, name : str
        Currently unused by the function body.
    code : str
        Sub-folder of ``BASE_PATH`` where the PDF is written.
    save_image : bool
        When true, save the figure as ``evaluation-history.pdf``.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.
    window_size : int
        Size of one time window; divides the ``value`` of every step and is
        shown in the x-axis label.
    steps : sequence of dict
        Marker definitions with ``label``, ``color``, ``value``,
        ``linestyle`` and ``linewidth`` keys.
    ytext_1, ytext_2 : float, optional
        Y position of the marker labels on the top / bottom panel. When
        ``None`` a position is derived from the column's min and max.
    legend_1_x, legend_1_y, legend_2_x, legend_2_y : float, optional
        Legend anchor per panel; ``(1.01, 1)`` is used unless both
        coordinates of a panel are given.
    legend_1_loc, legend_2_loc : str
        ``loc`` argument of the corresponding legend.
    """
    fig, axs = plt.subplots(2, 1, figsize=figsize)

    def draw_step_markers(axes, text_y):
        # One dashed vertical line plus a text label per configured step.
        for marker in steps:
            x_pos = marker['value'] / window_size
            axes.axvline(x_pos, label=marker['label'], c=marker['color'],
                         linestyle=marker['linestyle'], linewidth=marker['linewidth'])
            axes.text(x_pos + 0.3, text_y, marker['label'], rotation=0,
                      verticalalignment='baseline', fontsize=16)

    # (axes, column, y label, legend x, legend y, legend loc, label y, factor)
    panels = (
        (axs[0], 'reward', 'Average reward',
         legend_1_x, legend_1_y, legend_1_loc, ytext_1, 2.5),
        (axs[1], 'cumulative_reward', 'Cumulative reward',
         legend_2_x, legend_2_y, legend_2_loc, ytext_2, 3),
    )
    for axes, column, y_label, anchor_x, anchor_y, loc, ytext, factor in panels:
        sns.lineplot(data=df, x="time_step", y=column, hue="agent", ax=axes)
        axes.set_ylabel(y_label)
        axes.set_xlabel(f'Time window ({window_size} steps)')

        # The legend is built before the markers are drawn.
        if anchor_x is None or anchor_y is None:
            anchor = (1.01, 1)
        else:
            anchor = (anchor_x, anchor_y)
        axes.legend(bbox_to_anchor=anchor, borderaxespad=0, loc=loc)

        # Default label height derived from the value range of the column.
        lowest = df[column].min()
        highest = df[column].max()
        text_y = ((lowest - highest) // 5) * factor
        if ytext is not None:
            text_y = ytext

        draw_step_markers(axes, text_y)

    if save_image:
        fig.savefig(f'{BASE_PATH}/{code}/evaluation-history.pdf', bbox_inches='tight')

    plt.show()

In [None]:
def rename_agents(df, names):
    """Rename values of the ``agent`` column in place and return ``df``.

    ``names`` maps an existing agent name to its replacement. The
    replacements are applied one after another, in the mapping's order, and
    only to the ``agent`` column.
    """
    for previous, current in names.items():
        only_agent_column = {'agent': previous}
        df.replace(to_replace=only_agent_column, value=current, inplace=True)
    return df

## Waiting Time

In [None]:
# Folder name handed to load_agents_data for the waiting-time runs.
RESULT_FOLDER = 'eval-new-multi-waiting-time'
# One empty record (run names + run stats) per evaluated agent.
agents = {
    agent_name: {'runs_names': [], 'runs_stats': []}
    for agent_name in ('LinUCB', 'DoubleDQN', 'Random', 'LRU')
}
agents_wt = load_agents_data(agents, handler, RESULT_FOLDER)

In [None]:
# Folder name handed to load_agents_data for the waiting-time baseline runs.
RESULT_FOLDER = 'new-baseline-waiting-time-multi-load'
baselines = {'E-PVM': {'runs_names': [], 'runs_stats': []}}
baselines_wt = load_agents_data(baselines, handler, RESULT_FOLDER)

# Merge the baseline entries into the waiting-time agent mapping.
for agent, data in baselines_wt.items():
    agents_wt[agent] = data

In [None]:
# Ask for the 'reward' statistic with 'sum' aggregation under the label
# 'total_reward'; how these fields are interpreted is up to get_stats_dataframe.
rewards_df_wt = get_stats_dataframe(
    agents_wt,
    dict(label='total_reward', name='reward', aggregation='sum'),
)

### Reward assignment history analysis

In [None]:
# Window size shared with the plotting cell that follows.
window_size = 25
# Build the assignment history and switch to the agent names used in figures.
history_df_wt = rename_agents(
    get_assignment_history_df(agents_wt, window_size=window_size),
    {'LinUCB': 'RLQ-LinUCB', 'DoubleDQN': 'RLQ-DoubleDQN', 'E-PVM': 'E-PVM'},
)

In [None]:
# Waiting-time history figure; label heights and legend anchors are hand-tuned.
plot_assignments_data(
    df=history_df_wt,
    sup_title='Waiting Time - Evaluation results with multi load',
    name='Waiting Time',
    code='waiting-time',
    save_image=True,
    figsize=(10, 22),
    window_size=window_size,
    ytext_1=-880, ytext_2=-44000,
    legend_1_x=0.45, legend_1_y=0.42,
    legend_2_x=0.45, legend_2_y=0.42,
)

## Execution Time

In [None]:
# Folder name handed to load_agents_data for the execution-time runs.
RESULT_FOLDER = 'eval-new-multi-execution-time'
# One empty record (run names + run stats) per evaluated agent.
agents = {
    agent_name: {'runs_names': [], 'runs_stats': []}
    for agent_name in ('LinUCB', 'DoubleDQN', 'Random', 'LRU')
}
agents_et = load_agents_data(agents, handler, RESULT_FOLDER)

In [None]:
# Folder name handed to load_agents_data for the execution-time baseline runs.
RESULT_FOLDER = 'new-baseline-execution-time-multi-load'
baselines = {'E-PVM': {'runs_names': [], 'runs_stats': []}}
baselines_et = load_agents_data(baselines, handler, RESULT_FOLDER)

# Merge the baseline entries into the execution-time agent mapping.
for agent, data in baselines_et.items():
    agents_et[agent] = data

### Reward assignment history analysis

In [None]:
# Window size shared with the plotting cell that follows.
window_size = 25
# Build the assignment history and switch to the agent names used in figures.
history_df_et = rename_agents(
    get_assignment_history_df(agents_et, window_size=window_size),
    {'LinUCB': 'RLQ-LinUCB', 'DoubleDQN': 'RLQ-DoubleDQN', 'E-PVM': 'E-PVM'},
)

In [None]:
# Execution-time history figure; label heights and legend anchors are hand-tuned.
plot_assignments_data(
    df=history_df_et,
    sup_title='Execution Time - Evaluation results with multi load',
    name='Execution Time',
    code='execution-time',
    save_image=True,
    figsize=(10, 22),
    window_size=window_size,
    ytext_1=-0.45, ytext_2=-420,
    legend_1_x=0.65, legend_1_y=0.31,
    legend_2_x=0.45, legend_2_y=0.42,
)

## Execution Cost

In [None]:
# Folder name handed to load_agents_data for the execution-cost runs.
RESULT_FOLDER = 'eval-new-multi-execution-cost'
# One empty record (run names + run stats) per evaluated agent.
agents = {
    agent_name: {'runs_names': [], 'runs_stats': []}
    for agent_name in ('LinUCB', 'DoubleDQN', 'Random', 'LRU')
}
agents_ec = load_agents_data(agents, handler, RESULT_FOLDER)

In [None]:
# Folder name handed to load_agents_data for the execution-cost baseline runs.
RESULT_FOLDER = 'new-baseline-execution-cost-multi-load'
baselines = {'E-PVM': {'runs_names': [], 'runs_stats': []}}
baselines_ec = load_agents_data(baselines, handler, RESULT_FOLDER)

# Merge the baseline entries into the execution-cost agent mapping.
for agent, data in baselines_ec.items():
    agents_ec[agent] = data

In [None]:
# Ask for the 'reward' statistic with 'sum' aggregation under the label
# 'total_reward'; how these fields are interpreted is up to get_stats_dataframe.
rewards_df_ec = get_stats_dataframe(
    agents_ec,
    dict(label='total_reward', name='reward', aggregation='sum'),
)

### Reward assignment history analysis

In [None]:
# Window size shared with the plotting cells that follow.
window_size = 25
# Build the assignment history and switch to the agent names used in figures.
history_df_ec = rename_agents(
    get_assignment_history_df(agents_ec, window_size=window_size),
    {'LinUCB': 'RLQ-LinUCB', 'DoubleDQN': 'RLQ-DoubleDQN', 'E-PVM': 'E-PVM'},
)

In [None]:
# Execution-cost history figure; label heights and legend anchors are hand-tuned.
plot_assignments_data(
    df=history_df_ec,
    sup_title='Execution Cost - Evaluation results with multi load',
    name='Execution Cost',
    code='execution-cost',
    save_image=True,
    figsize=(10, 22),
    window_size=window_size,
    ytext_1=-2.2, ytext_2=-950,
    legend_1_x=0.4, legend_1_y=0.3,
    legend_2_x=0.44, legend_2_y=0.42,
)

In [None]:
# Display the per-agent column-wise maximum of the execution-cost reward table.
rewards_df_ec.groupby(['agent']).max()

In [None]:
# Tag each history frame with the reward function it was produced for, so the
# frames can be told apart after concatenation.
history_df_wt, history_df_et, history_df_ec = (
    frame.assign(reward_function=label)
    for frame, label in (
        (history_df_wt, 'Waiting Time'),
        (history_df_et, 'Execution Time'),
        (history_df_ec, 'Execution Cost'),
    )
)


In [None]:
# Stack the three per-reward histories into one frame (an earlier variant also
# appended `history_df_make`; it is not part of the current concatenation).
combined_df = pd.concat(
    [history_df_wt, history_df_et, history_df_ec],
    ignore_index=True,
)
combined_df = rename_agents(combined_df, {'E-PVM': 'E-PVM'})
# Flip the sign of both reward columns.
for metric_column in ('reward', 'cumulative_reward'):
    combined_df[metric_column] = combined_df[metric_column] * -1
combined_df

In [None]:
# Display the number of non-null entries per (agent, reward function) pair.
combined_df.groupby(['agent', 'reward_function']).count()

In [None]:
# Display the smallest `reward` value among the 'Waiting Time' rows.
combined_df[combined_df.reward_function == 'Waiting Time'].reward.min()

In [None]:
# Larger font scale for the wide combined figure.
sns.set(style="darkgrid", font_scale=2.8)

# 2 x 3 grid: one column per reward function, two rows of panels.
fig, axs = plt.subplots(2, 3, figsize=(40, 16))

reward_functions = ['Execution Time', 'Execution Cost', 'Waiting Time']
letters = ['(a)', '(b)', '(c)']
# Number of distinct agents; used to size the shared legend.
n_agents = len(combined_df.groupby(['agent']).count().index)


# Vertical markers drawn on every panel: `value` is the step index at which
# the marker is placed and `label` its legend text.
# Raw strings keep the backslash of the mathtext `\lambda` literal; in a
# normal string `\l` is an invalid escape sequence that Python warns about.
default_steps = (
    {'label': r'$\lambda$ = 60', 'color': 'tab:olive', 'value': 0, 'linestyle': '--', 'linewidth': 2},
    {'label': r'$\lambda$ = 600', 'color': 'tab:cyan', 'value': 1000, 'linestyle': '--', 'linewidth': 2},
    {'label': r'$\lambda$ = 30', 'color': 'tab:pink', 'value': 2000, 'linestyle': '--', 'linewidth': 2}
)


def add_vlines(axes, text_y):
    """Draw one labelled vertical line per entry of ``default_steps``.

    The x position of each line is the entry's ``value`` divided by the
    notebook-level ``window_size``. ``text_y`` is accepted but not used: the
    text annotation that consumed it is disabled.
    """
    for marker in default_steps:
        x_pos = marker['value'] / window_size
        axes.axvline(
            x_pos,
            label=marker['label'],
            c=marker['color'],
            linestyle=marker['linestyle'],
            linewidth=marker['linewidth'],
        )

# One column of panels per reward function: windowed values on the top row,
# cumulative values on the bottom row. Only the first column gets y labels.
for i, rew_func in enumerate(reward_functions):
    filterd_df = combined_df[combined_df.reward_function == rew_func]
    top_ax, bottom_ax = axs[0, i], axs[1, i]

    # --- top row ---
    sns.lineplot(data=filterd_df, x="time_step", y="reward", hue="agent", ax=top_ax)
    top_ax.set_title(f'{letters[i]} {rew_func}')
    top_ax.set_xlabel('')
    top_ax.set_ylabel('Average metric value' if i == 0 else '')
    # Per-panel legends are removed; a single shared legend is added later.
    top_ax.legend_.remove()
    top_ax.set_xticks([])

    min_v, max_v = filterd_df.reward.min(), filterd_df.reward.max()
    step = ((min_v - max_v) // 5) * 2.5
    add_vlines(top_ax, step)

    # --- bottom row ---
    sns.lineplot(data=filterd_df, x="time_step", y="cumulative_reward", hue="agent", ax=bottom_ax)
    bottom_ax.set_ylabel('Cumulative metric value' if i == 0 else '')
    bottom_ax.set_xlabel(f'Time window ({window_size} steps)')
    bottom_ax.legend_.remove()

    min_v, max_v = filterd_df.cumulative_reward.min(), filterd_df.cumulative_reward.max()
    step = (min_v - max_v) // 5 * 3
    add_vlines(bottom_ax, step)

# Single shared legend above the grid, built from the first panel's artists
# (agent lines plus the step markers), laid out on one row.
handles, labels = axs[0,0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 1.05),
           ncol=n_agents + len(default_steps), frameon=False, fancybox=False)

fig.tight_layout()

# makedirs with exist_ok=True creates missing parent folders and does not fail
# when the folder already exists (no check-then-create race).
os.makedirs(f'{BASE_PATH}/all_rewards', exist_ok=True)
# Use this figure's own dpi rather than that of whatever figure is "current".
fig.savefig(f'{BASE_PATH}/all_rewards/evaluation-history-multi-load.pdf', bbox_inches='tight', dpi=fig.dpi)
