# Automatic Evaluator

In [2]:
%%capture
#########################
# Requirements and Imports
#########################
%pip install -e ..

import inspect
import importlib.util
import sys

import ipywidgets as widgets
from pathlib import Path

import torch
import numpy as np
import random

import matplotlib.pyplot as plt

import hackatari
import ocatari
from ocatari.core import OCAtari

sys.path.append('../')
import automatic_evaluator.game_execution as game_execution
import automatic_evaluator.eval as game_eval
import automatic_evaluator.plots.rewards as reward_plots
import automatic_evaluator.plots.actions as action_plots
import automatic_evaluator.plots.times as time_plots
import automatic_evaluator.plots.comparisons as comparison_plots
import automatic_evaluator.plots.plots as plots
import automatic_evaluator.plots.style as style

In [3]:
##############################################
# Default Value Initialization
##############################################
# All games OCAtari knows about.
avail_games = ocatari.core.AVAILABLE_GAMES

# Names of the game modules found below ../hackatari/games (".py" removed,
# package initialisers and compiled "cpython" artefacts skipped).
hackatari_games = [
    entry.name.replace('.py', '')
    for entry in Path('../hackatari/games').glob('**/*')
    if entry.is_file()
    and '__init__' not in entry.name
    and 'cpython' not in entry.name
]

# Selectable games: OCAtari games whose lower-cased name has a HackAtari
# module, in alphabetical order.
games = sorted(g for g in avail_games if g.lower() in hackatari_games)

# Module-level state shared by the widget callbacks and run_game().

# Currently selected game; overwritten by update_game_selection().
game = 'Pong'
# Same default strings as the model/log path text widgets.
model_path = 'model.gz'
log_path = 'log.json'
# Modification bookkeeping: game_modifications is replaced by
# get_game_modifications_for(); the other two are set by set_modifications().
game_modifications = ['']
selected_modifications = ['']
active_mods = []
# Agent model paths, filled in by set_model_path(); run_game() uses agents[0].
agents = []
# NOTE(review): rewardfunc_path, render_mode, mode, hud, renderc_oc_overlay and
# full_action_space are currently not forwarded to eval_run by run_game().
rewardfunc_path = None
dopamine_pooling = False
game_mode = 0
difficulty = 0
render_mode = "None"
obs_mode = "dqn"
mode = "ram"
hud = False
renderc_oc_overlay = True
buffer_window_size = 4
frameskip = 4
repeat_action_probability = 0.25
full_action_space = False
# NOTE(review): set_episodes() and the episodes widget default to 3, not 10.
episodes = 10
# Both lists are reset at the start of every run_game() call.
log_infos = []
processed_logs = []


run = False # required so that game is not run on load

In [None]:
##############################################
# Helper Methods
##############################################

# Widget Helper Methods
from pathlib import Path
import time


def set_seed(reproducibility: bool=True, seed_value: int =0):
    if reproducibility:
        random.seed(seed_value)
        torch.manual_seed(seed_value)
        np.random.seed(seed_value)
        
        # Use deterministic Algorithms
        torch.use_deterministic_algorithms(True)
        torch.utils.deterministic.fill_uninitialized_memory = True


def set_log_path(log_path_value: str = 'log.json') -> None:
    """Store ``log_path_value`` in the module-level ``log_path``.

    ``run_game`` resolves this path to pick the directory in which the
    per-run log files are written.
    """
    global log_path
    log_path = log_path_value

def set_modifications(modifications_value):
    """Record the chosen modifications.

    Both module-level names ``selected_modifications`` and ``active_mods`` are
    bound to the very same ``modifications_value`` object.
    """
    global selected_modifications, active_mods
    selected_modifications = active_mods = modifications_value
    

def get_obs_mode_from(agent_path: str):
    """Infer the observation mode from an agent path.

    Returns ``'obj'`` when the substring ``'obj'`` occurs anywhere in
    ``agent_path`` and ``'dqn'`` otherwise.
    """
    return 'obj' if 'obj' in agent_path else 'dqn'

def get_game_modifications_for(game: str = 'Pong'):
    """Return the modification names HackAtari lists for ``game``.

    The text from ``hackatari.core._available_modifications`` is split on
    ``'*'``; the first chunk is dropped and, for every remaining chunk, the
    part before the first ``':'`` minus its leading character is kept.
    The result is also stored in the module-level ``game_modifications``.
    An ``AttributeError`` anywhere in that process yields an empty list.
    """
    global game_modifications
    try:
        listing = hackatari.core._available_modifications(game)
        names = []
        for chunk in listing.split('*')[1:]:
            label = chunk.split(':')[0]
            names.append(label[1:])
        game_modifications = names
    except AttributeError:
        game_modifications = []
    return game_modifications

def get_game_modes_for(game: str = 'Pong'):
    """Return the game modes the ALE reports for ``game``.

    A fresh ``OCAtari`` environment is built for the query; the result is
    also stored in the module-level ``game_modes``. An ``AttributeError``
    while reaching the ALE handle yields an empty list.

    NOTE(review): the environment created here is never closed explicitly —
    confirm whether OCAtari requires that.
    """
    global game_modes
    try:
        modes = OCAtari(game).env.env.ale.getAvailableModes()
    except AttributeError:
        modes = []
    game_modes = modes
    return game_modes

def get_difficulties_for(game: str = 'Pong'):
    """Return the difficulty levels the ALE reports for ``game``.

    A fresh ``OCAtari`` environment is built for the query; the result is
    also stored in the module-level ``difficulties``. An ``AttributeError``
    while reaching the ALE handle yields an empty list.

    NOTE(review): the environment created here is never closed explicitly —
    confirm whether OCAtari requires that.
    """
    global difficulties
    try:
        levels = OCAtari(game).env.env.ale.getAvailableDifficulties()
    except AttributeError:
        levels = []
    difficulties = levels
    return difficulties

def set_episodes(episodes_value: int = 3) -> None:
    """Store ``episodes_value`` in the module-level ``episodes`` read by ``run_game``."""
    global episodes
    episodes = episodes_value
    
def set_dp(dopamine_pooling_value: bool = False) -> None:
    """Store ``dopamine_pooling_value`` in the module-level ``dopamine_pooling`` read by ``run_game``."""
    global dopamine_pooling
    dopamine_pooling = dopamine_pooling_value
    
def set_obs_mode(obs_mode_value: str = 'dqn') -> None:
    """Store ``obs_mode_value`` in the module-level ``obs_mode`` read by ``run_game``."""
    global obs_mode
    obs_mode = obs_mode_value

def set_window_size(window_size_value: int = 4) -> None:
    """Store ``window_size_value`` in the module-level ``buffer_window_size`` read by ``run_game``."""
    global buffer_window_size
    buffer_window_size = window_size_value
    
def set_frameskip(frameskip_value: int = 4) -> None:
    """Store ``frameskip_value`` in the module-level ``frameskip`` read by ``run_game``."""
    global frameskip
    frameskip = frameskip_value

def set_repeat_action_probability(rap_value: float = 0.25) -> None:
    """Store ``rap_value`` in the module-level ``repeat_action_probability`` read by ``run_game``."""
    global repeat_action_probability
    repeat_action_probability = rap_value
    
def set_game_mode(game_mode_value: int = 0) -> None:
    """Store ``game_mode_value`` in the module-level ``game_mode`` read by ``run_game``."""
    global game_mode
    game_mode = game_mode_value
    
def set_game_difficulty(difficulty_value: int = 0) -> None:
    """Store ``difficulty_value`` in the module-level ``difficulty`` read by ``run_game``."""
    global difficulty
    difficulty = difficulty_value

# Game Helper Methods
def _run_single_evaluation(modifications, log_name):
    """Run one evaluation of the first agent with ``modifications`` and log to ``log_name``."""
    # The module-level rewardfunc_path, render_mode, mode, hud,
    # renderc_oc_overlay and full_action_space settings are not forwarded.
    game_execution.eval_run(
        game=game,
        agents=[agents[0]],
        modifications=modifications,
        dopamine_pooling=dopamine_pooling,
        game_mode=game_mode,
        difficulty=difficulty,
        obs_mode=obs_mode,
        buffer_window_size=buffer_window_size,
        frameskip=frameskip,
        repeat_action_probability=repeat_action_probability,
        episodes=episodes,
        log_file=log_name,
    )


def run_game():
    """Evaluate the first agent on the baseline game and on each active modification.

    One run is executed without any modification, followed by one run per
    entry of ``active_mods`` with only that modification enabled. All runs use
    the same settings (including ``repeat_action_probability``) so that their
    results are comparable. Every run writes its own log file into the
    directory derived from ``log_path``; the logs are then loaded and processed
    into the module-level ``processed_logs`` and the log files are deleted.

    Side effects:
        Resets and refills the module-level ``log_infos`` and
        ``processed_logs``.

    Raises:
        IndexError: If ``agents`` is empty.
    """
    global log_infos
    global processed_logs

    processed_logs = []
    log_infos = []

    # TODO allow for multiple agents
    agent = agents[0]

    # Logs go into log_path itself when it is a directory, otherwise next to it.
    log_dir = Path(log_path).resolve()
    if not log_dir.is_dir():
        log_dir = log_dir.parent

    timestamp = time.strftime("%Y%m%d%H%M%S")  # Used for naming the log files

    # Baseline: run the game without modifications.
    baseline_log = str(log_dir / f'logs_{timestamp}.json')
    _run_single_evaluation([], baseline_log)
    log_infos.append({"log_name": baseline_log, "modifications": None, "game": game, "model": agent})

    # Run the game with one modification at a time.
    for modification in active_mods:
        modification_log = str(log_dir / f"logs_{timestamp}_{modification}.json")
        _run_single_evaluation([modification], modification_log)
        log_infos.append({"log_name": modification_log, "modifications": modification, "game": game, "model": agent})

    logs = game_eval.load_logs(log_infos)
    processed_logs = game_eval.process_logs(logs, selected_game=game, model_path=agent)

    # Clean the logs; a file that is already gone must not abort the cleanup.
    for log in log_infos:
        Path(log["log_name"]).unlink(missing_ok=True)
        # # Remove the .gz file if it exists
        # print(log["log_name"][:-5] + ".gz")
        # if Path(log["log_name"][:-5] + ".gz").exists():
        #     Path(log["log_name"][:-5] + ".gz").unlink()



# Evaluation Helper Methods
def create_modif_distr_plot():
    """Call ``reward_plots.plot_reward_modif_distribution`` with the current ``processed_logs`` and ``game``."""
    reward_plots.plot_reward_modif_distribution(processed_logs, game)
    
def create_reward_progression_plot():
    """Call ``reward_plots.plot_reward_progression`` with the current ``processed_logs`` and ``game``."""
    reward_plots.plot_reward_progression(processed_logs, game)
    
def create_smoothed_acc_rewards_plot():
    """Call ``reward_plots.plot_non_filtered_smoothed_accumulated_rewards`` on ``processed_logs`` with ``sigma=30.0``."""
    reward_plots.plot_non_filtered_smoothed_accumulated_rewards(processed_logs, sigma=30.0)

def create_acc_reward_plot():
    """Call ``reward_plots.plot_acummulated_reward`` on ``processed_logs`` with ``sigma=10.0``."""
    reward_plots.plot_acummulated_reward(processed_logs, sigma=10.0)
    
# ############################### Actions ########################################
def create_distr_bar_plot():
    """Call ``action_plots.action_distribution_barchart`` with the current ``processed_logs`` and ``game``."""
    action_plots.action_distribution_barchart(processed_logs, game)

def create_distr_heat_plot():
    """Call ``action_plots.plot_action_transition_heatmaps`` with the current ``processed_logs`` and ``game``."""
    action_plots.plot_action_transition_heatmaps(processed_logs, game)

def create_distr_heat_corr_plot():
    """Call ``action_plots.plot_action_transition_heatmaps_corr`` with the current ``processed_logs`` and ``game``."""
    action_plots.plot_action_transition_heatmaps_corr(processed_logs, game)
    
# ###############################  Time  ########################################
def create_time_box_plot():
    """Call ``time_plots.plot_time_boxplot`` with the current ``processed_logs``."""
    time_plots.plot_time_boxplot(processed_logs)
    
def create_time_distr_plot():
    """Call ``time_plots.plot_time_distribution`` with the current ``processed_logs``."""
    time_plots.plot_time_distribution(processed_logs)
    
# ###############################  Other  ########################################
# Plot correlation between actions and rewards for each modification
def create_act_corr_plot():
    """Call ``comparison_plots.plot_action_reward_correlation`` with the current ``processed_logs`` and ``game``."""
    comparison_plots.plot_action_reward_correlation(processed_logs, game)

def create_act_time_plot():
    """Call ``comparison_plots.plot_reward_vs_time`` with the current ``processed_logs``."""
    comparison_plots.plot_reward_vs_time(processed_logs)

In [None]:
##############################################
# Widget Creation, interactivity, layout
##############################################

import ipywidgets as widgets

# Create Widgets
# Reproducibility toggle and seed; both feed set_seed().
reproducibility_widget = widgets.Checkbox(
    value=True,
    description="Reproducibility",
    disabled=False,
)

seed_widget = widgets.BoundedIntText(
    value=0,
    min=0,
    placeholder=0,
    description="Seed:",
    disabled=False,
)

# Free-text paths; the initial values match the module-level defaults.
model_path_widget = widgets.Text(
    value='model.gz',
    placeholder='Model Path',
    description="Model Path:",
    disabled=False,
)

log_path_widget = widgets.Text(
    value='log.json',
    placeholder='Log Path',
    description="Log Path:",
    disabled=False,
)

# Game picker over `games`, plus the modifications offered for the default game.
game_selector_widget = widgets.Select(
    options=games,
    value="Pong",
    description='Game:',
    disabled=False,
)

modification_selector_widget = widgets.SelectMultiple(
    options=get_game_modifications_for(game),
    value=[],
    description="Game Modification:",
    disabled=False,
)

# Action buttons. The styling keyword is `button_style`; the previous spelling
# `buttan_style` is not a Button trait and was therefore never applied.
run_button_widget = widgets.Button(
    description='Run',
    disabled=False,
    button_style='',
    tooltip='Run',
    icon='play',
)

# NOTE(review): no click handler is attached to the load button in this cell.
load_button_widget = widgets.Button(
    description='Load',
    disabled=False,
    button_style='',
    tooltip='Load',
    icon='upload',
)

# Number of evaluation episodes (at least one).
episodes_widget = widgets.BoundedIntText(
    value=3,
    min=1,
    placeholder=3,
    description='Episodes: ',
    layout=widgets.Layout(width='140px'),
    disabled=False,
)

dp_checkbox = widgets.Checkbox(
    value=False,
    description='Dopamine Pooling',
    disabled=False,
    indent=True,
)

# Observation mode; set_model_path() also updates this widget.
obs_mode_widget = widgets.Dropdown(
    options=['dqn', 'obj'],
    value='dqn',
    description='Observation:',
    layout=widgets.Layout(width='150px'),
    disabled=False,
)

window_size_widget = widgets.BoundedIntText(
    value=4,
    placeholder=4,
    min=1,
    description='Window Size:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='140px'),
    disabled=False,
)

repeact_action_probability_widget = widgets.FloatSlider(
    value=0.25,
    min=0,
    max=1.0,
    step=0.01,
    description="Repeat Action Probability:",
    disabled=False,
)

# Frameskip must be a positive integer for the Atari environment, so the lower
# bound is 1 (a value of 0 would only fail later when the environment is built).
frameskip_widget = widgets.BoundedIntText(
    value=4,
    min=1,
    placeholder=4,
    description='Frameskip:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='120px'),
    disabled=False,
)

# Game mode and difficulty pickers, populated for the default game;
# update_game_selection() replaces their options when the game changes.
mode_dropdown = widgets.Dropdown(
    options=get_game_modes_for(game),
    value=0,
    description='Game Mode:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='140px'),
    disabled=False,
)

difficulty_dropdown = widgets.Dropdown(
    options=get_difficulties_for(game),
    value=0,
    description='Difficulty:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='120px'),
    disabled=False,
)


def update_game_selection(game_value: str):
    """Switch the selected game and refresh the game-dependent selectors.

    Stores ``game_value`` in the module-level ``game`` and replaces the options
    of the modification selector, the game-mode dropdown and the difficulty
    dropdown with the values looked up for that game.
    """
    global game
    available_mods = get_game_modifications_for(game_value)
    game = game_value
    modification_selector_widget.options = available_mods
    mode_dropdown.options = get_game_modes_for(game_value)
    difficulty_dropdown.options = get_difficulties_for(game_value)


def set_model_path(model_path_value='model.gz'):
    """Populate the module-level ``agents`` list from a model file or directory.

    Args:
        model_path_value: Path to a single model file, or to a directory whose
            ``*.gz`` files are all registered as agents.

    For a directory, the resolved paths of the contained files ending in
    ``.gz`` are stored in sorted order, so that ``agents[0]`` (the agent
    ``run_game`` evaluates) does not depend on file-system listing order.
    For anything else, the resolved path itself becomes the only agent.
    When at least one agent was found, the observation-mode widget is set to
    the mode inferred from the first agent's path.
    """
    global agents
    candidate = Path(model_path_value)
    if candidate.is_dir():
        agents = sorted(
            str(entry.resolve())
            for entry in candidate.glob('*')
            if entry.is_file() and entry.name.endswith('.gz')
        )
    else:
        agents = [str(candidate.resolve())]

    # A directory without models leaves nothing to infer the mode from; this
    # replaces the former bare `except: pass`, which also hid unrelated errors.
    if agents:
        obs_mode_widget.value = get_obs_mode_from(agents[0])
    

# One interactive view per plot helper. The helpers take no arguments, so the
# views have no controls of their own; run_log_update() refreshes them.
modif_distr_plot_view = widgets.interactive(create_modif_distr_plot)
reward_progression_plot_view = widgets.interactive(create_reward_progression_plot)
smoothed_acc_rewards_plot_view = widgets.interactive(create_smoothed_acc_rewards_plot)
acc_reward_plot_view = widgets.interactive(create_acc_reward_plot)
distr_bar_plot_view = widgets.interactive(create_distr_bar_plot)
distr_heat_plot_view = widgets.interactive(create_distr_heat_plot)
distr_heat_corr_plot_view = widgets.interactive(create_distr_heat_corr_plot)
time_box_plot_view = widgets.interactive(create_time_box_plot)
time_distr_plot_view = widgets.interactive(create_time_distr_plot)
act_corr_plot_view = widgets.interactive(create_act_corr_plot)
act_time_plot_view = widgets.interactive(create_act_time_plot)


def run_log_update():
    """Run the evaluation and refresh every plot view.

    The very first invocation (while the module-level ``run`` flag is still
    False) only arms the flag and does nothing else, so that the game is not
    run on load. Every later invocation calls ``run_game`` and then updates
    all plot views in turn.
    """
    global run
    if not run:
        run = True
        return

    run_game()
    plot_views = (
        modif_distr_plot_view,
        reward_progression_plot_view,
        smoothed_acc_rewards_plot_view,
        acc_reward_plot_view,
        distr_bar_plot_view,
        distr_heat_plot_view,
        distr_heat_corr_plot_view,
        time_box_plot_view,
        time_distr_plot_view,
        act_corr_plot_view,
        act_time_plot_view,
    )
    for view in plot_views:
        view.update()


# Create Views
# Bind every control widget to the setter that stores its value in the
# module-level state; run_view wraps the argument-less run_log_update().
reproducibility_view = widgets.interactive(set_seed, reproducibility=reproducibility_widget, seed_value=seed_widget)
model_path_view = widgets.interactive(set_model_path, model_path_value=model_path_widget)
log_path_view = widgets.interactive(set_log_path, log_path_value=log_path_widget)
episodes_view = widgets.interactive(set_episodes, episodes_value=episodes_widget)
modification_selector_view = widgets.interactive(set_modifications, modifications_value=modification_selector_widget)
game_selector_view = widgets.interactive(update_game_selection, game_value=game_selector_widget)
dp_checkbox_view = widgets.interactive(set_dp, dopamine_pooling_value=dp_checkbox)
obs_mode_view = widgets.interactive(set_obs_mode, obs_mode_value=obs_mode_widget)
window_size_view = widgets.interactive(set_window_size, window_size_value=window_size_widget)
frameskip_view = widgets.interactive(set_frameskip, frameskip_value=frameskip_widget)
repeact_action_probability_view = widgets.interactive(set_repeat_action_probability, rap_value=repeact_action_probability_widget)
game_mode_view = widgets.interactive(set_game_mode, game_mode_value=mode_dropdown)
game_difficulty_view = widgets.interactive(set_game_difficulty, difficulty_value=difficulty_dropdown)
run_view = widgets.interactive(run_log_update)
    
# Re-run the owning view whenever a control's value changes.
# NOTE(review): widgets.interactive may already observe its argument widgets,
# in which case these handlers trigger a second update - confirm.
# NOTE(review): modification_selector_widget has no explicit handler here.
for _control, _view in (
    (reproducibility_widget, reproducibility_view),
    (seed_widget, reproducibility_view),
    (model_path_widget, model_path_view),
    (episodes_widget, episodes_view),
    (dp_checkbox, dp_checkbox_view),
    (obs_mode_widget, obs_mode_view),
    (window_size_widget, window_size_view),
    (frameskip_widget, frameskip_view),
    (repeact_action_probability_widget, repeact_action_probability_view),
    (log_path_widget, log_path_view),
    (game_selector_widget, game_selector_view),
    (mode_dropdown, game_mode_view),
    (difficulty_dropdown, game_difficulty_view),
):
    # Bind the view as a default argument so each handler keeps its own view.
    _control.observe(lambda change, view=_view: view.update(), 'value')

# The run button triggers the evaluation through run_view.
run_button_widget.on_click(lambda x: run_view.update())

# Dashboard layout: controls in the first column of the top rows, the run
# output next to them, and one full-width band of rows per plot below.
grid = widgets.GridspecLayout(240, 3, height='13000px', layout=widgets.Layout(justify_content='center'))
grid[0:2, 0] = reproducibility_view
grid[2, 0] = model_path_view
grid[3, 0] = log_path_view
grid[4, 0] = widgets.HBox([episodes_view, obs_mode_view], layout=widgets.Layout(display='flex'))
grid[5, 0] = dp_checkbox_view
grid[6, 0] = widgets.HBox([window_size_view, frameskip_view], layout=widgets.Layout(display='flex'))
grid[7, 0] = repeact_action_probability_view
grid[8:11, 0] = game_selector_view
grid[11, 0] = widgets.HBox([game_mode_view, game_difficulty_view], layout=widgets.Layout(display='flex'))
grid[12:15, 0] = modification_selector_view
grid[15, 0] = widgets.HBox([run_button_widget, load_button_widget], layout=widgets.Layout(display='flex'))
grid[0:16, 1:] = run_view
grid[16:38, 0:] = modif_distr_plot_view
grid[38:51, 0:] = reward_progression_plot_view
grid[51:66, 0:] = smoothed_acc_rewards_plot_view
grid[66:81, 0:] = acc_reward_plot_view
grid[81:101, 0:] = distr_bar_plot_view
grid[101:121, 0:] = distr_heat_plot_view
grid[121:141, 0:] = distr_heat_corr_plot_view
grid[141:162, 0:] = time_box_plot_view
# The time distribution view was created and refreshed but never placed in the
# grid; it now follows the time box plot, using rows that were unused before.
grid[162:182, 0:] = time_distr_plot_view
grid[182:202, 0:] = act_corr_plot_view
grid[202:222, 0:] = act_time_plot_view

In [None]:
# Render the assembled dashboard grid as this cell's output.
display(grid)