# Evaluator with more manual steps

In [2]:
%%capture
#########################
# Requirements and Imports
#########################
from os import path
import ipywidgets as widgets
from pathlib import Path

import torch
import numpy as np
import random

## Reproducibility

In [3]:
# Checkbox controlling whether the seeding cell further down fixes the RNG seeds.
# It starts checked, so seeding is on unless the user unticks it.
reproducibility_field = widgets.Checkbox(value=True, description="Reproducibility", disabled=False)
display(reproducibility_field)

Checkbox(value=True, description='Reproducibility')

In [4]:
# Read the checkbox; the seed input is only created and shown when it is ticked.
reproducibility = reproducibility_field.value
if reproducibility:
    # Integer input for the seed, starting at 0.
    seed_field = widgets.IntText(value=0, placeholder=0, description="Seed:", disabled=False)
    display(seed_field)

IntText(value=0, description='Seed:')

In [6]:
# Seed the pseudo-random number generators (PRNGs) used in this kernel.
# NOTE(review): these calls only affect the notebook's own process; the scripts
# launched later with `!` run in a separate process -- confirm they seed themselves.

reproducibility = reproducibility_field.value
if reproducibility:
    seed = seed_field.value
    print(seed)

    # Same seed for Python's `random`, PyTorch, and NumPy's global generator.
    for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
        seed_rng(seed)

    # Request deterministic PyTorch algorithms and the matching
    # `fill_uninitialized_memory` setting.
    torch.use_deterministic_algorithms(True)
    torch.utils.deterministic.fill_uninitialized_memory = True

0


## Game Selection

In [7]:
# Game names that OCAtari exposes as AVAILABLE_GAMES; the game selector in the
# next cell filters this list.
import ocatari
all_games = ocatari.core.AVAILABLE_GAMES
print(all_games)

['Adventure', 'AirRaid', 'Alien', 'Amidar', 'Assault', 'Asterix', 'Asteroids', 'Atlantis', 'BankHeist', 'BattleZone', 'BeamRider', 'Berzerk', 'Bowling', 'Boxing', 'Breakout', 'Carnival', 'Centipede', 'ChopperCommand', 'CrazyClimber', 'DemonAttack', 'DonkeyKong', 'DoubleDunk', 'Enduro', 'FishingDerby', 'Freeway', 'Frogger', 'Frostbite', 'Galaxian', 'Gopher', 'Hero', 'IceHockey', 'Jamesbond', 'Kangaroo', 'KeystoneKapers', 'KingKong', 'Krull', 'KungFuMaster', 'MarioBros', 'MontezumaRevenge', 'MsPacman', 'NameThisGame', 'Pacman', 'Phoenix', 'Pitfall', 'Pong', 'Pooyan', 'PrivateEye', 'Qbert', 'Riverraid', 'RoadRunner', 'Seaquest', 'Skiing', 'SpaceInvaders', 'StarGunner', 'Tennis', 'TimePilot', 'UpNDown', 'Venture', 'VideoPinball', 'YarsRevenge', 'Zaxxon']


In [8]:
# Module names found in ../hackatari/games: one entry per ``*.py`` file,
# skipping the package's ``__init__.py``.
# ``Path.stem`` removes only the final suffix; ``str.replace('.py', '')`` would
# also strip a '.py' that happens to appear in the middle of a file name.
hack_games = [
    game_file.stem
    for game_file in Path('../hackatari/games').glob('*.py')
    if game_file.is_file() and game_file.name != '__init__.py'
]

# Keep the OCAtari games whose lower-cased name matches one of those modules.
# A comprehension is used so no loop variable leaks into the notebook namespace.
games_option = [game for game in all_games if game.lower() in hack_games]

# Preselect Pong when it is offered; otherwise fall back to the first option.
# Passing a value that is not among the options makes the widget raise.
if "Pong" in games_option:
    default_game = "Pong"
elif games_option:
    default_game = games_option[0]
else:
    default_game = None
    print("No HackAtari game modules found in '../hackatari/games'; check the working directory.")

game_selector = widgets.Select(
    options = games_option,
    value = default_game,
    description = 'Game:',
    disabled = False,
)
game_selector.layout.width = '30%'
display(game_selector)

Select(description='Game:', index=21, layout=Layout(width='30%'), options=('Amidar', 'Assault', 'Asterix', 'At…

In [9]:
# Snapshot the selector's current value; later cells read `selected_game`,
# so re-run this cell after changing the selection.
selected_game = game_selector.value
selected_game

'Pong'

# Model Selection

In [None]:
from IPython.display import display, clear_output
import os

# Text box holding the model directory, relative to the notebook's working directory.
model_dir_input = widgets.Text(
    value='../models',
    placeholder='Enter relative path...',
    description='Relative Path:',
    disabled=False,
    layout={'width': '30%'}
)

output_area = widgets.Output() #create an output area for feedback

def on_button_click(b):
    """Validate the path in `model_dir_input` and publish it as `stored_path_object`.

    Sets the global `stored_path_object` to a `Path` when the entered path
    exists, or to `None` otherwise, and writes a one-line status message
    into `output_area`.

    Args:
        b: The widget that triggered the callback (unused).
    """
    global stored_path_object
    with output_area: #capture output into the defined output area
        clear_output() #clear previous output
        relative_path = model_dir_input.value

        if os.path.exists(relative_path):
            print(f"Path '{relative_path}' exists. Path is stored.")
            stored_path_object = Path(relative_path) #create path object
        else:
            print(f"Path '{relative_path}' does not exist. Path is set to None.")
            stored_path_object = None


button = widgets.Button(description="Verify Model Path")
button.on_click(on_button_click)

input_and_button = widgets.HBox([model_dir_input, button]) # create hbox

# Display the widget
display(input_and_button, output_area)

# Validate the default path once right away, so `stored_path_object` is defined
# even when the notebook is executed top to bottom without clicking the button.
on_button_click(button)


HBox(children=(Text(value='../models', description='Relative Path:', layout=Layout(width='30%'), placeholder='…

Output()

In [12]:
# Directory stored by the "Verify Model Path" handler. `globals().get` returns
# None instead of raising NameError when that handler has not run in this kernel.
model_root = globals().get('stored_path_object')

# Recursively collect the model archives stored under <model_root>/<selected_game>/.
models = []
if model_root is not None:
    for model_pattern in [f'{selected_game}/**/*.gz']: # seems not support f'{selected_game}/**/*.*model' yet
        models.extend(model_root.glob(model_pattern))
    models.sort()

# Always rebuild the selector, so the next cell never reads a stale selector
# left over from a previous game or hits an undefined name.
# `display`, `flex_flow` and `align_items` are Layout properties, so they are
# set on the layout instead of being passed as widget keyword arguments.
model_selector = widgets.SelectMultiple(
    options=models,
    value=[],
    description="Select Models",
    style={"description_width": "initial"},
    disabled=False,
    layout=widgets.Layout(
        width='30%',
        display="flex",
        flex_flow="column",
        align_items="stretch",
    ),
)

if model_root is None:
    print("No model directory is stored. Verify the model path above, then re-run this cell.")
elif len(models) == 0:
    print(f"No models found for game '{selected_game}'.")
else:
    display(model_selector)


SelectMultiple(description='Select Models', layout=Layout(width='30%'), options=(PosixPath('../models/Pong/0/c…

In [21]:
# Copy the current multi-selection into a list and stop here if it is empty.
models_list = [*model_selector.value]
assert len(models_list) > 0, "Please select at least one model"
models_list

[PosixPath('../models/Pong/0/c51_classic_50M.gz'),
 PosixPath('../models/Pong/0/dqn_modern_50M.gz')]

# Set up command arguments

### Load and select available modifications

In [22]:
import hackatari.core as core

# The text returned for the game is parsed as '*'-introduced entries of the form
# "<name>:<description>"; whatever precedes the first '*' is discarded.
avail_mod_for_game = core._available_modifications(selected_game).split('*')[1:]

# Split each entry on the FIRST ':' only. `partition` always yields
# (name, sep, description), so a description that itself contains ':' -- or an
# entry with no ':' at all -- still produces exactly two fields.
modifies_list = [list(mod.partition(':')[::2]) for mod in avail_mod_for_game]

# One row per modification: name label, description label, True/False dropdown.
# The next cell reads children[0] (name) and children[2] (dropdown) of each row.
# `justify_content` is a Layout property, so it is set via `layout`.
modifies_selector = widgets.VBox([
        widgets.HBox([
            widgets.Label(value=mod_name, layout={'width': '10%'}),
            widgets.Label(value=mod_info, layout={'width': '50%'}),
            widgets.Dropdown(options=[True, False], value=False, description='Value:',
                             layout={'width': '15%', 'justify_content': 'flex-end'})
        ], layout={'justify_content': 'space-between'}) for mod_name, mod_info in modifies_list
    ], layout={'width': '75%'})

display(modifies_selector)

VBox(children=(HBox(children=(Label(value=' ball_x_prev', layout=Layout(width='10%')), Label(value="\n\tint([x…

In [23]:
# Gather the names of every modification whose dropdown is set to True.
# In each row, child 0 is the name label and child 2 is the True/False dropdown.
mod_args = ' '.join(
    row.children[0].value.strip()
    for row in modifies_selector.children
    if row.children[2].value
)
if mod_args:
    # Prefix with the -m flag only when at least one modification is enabled.
    mod_args = '-m ' + mod_args
    print(mod_args)

-m down_drift left_drift


### Set up other arguments for executing commands

In [24]:
from ocatari.core import OCAtari
# parameters: difficulty and game_mode
# A temporary environment is opened only to query the ALE for the game's
# available modes and difficulties; `finally` closes it even if a query fails.
g = OCAtari(selected_game)
try:
    available_modes = list(g.env.env.ale.getAvailableModes())
    available_difficulties = list(g.env.env.ale.getAvailableDifficulties())
finally:
    g.close()

game_text = widgets.Label  (
    value=f"For the selected game '{selected_game}':",
    disabled=False
)

# Default to 0 when the game offers it; otherwise use the first reported value,
# because a Dropdown rejects a value that is not among its options.
mode_dropdown = widgets.Dropdown(
    options=available_modes,
    value=0 if 0 in available_modes else next(iter(available_modes), None),
    description='game mode:',
    disabled=False,
    layout = {'width':'10%'}
)

difficulty_dropdown = widgets.Dropdown(
    options=available_difficulties,
    value=0 if 0 in available_difficulties else next(iter(available_difficulties), None),
    description='difficulty:',
    disabled=False,
    layout = {'width':'10%'}
)


# True/False switch for the -dp flag; off by default.
dp_dropdown = widgets.Dropdown(
    options=[True, False],
    value=False,
    description='dopamine_pooling:',
    style=dict(description_width='initial'),
    disabled=False,
    layout=dict(width='15%'),
)

# Integer input passed on as -w; starts at 4.
window_inttext = widgets.IntText(
    value=4,
    description='window_size:',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='120px'),
    disabled=False,
)

# Integer input passed on as -f; starts at 4.
frameskip_inttext = widgets.IntText(
    value=4,
    description='frameskip:',
    disabled=False,
    layout=widgets.Layout(width='120px'),
)

# True when any selected model path contains "dqn" or "c51".
has_dqn_model = any(
    ("dqn" in model.as_posix() or "c51" in model.as_posix())
    for model in models_list
)

# Offer only 'dqn' when such a model is selected, otherwise 'ori' and 'obj'.
obs_options = ['dqn'] if has_dqn_model else ['ori', 'obj']

obs_dropdown = widgets.Dropdown(
    options=obs_options,
    # The default must be one of the options: a fixed 'dqn' raised an error
    # whenever the option list was ['ori', 'obj'].
    value=obs_options[0],
    description='obs_mode:',
    disabled=False,
    layout = {'width':'10%'}
)

# Integer input passed to eval.py as -e; starts at 10.
episodes_inttext = widgets.IntText(
    value=10,
    description='num_episodes:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='150px'),
    disabled=False,
)

# Caption on top, all parameter widgets in one row underneath.
# `justify_content` is a Layout property, so it is set through `layout`
# rather than passed as a keyword argument to HBox.
parameters_selector =  widgets.VBox( [game_text,
                                     widgets.HBox([mode_dropdown, difficulty_dropdown, dp_dropdown, window_inttext, frameskip_inttext, obs_dropdown, episodes_inttext],
                                                  layout = {'width':'100%', 'justify_content': 'space-between'})
])

# Display the widgets
display(parameters_selector)



VBox(children=(Label(value="For the selected game 'Pong':"), HBox(children=(Dropdown(description='game mode:',…

In [25]:
# Build the flags shared by eval.py and run.py, one fragment per widget.
# Each fragment carries its own leading space; optional flags contribute "".
param_parts = [
    f" -mo {mode_dropdown.value}",
    f" -d {difficulty_dropdown.value}",
    " -dp" if dp_dropdown.value else "",
    f" -w {window_inttext.value}",
    f" -f {frameskip_inttext.value}",
    f" -obs {obs_dropdown.value}" if obs_dropdown.value else "",
]
stored_parameter = "".join(param_parts)

print(stored_parameter)

# The episode count is kept separate because only the eval command uses it.
eval_parameter = f" -e {episodes_inttext.value}"

 -mo 0 -d 0 -w 4 -f 4 -obs dqn


# Run Game

In [26]:
## prepare for the command
# Locations of the two scripts, relative to the project root.
eval_script = Path("scripts/eval.py")
run_script = Path("scripts/run.py")

# The absolute form of "" is the current working directory; its parent
# directory is taken as the project root.
notebook_dir = path.abspath("")
project_root = path.dirname(notebook_dir)
print(f"Project Root: {project_root}")

# Forward-slash (POSIX-style) strings, for compatibility across operating systems.
eval_path, run_path = (
    (Path(project_root) / script).as_posix()
    for script in (eval_script, run_script)
)

Project Root: /mnt/c/Users/peng_/workspace/HackAtari


### Execute the command with eval.py (multiple models)

In [27]:
# "eval.py" can accept a list of models
models_path = ' '.join( f'{model.as_posix()}' for model in models_list )


eval_command = f"python {eval_path} -g {selected_game} -a {models_path} {mod_args} {stored_parameter} {eval_parameter}"

print("eval command: " + eval_command)

!{eval_command}

eval command: python /mnt/c/Users/peng_/workspace/HackAtari/scripts/eval.py -g Pong -a ../models/Pong/0/c51_classic_50M.gz ../models/Pong/0/dqn_modern_50M.gz -m down_drift left_drift  -mo 0 -d 0 -w 4 -f 4 -obs dqn  -e 10
A.L.E: Arcade Learning Environment (version 0.10.2+c9d4b19)
[Powered by Stella]
Runing for episodes: 10
Runing for episodes: 10
Loaded agent from ../models/Pong/0/c51_classic_50M.gz
Episode 1: Reward = -20.0, Time = 1.90 seconds with 1490 steps and actions: {0: 238, 1: 407, 2: 419, 3: 157, 4: 40, 5: 229}
Episode 2: Reward = -21.0, Time = 1.22 seconds with 1124 steps and actions: {0: 206, 1: 596, 2: 120, 3: 66, 4: 31, 5: 105}
Episode 3: Reward = -20.0, Time = 1.22 seconds with 1132 steps and actions: {0: 210, 1: 533, 2: 181, 3: 82, 4: 20, 5: 106}
Episode 4: Reward = -20.0, Time = 1.21 seconds with 1170 steps and actions: {0: 230, 1: 526, 2: 185, 3: 91, 4: 22, 5: 116}
Episode 5: Reward = -20.0, Time = 1.25 seconds with 1208 steps and actions: {0: 211, 1: 555, 2: 214, 3: 

### Execute the command with run.py (accepts only one model and visualizes each episode)

In [35]:
# "run.py" can only accept one model
# Choose one of the models selected earlier; the first one is preselected.
assert models_list, "Please select at least one model"

run_model_dropdown = widgets.Dropdown(
    options=models_list,
    value=models_list[0],
    description="Select one model to play",
    style=dict(description_width='initial'),
    disabled=False,
    layout=dict(width='30%'),
)

display(run_model_dropdown)

Dropdown(description='Select one model to play', layout=Layout(width='30%'), options=(PosixPath('../models/Pon…

In [36]:

model_path = f'{run_model_dropdown.value.as_posix()}'

run_command = f"python {run_path} -g {selected_game} -a {model_path} {mod_args} {stored_parameter}"
print("run command: " + run_command)

!{run_command}

run command: python /mnt/c/Users/peng_/workspace/HackAtari/scripts/run.py -g Pong -a ../models/Pong/0/c51_classic_50M.gz -m down_drift left_drift  -mo 0 -d 0 -w 4 -f 4 -obs dqn
A.L.E: Arcade Learning Environment (version 0.10.2+c9d4b19)
[Powered by Stella]
Loaded agent from ../models/Pong/0/c51_classic_50M.gz
