diff --git a/doc/examples/Makefile b/doc/examples/Makefile deleted file mode 100644 index 1ac396d17..000000000 --- a/doc/examples/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# We do not automatically build the recording agent, which requires SDL. To build it, run -# -# > make recordingAgent -all: sharedLibraryAgent rlglueAgent fifoAgent - -sharedLibraryAgent: - make -f Makefile.sharedlibrary - -rlglueAgent: - make -f Makefile.rlglue - -fifoAgent: - make -f Makefile.fifo - -recordingAgent: - make -f Makefile.recording - -clean: - make -f Makefile.rlglue clean - make -f Makefile.sharedlibrary clean - make -f Makefile.fifo clean - make -f Makefile.recording clean diff --git a/doc/examples/Makefile.fifo b/doc/examples/Makefile.fifo deleted file mode 100644 index cf5006c8a..000000000 --- a/doc/examples/Makefile.fifo +++ /dev/null @@ -1,33 +0,0 @@ -# Modified from the sharedLibraryInterfaceExample's makefile. - -USE_SDL := 0 - -# This will likely need to be changed to suit your installation. -ALE := ../.. - -FLAGS := -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE) -CXX := g++ -FILE := fifoInterfaceExample -LDFLAGS := -lale -lz - -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Linux) - FLAGS += -Wl,-rpath=$(ALE) -endif -ifeq ($(UNAME_S),Darwin) - FLAGS += -framework Cocoa -endif - -ifeq ($(strip $(USE_SDL)), 1) - DEFINES += -D__USE_SDL -DSOUND_SUPPORT - FLAGS += $(shell sdl-config --cflags) - LDFLAGS += $(shell sdl-config --libs) -endif - -all: fifoInterfaceExample - -fifoInterfaceExample: - $(CXX) $(DEFINES) $(FLAGS) $(FILE).cpp $(LDFLAGS) -o $(FILE) - -clean: - rm -rf fifoInterfaceExample *.o diff --git a/doc/examples/Makefile.recording b/doc/examples/Makefile.recording deleted file mode 100644 index 59d72aab8..000000000 --- a/doc/examples/Makefile.recording +++ /dev/null @@ -1,37 +0,0 @@ -# Modified from the sharedLibraryInterface's makefile. -# Video recording requires SDL. 
If you do not have SDL installed, you will not be able to -# compile this example. -# -# > make recordingAgent - -USE_SDL := 1 - -# This will likely need to be changed to suit your installation. -ALE := ../.. - -FLAGS := -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE) -CXX := g++ -FILE := videoRecordingExample -LDFLAGS := -lale -lz - -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Linux) - FLAGS += -Wl,-rpath=$(ALE) -endif -ifeq ($(UNAME_S),Darwin) - FLAGS += -framework Cocoa -endif - -ifeq ($(strip $(USE_SDL)), 1) - DEFINES += -D__USE_SDL -DSOUND_SUPPORT - FLAGS += $(shell sdl-config --cflags) - LDFLAGS += $(shell sdl-config --libs) -endif - -all: videoRecordingExample - -videoRecordingExample: - $(CXX) $(DEFINES) $(FLAGS) $(FILE).cpp $(LDFLAGS) -o $(FILE) - -clean: - rm -rf videoRecordingExample *.o diff --git a/doc/examples/Makefile.rlglue b/doc/examples/Makefile.rlglue deleted file mode 100644 index 6a55007ed..000000000 --- a/doc/examples/Makefile.rlglue +++ /dev/null @@ -1,19 +0,0 @@ -# Modified from the C/C++ RL-Glue codec RLGlueAgent's makefile. - -AGENTLDFLAGS := -lrlutils -lrlagent -lrlgluenetdev -EXPLDFLAGS := -lrlutils -lrlexperiment -lrlgluenetdev -CFLAGS := -Wall -pedantic - -all: RLGlueAgent RLGlueExperiment - -RLGlueAgent: RLGlueAgent.o - $(CC) RLGlueAgent.o $(AGENTLDFLAGS) -o RLGlueAgent -RLGlueExperiment: RLGlueExperiment.o - $(CC) RLGlueExperiment.o $(EXPLDFLAGS) -o RLGlueExperiment - -%.o: %.c - $(CC) $(CFLAGS) -c $< - -clean: - rm -rf RLGlueAgent RLGlueExperiment *.o - diff --git a/doc/examples/Makefile.sharedLibraryWithModes b/doc/examples/Makefile.sharedLibraryWithModes deleted file mode 100644 index 12792743f..000000000 --- a/doc/examples/Makefile.sharedLibraryWithModes +++ /dev/null @@ -1,31 +0,0 @@ -USE_SDL := 0 - -# This will likely need to be changed to suit your installation. -ALE := ../.. 
- -FLAGS := -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE) -CXX := g++ -FILE := ale-sharedLibraryInterfaceWithModesExample -LDFLAGS := -lale -lz - -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Linux) - FLAGS += -Wl,-rpath=$(ALE) -endif -ifeq ($(UNAME_S),Darwin) - FLAGS += -framework Cocoa -endif - -ifeq ($(strip $(USE_SDL)), 1) - DEFINES += -D__USE_SDL -DSOUND_SUPPORT - FLAGS += $(shell sdl-config --cflags) - LDFLAGS += $(shell sdl-config --libs) -endif - -all: sharedLibraryInterfaceWithModesExample - -sharedLibraryInterfaceWithModesExample: - $(CXX) $(DEFINES) $(FLAGS) $(FILE).cpp $(LDFLAGS) -o $(FILE) - -clean: - rm -rf sharedLibraryInterfaceWithModesExample *.o diff --git a/doc/examples/Makefile.sharedlibrary b/doc/examples/Makefile.sharedlibrary deleted file mode 100644 index 3efed9405..000000000 --- a/doc/examples/Makefile.sharedlibrary +++ /dev/null @@ -1,31 +0,0 @@ -USE_SDL := 0 - -# This will likely need to be changed to suit your installation. -ALE := ../.. 
- -FLAGS := -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE) -CXX := g++ -FILE := sharedLibraryInterfaceExample -LDFLAGS := -lale -lz - -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Linux) - FLAGS += -Wl,-rpath=$(ALE) -endif -ifeq ($(UNAME_S),Darwin) - FLAGS += -framework Cocoa -endif - -ifeq ($(strip $(USE_SDL)), 1) - DEFINES += -D__USE_SDL -DSOUND_SUPPORT - FLAGS += $(shell sdl-config --cflags) - LDFLAGS += $(shell sdl-config --libs) -endif - -all: sharedLibraryInterfaceExample - -sharedLibraryInterfaceExample: - $(CXX) $(DEFINES) $(FLAGS) $(FILE).cpp $(LDFLAGS) -o $(FILE) - -clean: - rm -rf sharedLibraryInterfaceExample *.o diff --git a/doc/examples/RLGlueAgent.c b/doc/examples/RLGlueAgent.c deleted file mode 100644 index f637df092..000000000 --- a/doc/examples/RLGlueAgent.c +++ /dev/null @@ -1,125 +0,0 @@ -/* -* Copyright (C) 2008, Brian Tanner - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -/** This example was *minimally* adapted from the SkeletonAgent code from - * Brian Tanner. The chief modification is the two random actions, instead - * of one. 
The following is required to execute this code: - * - * - RL-Glue core - * - RL-Glue C/C++ codec - */ - -#include /* for printf */ -#include /* for strcmp */ -#include /*for time()*/ -#include /* agent_ function prototypes and RL-Glue types */ -#include /* helpful functions for allocating structs and cleaning them up */ - - -action_t this_action; -action_t last_action; - -observation_t *last_observation=0; - -int randInRange(int max){ - double r, x; - r = ((double)rand() / ((double)(RAND_MAX)+(double)(1))); - x = (r * (max+1)); - return (int)x; -} - -void agent_init(const char* task_spec) -{ - /*Seed the random number generator*/ - - srand(time(0)); - /*Here is where you might allocate storage for parameters (value function or policy, last action, last observation, etc)*/ - - /*Here you would parse the task spec if you felt like it*/ - - /*Allocate memory for a one-dimensional integer action using utility functions from RLStruct_util*/ - allocateRLStruct(&this_action,2,0,0); - last_observation=allocateRLStructPointer(0,0,0); - - /* That is equivalent to: - this_action.numInts = 1; - this_action.intArray = (int*)calloc(1,sizeof(int)); - this_action.numDoubles = 0; - this_action.doubleArray = 0; - this_action.numChars = 0; - this_action.charArray = 0; - */ -} - -const action_t *agent_start(const observation_t *this_observation) { - /* This agent always returns a random number, either 0 or 1 for its action */ - int theIntAction=randInRange(1); - this_action.intArray[0]=theIntAction; - - /* In a real action you might want to store the last observation and last action*/ - replaceRLStruct(&this_action, &last_action); - replaceRLStruct(this_observation, last_observation); - - return &this_action; -} - -const action_t *agent_step(double reward, const observation_t *this_observation) { - int row, col; - - /* This agent returns 0 or 1 randomly for its action */ - this_action.intArray[0] = randInRange(17); - this_action.intArray[1] = randInRange(17) + 18; - - /* Print out the 
RAM */ - for (row = 0; row < 8; row++) { - for (col = 0; col < 16; col++) - fprintf (stderr, "%2x ", this_observation->intArray[col + row*16]); - fprintf (stderr, "\n"); - } - - /* Print screen (make your terminal font very small to see this) */ - /* for (row = 0; row < 210; row++) { - for (col = 0; col < 160; col++) - fprintf (stderr, "%2x ", this_observation->intArray[128+col + row*160]); - fprintf (stderr, "\n"); - } */ - - fprintf (stderr, "\n"); - - /* In a real action you might want to store the last observation and last action*/ - replaceRLStruct(&this_action, &last_action); - replaceRLStruct(this_observation, last_observation); - - return &this_action; -} - -void agent_end(double reward) { - clearRLStruct(&last_action); - clearRLStruct(last_observation); -} - -void agent_cleanup() { - clearRLStruct(&this_action); - clearRLStruct(&last_action); - freeRLStructPointer(last_observation); -} - -const char* agent_message(const char* inMessage) { - if(strcmp(inMessage,"what is your name?")==0) - return "my name is skeleton_agent!"; - - return "I don't know how to respond to your message"; -} diff --git a/doc/examples/RLGlueExperiment.c b/doc/examples/RLGlueExperiment.c deleted file mode 100644 index c94d29ef2..000000000 --- a/doc/examples/RLGlueExperiment.c +++ /dev/null @@ -1,85 +0,0 @@ -/* -* Copyright (C) 2008, Brian Tanner - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -/** This example was *minimally* adapted from the SkeletonAgent code from - * Brian Tanner. 
The chief modification is the two random actions, instead - * of one. The following is required to execute this code: - * - * - RL-Glue core - * - RL-Glue C/C++ codec - */ - -#include /* for printf */ -#include /* RL_ function prototypes and RL-Glue types */ - -int whichEpisode=0; - -// This uses RL-Glue to run a single episode. -void runEpisode(int stepLimit) { - int terminal=RL_episode(stepLimit); - printf("Episode %d\t %d steps \t%f total reward\t %d natural end \n",whichEpisode,RL_num_steps(),RL_return(), terminal); - whichEpisode++; -} - -int main(int argc, char *argv[]) { - const char* task_spec; - const reward_observation_action_terminal_t *stepResponse; - const observation_action_t *startResponse; - - printf("\n\nExperiment starting up!\n"); - - - task_spec=RL_init(); - printf("RL_init called, the environment sent task spec: %s\n",task_spec); - - // RL_env_message and RL_agent_message may be used to communicate with the environment - // and agent, respectively. See RL-Glue documentation for details. - // const char* responseMessage; - // responseMessage=RL_agent_message("what is your name?"); - - printf("\n\n----------Running a few episodes----------\n"); - // Use the RL-Glue-provided RL_episode to run a few episodes of ALE. - // 0 means no limit at all. - runEpisode(10000); - runEpisode(0); - runEpisode(0); - runEpisode(0); - runEpisode(0); - RL_cleanup(); - - printf("\n\n----------Stepping through an episode----------\n"); - // The following demonstrates how to step through an episode. - task_spec=RL_init(); - - // Start the episode - startResponse=RL_start(); - printf("First action was: %d\n", startResponse->action->intArray[0]); - - // Run one step - stepResponse=RL_step(); - - // Run until end of episode - while(stepResponse->terminal != 1) { - stepResponse=RL_step(); - } - - // Demonstrates other RL-Glue functionality. 
- printf("It ran for %d steps, total reward was: %f\n",RL_num_steps(), RL_return()); - RL_cleanup(); - - - return 0; -} diff --git a/doc/examples/fifoInterfaceExample.cpp b/doc/examples/fifoInterfaceExample.cpp deleted file mode 100644 index 57c956af5..000000000 --- a/doc/examples/fifoInterfaceExample.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* ***************************************************************************** - * A.L.E (Arcade Learning Environment) - * Copyright (c) 2009-2013 by Yavar Naddaf, Joel Veness, Marc G. Bellemare, - * Matthew Hausknecht, and the Reinforcement Learning and Artificial Intelligence - * Laboratory - * Released under the GNU General Public License; see License.txt for details. - * - * Based on: Stella -- "An Atari 2600 VCS Emulator" - * Copyright (c) 1995-2007 by Bradford W. Mott and the Stella team - * - * ***************************************************************************** - * fifoExample.cpp - * - * Sample code for running a FIFO agent. This interface is provided for - * broader language compatibility; we recommend using the shared interface for - * C++ agents. - **************************************************************************** */ - -#include -#include -#include -#include -#include -#include - -// From RL-Glue agent example. -int randInRange(int max) { - double r, x; - r = ((double)rand() / ((double)(RAND_MAX) + (double)(1))); - x = (r * (max + 1)); - return (int)x; -} - -// Print the RAM string -void printRAM(char* str) { - // First we parse the ram (for pedagogical purposes) - std::vector ram; - - for (int offset = 0; offset < 128; offset++) { - // Crude but effective - char buffer[16]; - buffer[0] = str[offset * 2]; - buffer[1] = str[offset * 2 + 1]; - buffer[2] = 0; - - int value = strtol(buffer, NULL, 16); - - ram.push_back(value); - } - - // Now, if so desired, regurgitate the RAM. 
- const bool printRAM = false; - - if (printRAM) - for (int row = 0; row < 8; row++) { - for (int col = 0; col < 16; col++) - fprintf(stdout, "%2x ", ram[col + row * 16]); - fprintf(stdout, "\n"); - } -} - -// Read in RAM and RL data. -bool readData(FILE* alePipe) { - char buffer[65535]; - fgets(buffer, sizeof(buffer), alePipe); - - // Find the first colon, corresponding to the end of the RAM data - char* endRAM = strchr(buffer, ':'); - printRAM(buffer); - - // Now parse the terminal bit - bool terminal = (endRAM[1] == '1'); - - // Also output reward whenever nonzero - int reward = strtol(&endRAM[3], NULL, 10); - if (reward != 0) - std::cout << "Reward: " << reward << std::endl; - - return terminal; -} - -void agentMain(FILE* alePipe) { - // Read in screen width and height - char buffer[1024]; - fgets(buffer, sizeof(buffer), alePipe); - - std::cout << "ALE says: " << buffer << std::endl; - - // Request RAM & RL data from ALE - fputs("0,1,0,1\n", alePipe); - - int frameNumber = 0; - - // Now loop until the episode terminates. - while (true) { - // Read in data - bool terminal = readData(alePipe); - - frameNumber++; - - if (terminal) - break; - - // Write back a random action. - fprintf(alePipe, "%d,%d\n", randInRange(17), 18); - } - - std::cout << "Episode lasted " << frameNumber << " frames" << std::endl; -} - -int main(int argc, char** argv) { - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " rom_file" << std::endl; - std::cerr - << "Note: This example must be run from the same directory as the ALE " - "executable ('ale')." 
- << std::endl; - return 1; - } - - std::string romFile(argv[1]); - - // We actually fork two processes, ALE itself and an agent - std::string aleCmd("./ale -game_controller fifo "); - aleCmd += romFile; - - // Spawn the ALE in read/write mode - // We could also use named pipes but that is a bit messier - FILE* alePipe = popen(aleCmd.c_str(), "r+"); - - // Now run the agent & communicate with the ale - agentMain(alePipe); - - pclose(alePipe); -} diff --git a/doc/examples/python_example.py b/doc/examples/python_example.py deleted file mode 100755 index ca31663b6..000000000 --- a/doc/examples/python_example.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -# python_example.py -# Author: Ben Goodrich -# -# This is a direct port to python of the shared library example from -# ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp -import sys -from random import randrange -from ale_python_interface import ALEInterface - -if len(sys.argv) < 2: - print('Usage: %s rom_file' % sys.argv[0]) - sys.exit() - -ale = ALEInterface() - -# Get & Set the desired settings -ale.setInt(b'random_seed', 123) - -# Set USE_SDL to true to display the screen. ALE must be compilied -# with SDL enabled for this to work. On OSX, pygame init is used to -# proxy-call SDL_main. 
-USE_SDL = False -if USE_SDL: - if sys.platform == 'darwin': - import pygame - pygame.init() - ale.setBool('sound', False) # Sound doesn't work on OSX - elif sys.platform.startswith('linux'): - ale.setBool('sound', True) - ale.setBool('display_screen', True) - -# Load the ROM file -rom_file = str.encode(sys.argv[1]) -ale.loadROM(rom_file) - -# Get the list of legal actions -legal_actions = ale.getLegalActionSet() - -# Play 10 episodes -for episode in range(10): - total_reward = 0 - while not ale.game_over(): - a = legal_actions[randrange(len(legal_actions))] - # Apply an action and get the resulting reward - reward = ale.act(a); - total_reward += reward - print('Episode %d ended with score: %d' % (episode, total_reward)) - ale.reset_game() diff --git a/doc/examples/python_example_with_modes.py b/doc/examples/python_example_with_modes.py deleted file mode 100755 index af4151abf..000000000 --- a/doc/examples/python_example_with_modes.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# python_example_with_modes.py -# Author: Ben Goodrich & Marlos C. Machado -# -# This is a direct port to python of the shared library example from -# ALE provided in doc/examples/sharedLibraryInterfaceWithModesExample.cpp -import sys -from random import randrange -from ale_python_interface import ALEInterface - -if len(sys.argv) < 2: - print 'Usage:', sys.argv[0], 'rom_file' - sys.exit() - -ale = ALEInterface() - -# Get & Set the desired settings -ale.setInt('random_seed', 123) -# The default is already 0.25, this is just an example -ale.setFloat("repeat_action_probability", 0.25); - -# Set USE_SDL to true to display the screen. ALE must be compilied -# with SDL enabled for this to work. On OSX, pygame init is used to -# proxy-call SDL_main. 
-USE_SDL = True -if USE_SDL: - if sys.platform == 'darwin': - import pygame - pygame.init() - ale.setBool('sound', False) # Sound doesn't work on OSX - elif sys.platform.startswith('linux'): - ale.setBool('sound', True) - ale.setBool('display_screen', True) - -# Load the ROM file -ale.loadROM(sys.argv[1]) - -#Get the list of available modes and difficulties -avail_modes = ale.getAvailableModes() -avail_diff = ale.getAvailableDifficulties() - -print 'Number of available modes: ', len(avail_modes) -print 'Number of available difficulties: ', len(avail_diff) - -# Get the list of legal actions -legal_actions = ale.getLegalActionSet() - -# Play one episode in each mode and in each difficulty -for mode in avail_modes: - for diff in avail_diff: - - ale.setDifficulty(diff) - ale.setMode(mode) - ale.reset_game() - print 'Mode {0} difficulty {1}:'.format(mode, diff) - - total_reward = 0 - while not ale.game_over(): - a = legal_actions[randrange(len(legal_actions))] - # Apply an action and get the resulting reward - reward = ale.act(a); - total_reward += reward - - print 'Episode ended with score: ', total_reward diff --git a/doc/java-agent/code/build.xml b/doc/java-agent/code/build.xml deleted file mode 100644 index 1ee8e93f5..000000000 --- a/doc/java-agent/code/build.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - - - - - - Builds, tests, and runs the project ALEJavaAgent. 
- - - diff --git a/doc/java-agent/code/manifest.mf b/doc/java-agent/code/manifest.mf deleted file mode 100644 index 328e8e5bc..000000000 --- a/doc/java-agent/code/manifest.mf +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -X-COMMENT: Main-Class will be added automatically by build - diff --git a/doc/java-agent/code/nbproject/build-impl.xml b/doc/java-agent/code/nbproject/build-impl.xml deleted file mode 100644 index 81ede3065..000000000 --- a/doc/java-agent/code/nbproject/build-impl.xml +++ /dev/null @@ -1,894 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Must set src.dir - Must set test.src.dir - Must set build.dir - Must set dist.dir - Must set build.classes.dir - Must set dist.javadoc.dir - Must set build.test.classes.dir - Must set build.test.results.dir - Must set build.classes.excludes - Must set dist.jar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Must set javac.includes - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Must select some files in the IDE or set javac.includes - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - To run this application from the 
command line without Ant, try: - - - - - - - java -cp "${run.classpath.with.dist.jar}" ${main.class} - - - - - - - - - - - - To run this application from the command line without Ant, try: - - java -jar "${dist.jar.resolved}" - - - - - - - - To run this application from the command line without Ant, try: - - java -jar "${dist.jar.resolved}" - - - - - - - - - - - - - - - - - - - Must select one file in the IDE or set run.class - - - - Must select one file in the IDE or set run.class - - - - - - - - - - - - - - - - - - - - - - - Must select one file in the IDE or set debug.class - - - - - Must select one file in the IDE or set debug.class - - - - - Must set fix.includes - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Must select some files in the IDE or set javac.includes - - - - - - - - - - - - - - - - - - - - Some tests failed; see details above. - - - - - - - - - Must select some files in the IDE or set test.includes - - - - Some tests failed; see details above. 
- - - - - Must select one file in the IDE or set test.class - - - - - - - - - - - - - - - - - - - - - - - - - - - Must select one file in the IDE or set applet.url - - - - - - - - - Must select one file in the IDE or set applet.url - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/java-agent/code/nbproject/configs/Human.properties b/doc/java-agent/code/nbproject/configs/Human.properties deleted file mode 100644 index e69de29bb..000000000 diff --git a/doc/java-agent/code/nbproject/configs/RL.properties b/doc/java-agent/code/nbproject/configs/RL.properties deleted file mode 100644 index 61c339ba8..000000000 --- a/doc/java-agent/code/nbproject/configs/RL.properties +++ /dev/null @@ -1 +0,0 @@ -main.class=ale.agents.RLAgent diff --git a/doc/java-agent/code/nbproject/genfiles.properties b/doc/java-agent/code/nbproject/genfiles.properties deleted file mode 100644 index f8c3cd57b..000000000 --- a/doc/java-agent/code/nbproject/genfiles.properties +++ /dev/null @@ -1,11 +0,0 @@ -build.xml.data.CRC32=d952cf82 -build.xml.script.CRC32=a7335e3d -build.xml.stylesheet.CRC32=28e38971@1.38.3.45 -# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. -# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. 
-nbproject/build-impl.xml.data.CRC32=d952cf82 -nbproject/build-impl.xml.script.CRC32=38f45e83 -nbproject/build-impl.xml.stylesheet.CRC32=229523de@1.38.3.45 -nbproject/profiler-build-impl.xml.data.CRC32=d952cf82 -nbproject/profiler-build-impl.xml.script.CRC32=abda56ed -nbproject/profiler-build-impl.xml.stylesheet.CRC32=f10cf54c@1.11.1 diff --git a/doc/java-agent/code/nbproject/project.properties b/doc/java-agent/code/nbproject/project.properties deleted file mode 100644 index e704b6a58..000000000 --- a/doc/java-agent/code/nbproject/project.properties +++ /dev/null @@ -1,73 +0,0 @@ -annotation.processing.enabled=true -annotation.processing.enabled.in.editor=false -annotation.processing.run.all.processors=true -annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output -application.title=ALEJavaAgent -application.vendor=Marc G. Bellemare -build.classes.dir=${build.dir}/classes -build.classes.excludes=**/*.java,**/*.form -# This directory is removed when the project is cleaned: -build.dir=build -build.generated.dir=${build.dir}/generated -build.generated.sources.dir=${build.dir}/generated-sources -# Only compile against the classpath explicitly listed here: -build.sysclasspath=ignore -build.test.classes.dir=${build.dir}/test/classes -build.test.results.dir=${build.dir}/test/results -# Uncomment to specify the preferred debugger connection transport: -#debug.transport=dt_socket -debug.classpath=\ - ${run.classpath} -debug.test.classpath=\ - ${run.test.classpath} -# This directory is removed when the project is cleaned: -dist.dir=dist -dist.jar=${dist.dir}/ALEJavaAgent.jar -dist.javadoc.dir=${dist.dir}/javadoc -endorsed.classpath= -excludes= -includes=** -jar.compress=false -javac.classpath= -# Space-separated list of extra javac options -javac.compilerargs= -javac.deprecation=false -javac.processorpath=\ - ${javac.classpath} -javac.source=1.5 -javac.target=1.5 -javac.test.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir}:\ - 
${libs.junit.classpath}:\ - ${libs.junit_4.classpath} -javac.test.processorpath=\ - ${javac.test.classpath} -javadoc.additionalparam= -javadoc.author=false -javadoc.encoding=${source.encoding} -javadoc.noindex=false -javadoc.nonavbar=false -javadoc.notree=false -javadoc.private=false -javadoc.splitindex=true -javadoc.use=true -javadoc.version=false -javadoc.windowtitle= -main.class=ale.agents.HumanAgent -manifest.file=manifest.mf -meta.inf.dir=${src.dir}/META-INF -platform.active=default_platform -run.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} -# Space-separated list of JVM arguments used when running the project -# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value -# or test-sys-prop.name=value to set system properties for unit tests): -run.jvmargs= -run.test.classpath=\ - ${javac.test.classpath}:\ - ${build.test.classes.dir} -source.encoding=UTF-8 -src.dir=src -test.src.dir=test diff --git a/doc/java-agent/code/nbproject/project.xml b/doc/java-agent/code/nbproject/project.xml deleted file mode 100644 index d6e51be66..000000000 --- a/doc/java-agent/code/nbproject/project.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - org.netbeans.modules.java.j2seproject - - - ALEJavaAgent - - - - - - - - - diff --git a/doc/java-agent/code/run_agent.perl b/doc/java-agent/code/run_agent.perl deleted file mode 100644 index 4c25188ce..000000000 --- a/doc/java-agent/code/run_agent.perl +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/perl -use IPC::Open2; - -# ----------------------------------- -# test configuration -# ----------------------------------- - -# Original script by Joel Veness, modified by Marc G. Bellemare. Runs a -# Java agent for a given number of episodes. If the requested number of -# episodes is 0, the script runs forever. The default number of episodes -# is 1. 
- -$agentJarFile="dist/ALEJavaAgent.jar"; - -# disables buffered IO -$| = 1; - -$OS = $^O; -$numArgs = $#ARGV + 1; -die "Usage: $0 [-export_frames]" if ($numArgs < 1); - -$debug_mode = 1; # 0 off, 1 some information, 2 verbose -$rom = $ARGV[0]; -if ($numArgs > 1) { - $arg=$ARGV[1]; -} -$num_episodes = 1; - -$env_cmd = "./ale -game_controller fifo roms/$rom.bin"; -$agent_cmd = "java -Xmx1024M -jar $agentJarFile " . $arg; - -if ($OS eq "linux" or $OS eq "darwin") { - # platform specific code -} - - -# ----------------------------------- -# main loop -# ----------------------------------- - - local (*AGENT_READ, *AGENT_WRITE, *AGENT_ERR); - local (*ENV_READ, *ENV_WRITE, *ENV_ERR); - - $pid_env = open2(\*ENV_READ, \*ENV_WRITE, $env_cmd); - $pid_agent = open2(\*AGENT_READ, \*AGENT_WRITE, $agent_cmd); - - print "Started Agent with PID: $pid_agent\n" if $debug_mode > 1; - print "Started Environment with PID: $pid_env\n" if $debug_mode > 1; - - - -$episode = 1; - -$total_reward = 0; -$step = 1; -$episode_on = 0; -$ep_start_time = time; - -ALL_EPISODES: { - do { - # read from environment - die "environment terminated unexpectedly" unless kill(0, $pid_env); - $l = ; - - # send to agent - print AGENT_WRITE $l; - - # extract reward and terminal status, skip during handshaking - if ($episode_on) { - @f = split /:/, $l; - $tok = $f[$#f-1]; - @g = split /,/, $tok; - $terminate = $g[0]; - $total_reward += $g[1]; - - if ($terminate == 1) { - $ep_end_time = time; - $episode_on = 0; - $total_time = time - $ep_start_time; - print "Episode $episode $total_reward $total_time $step\n"; - - $ep_start_time = time; - $total_reward = 0; - $step = 1; - - last ALL_EPISODES if ++$episode > $num_episodes && $num_episodes > 0; - } - print "Time: $c, Reward: $g[1],\n" if $debug_mode >= 2; - } - - # read from agent, send to environment - die "agent terminated unexpectedly" unless kill(0, $pid_agent); - $a = ; - print ENV_WRITE $a; - print "Action: $a\n" if $debug_mode >= 2; - - # On system reset 
start the episode - $player_a_act = (split /,/, $a)[0]; - $episode_on = 1 if $player_a_act == 45; - - die "agent failed to send back an action" if length($player_a_act) == 0; - $step++; - } while (1); -} - -# Somewhat ugly; we have to write to the stream before we ask the agent -# for an action because of blocking sockets -print AGENT_WRITE "DIE\n"; -$a = ; - -# terminate agent/environment -waitpid($pid_agent, 0); -close ENV_READ; close ENV_WRITE; -kill $pid_env, 9; diff --git a/doc/java-agent/code/src/ale/agents/AbstractAgent.java b/doc/java-agent/code/src/ale/agents/AbstractAgent.java deleted file mode 100644 index 36e330f89..000000000 --- a/doc/java-agent/code/src/ale/agents/AbstractAgent.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.agents; - -import ale.gui.AbstractUI; -import ale.gui.AgentGUI; -import ale.gui.NullUI; -import ale.io.ALEPipes; -import ale.io.ConsoleRAM; -import ale.io.RLData; -import ale.screen.ColorPalette; -import ale.screen.NTSCPalette; -import ale.screen.SECAMPalette; -import ale.screen.ScreenConverter; -import ale.screen.ScreenMatrix; -import java.awt.image.BufferedImage; -import java.io.IOException; - -/** An abstract agent class. 
New agents can be created by extending this class - * and implementing its abstract methods. - * - * @author Marc G. Bellemare - */ -public abstract class AbstractAgent { - /** Used to convert ALE screen data to GUI images */ - protected final ScreenConverter converter; - - /** The UI used for displaying images and receiving actions */ - protected AbstractUI ui; - /** The I/O object used to communicate with ALE */ - protected ALEPipes io; - - /** Parameters */ - /** Whether to use a GUI */ - protected boolean useGUI; - /** If non-null, we communicate via named pipes rather than stdin/stdout */ - protected String namedPipesBasename; - - /** Create a new agent that communicates with ALE via stdin/out and - * uses the graphical user interface. - */ - public AbstractAgent() { - this(true, null); - } - - /** Create a new agent with the specified parameters. The user can specify - * the base name for two FIFO pipes used to communicate with ALE. If - * namedPipesBasename is not null, then the files namedPipesBasename+"_in" - * and namedPipesBasename+"_out" are read and written to by the agent. - * See ALE documentation for more details on running with named pipes. - * - * @param useGUI If true, a GUI is used to display received screen data. - * @param namedPipesBasename If non-null, the base filename for the two FIFO - * files used to communicate with ALE. - */ - public AbstractAgent(boolean useGUI, String namedPipesBasename) { - this.useGUI = useGUI; - this.namedPipesBasename = namedPipesBasename; - - // Create the color palette we will use to interpret ALE data - ColorPalette palette = makePalette("NTSC"); - - // Create an object to convert indexed images to Java images - converter = new ScreenConverter(palette); - - init(); - } - - /** Create a color palette used to display the screen. The currently available - * choices are NTSC (128 colors) and SECAM (8 colors). - * - * @param paletteName The name of the palette (NTSC or SECAM). 
- * @return - */ - protected final ColorPalette makePalette(String paletteName) { - if (paletteName.equals("NTSC")) - return new NTSCPalette(); - else if (paletteName.equals("SECAM")) - return new SECAMPalette(); - else - throw new IllegalArgumentException("Invalid palette: "+paletteName); - } - - /** Initialize relevant bits of the agent - * - */ - public final void init() { - if (useGUI) { - // Create the GUI - ui = new AgentGUI(); - } - else { - ui = new NullUI(); - } - - // Create the relevant I/O objects - initIO(); - } - - /** Initialize the I/O object for this agent. - * - */ - protected void initIO() { - io = null; - - try { - // Initialize the pipes; use named pipes if requested - if (namedPipesBasename != null) - io = new ALEPipes(namedPipesBasename + "out", namedPipesBasename + "in"); - else - io = new ALEPipes(); - - // Determine which information to request from ALE - io.setUpdateScreen(useGUI || wantsScreenData()); - io.setUpdateRam(wantsRamData()); - io.setUpdateRL(wantsRLData()); - io.initPipes(); - } - catch (IOException e) { - System.err.println ("Could not initialize pipes: "+e.getMessage()); - System.exit(-1); - } - } - - /** The main program loop. In turn, we will obtain a new screen from ALE, - * pass it on to the agent and send back an action (which may be a reset - * request). - */ - public void run() { - boolean done = false; - - // Loop until we're done - while (!done) { - // Obtain relevant data from ALE - done = io.observe(); - // The I/O channel will return true once EOF is received - if (done) break; - - // Obtain the screen matrix - ScreenMatrix screen = io.getScreen(); - // Pass it on to UI - updateImage(screen); - // ... and to the agent - observe(screen, io.getRAM(), io.getRLData()); - - // Request an action from the agent - int action = selectAction(); - // Send it back to ALE - done = io.act(action); - - // Ask the agent whether it wants us to pause - long pauseLength = getPauseLength(); - // If so, pause! 
- if (pauseLength > 0) { - pause(pauseLength); - } - - // The agent also tells us when to terminate - done |= shouldTerminate(); - } - - // Clean up the GUI - ui.die(); - } - - /** Internal method to update the image displayed in the GUI. - * - * @param currentScreen - */ - protected void updateImage(ScreenMatrix currentScreen) { - // We know that the NullUI does not want image data, so don't spend time - // converting the image - if (ui instanceof NullUI) { - ui.updateFrameCount(); - return; - } - - // Convert the screen matrix to an image - BufferedImage img = converter.convert(currentScreen); - - // Provide the new image to the UI - ui.updateFrameCount(); - ui.setImage(img); - ui.refresh(); - } - - protected void pause(long waitTime) { - try { - Thread.sleep(waitTime); - } - catch (Exception e) { - } - } - - /** Returns how long to pause for, in milliseconds, before the next time step. - * - * @return - */ - public abstract long getPauseLength(); - /** Returns the agent's next action. - * - * @return - */ - public abstract int selectAction(); - /** Provides the agent with the latest screen, RAM and RL data. - * - * @param screen - * @param ram - * @param rlData - */ - public abstract void observe(ScreenMatrix screen, ConsoleRAM ram, RLData rlData); - /** Returns true to indicate that we should exit the program. - * - * @return - */ - public abstract boolean shouldTerminate(); - /** Returns true if we want to receive the screen matrix from ALE. - * - * @return - */ - public abstract boolean wantsScreenData(); - /** Returns true if we want to receive the RAM from ALE. - * - * @return - */ - public abstract boolean wantsRamData(); - /** Returns true if we want to receive RL data from ALE. 
- * - * @return - */ - public abstract boolean wantsRLData(); -} diff --git a/doc/java-agent/code/src/ale/agents/HumanAgent.java b/doc/java-agent/code/src/ale/agents/HumanAgent.java deleted file mode 100644 index 9208722e4..000000000 --- a/doc/java-agent/code/src/ale/agents/HumanAgent.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.agents; - -import ale.io.ConsoleRAM; -import ale.io.RLData; -import ale.movie.MovieGenerator; -import ale.screen.ScreenMatrix; -import java.awt.image.BufferedImage; - -/** An 'agent' meant to be controlled by a human. Used to play the game and - * demonstrate the GUI. - * - * @author Marc G. Bellemare - */ -public class HumanAgent extends AbstractAgent { - /** Whether we want to export screen data to disk */ - protected boolean exportFrames; - /** The base filename used for exporting screen data. The files will be name - * sequentially, e.g. frame_000000.png, frame_000001.png, etc. 
- * @see MovieGenerator - */ - protected String exportFrameBasename = "frames/frame_"; - - /** The object used to save frames to the disk */ - protected MovieGenerator movieGenerator; - - /** Variables to enforce 60 frames per second */ - protected long lastFrameTime; - protected long lastWaitTime; - protected final int framesPerSecond = 60; - protected long millisFraction = 0; - protected long timeError = 0; - - /** Variables to display relevant RL information */ - protected int rewardDisplayCounter = 0; - protected int lastReward = 0; - - /** Keep track of whether we told the user that the game is over */ - protected boolean displayedGameOver = false; - - protected int numFramesToDisplayRewardFor = framesPerSecond * 1; - - public HumanAgent() { - super(); - } - - public HumanAgent(boolean useGUI, String namedPipesName, boolean exportFrames) { - super(useGUI, namedPipesName); - - this.exportFrames = exportFrames; - - // If we want to export frames, we also need to create the relevant object - if (this.exportFrames) { - movieGenerator = new MovieGenerator(exportFrameBasename); - } - } - - public boolean wantsScreenData() { - return true; - } - - public boolean wantsRamData() { - return false; - } - - public boolean wantsRLData() { - return true; - } - - public boolean shouldTerminate() { - // Terminate if the 'q' key was pressed on the UI - return ui.quitRequested(); - } - - @Override - public long getPauseLength() { - // The idea here is to try and compensate for I/O delays by adjusting - // the pause length from step to step - long targetDelta = 1000 / framesPerSecond; - long deltaRemainder = 1000 % framesPerSecond; - millisFraction += deltaRemainder; - - // Correct for fractional deltas - while (millisFraction > framesPerSecond) { - targetDelta += 1; - millisFraction -= framesPerSecond; - } - - long time = System.currentTimeMillis(); - if (lastFrameTime == 0) { - timeError += targetDelta; - } - else { - long deltaTime = time - lastFrameTime; - // Correct the 
timing by how much elapsed - timeError += targetDelta - (deltaTime - lastWaitTime); - } - - lastFrameTime = time; - - if (timeError > 0) { - lastWaitTime = timeError; - timeError = 0; - return lastWaitTime; - } - else { // Don't wait if we're behind - lastWaitTime = 0; - return 0; - } - } - - @Override - public int selectAction() { - // Obtain the action from the UI - int action = ui.getKeyboardAction(); - - return action; - } - - @Override - public void observe(ScreenMatrix screen, ConsoleRAM ram, RLData rlData) { - // Export frames if so desired - if (exportFrames) { - BufferedImage image = converter.convert(screen); - movieGenerator.record(image); - } - - // Display reward information via messages - if (rlData.reward != 0) - ui.addMessage("Reward: "+rlData.reward); - // Also print out 'game over' when we received the terminal bit - if (rlData.isTerminal) { - if (!displayedGameOver) { - ui.addMessage("GAME OVER"); - displayedGameOver = true; - } - } - else - displayedGameOver = false; - } - - /** A simple main class for running the Human agent. 
- * - * @param args - */ - public static void main(String[] args) { - // Parameters; default values - boolean useGUI = true; - String namedPipesName = null; - boolean exportFrames = false; - - // Parse arguments - int argIndex = 0; - - boolean doneParsing = (args.length == 0); - - // Loop through the list of arguments - while (!doneParsing) { - // -nogui: do not display the Java GUI - if (args[argIndex].equals("-nogui")) { - useGUI = false; - argIndex++; - } - // -named_pipes : use to communicate with ALE via named pipes - // (instead of stdin/out) - else if (args[argIndex].equals("-named_pipes") && (argIndex + 1) < args.length) { - namedPipesName = args[argIndex+1]; - - argIndex += 2; - } - // -export_frames: use this to save frames as PNG images - else if (args[argIndex].equals("-export_frames")) { - exportFrames = true; - argIndex++; - } - // If the argument is unrecognized, exit - else { - printUsage(); - System.exit(-1); - } - - // Once we have parsed all arguments, stop - if (argIndex >= args.length) - doneParsing = true; - } - - HumanAgent agent = new HumanAgent(useGUI, namedPipesName, exportFrames); - - agent.run(); - } - - /** Prints out command-line usage text. - * - */ - public static void printUsage() { - System.err.println ("Invalid argument."); - System.err.println ("Usage: java HumanAgent [-nogui] [-named_pipes filename] [-export_frames]\n"); - System.err.println ("Example: java HumanAgent -named_pipes /tmp/ale_fifo_"); - System.err.println (" Will start an agent that communicates with ALE via named pipes \n"+ - " /tmp/ale_fifo_in and /tmp/ale_fifo_out"); - } -} diff --git a/doc/java-agent/code/src/ale/agents/RLAgent.java b/doc/java-agent/code/src/ale/agents/RLAgent.java deleted file mode 100644 index ca3bcd8e8..000000000 --- a/doc/java-agent/code/src/ale/agents/RLAgent.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. 
Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.agents; - -import ale.io.Actions; -import ale.io.ConsoleRAM; -import ale.io.RLData; -import ale.rl.FeatureMap; -import ale.rl.FrameHistory; -import ale.rl.SarsaLearner; -import ale.screen.ScreenMatrix; - -/** A RL agent which learns using the SARSA(lambda) algorithm and acts according - * to an epsilon-greedy policy. - * At the time of writing this comment, the agent is in charge of resetting the - * system at the end of an episode. - * - * @author Marc G. Bellemare - */ -public class RLAgent extends AbstractAgent { - protected final int numActions = Actions.numPlayerActions; - - /** The map from raw screen data to a feature vector */ - protected FeatureMap featureMap; - /** The agent core (SARSA and epsilon-greedy) */ - protected SarsaLearner learner; - - /** The action selected by the learner */ - protected int learnerAction; - /** A history of recent screen data */ - protected FrameHistory history; - - /** Keeping track of how many episodes we've played */ - protected int episodeNumber; - protected int maxNumEpisodes = 10; - - /** Whether this is the first step of a given episode */ - protected boolean firstStep; - /** Whether we should send a reset action */ - protected boolean requestReset; - - /** Creates a new RL agent. 
- * - * @param useGUI - * @param pipesBasename - */ - public RLAgent(boolean useGUI, String pipesBasename) { - super(useGUI, pipesBasename); - featureMap = new FeatureMap(); - // Create a new learner - learner = new SarsaLearner(featureMap.numFeatures(), numActions); - - // Normalize the learning rate by the total number of features - learner.setAlpha(0.01 / featureMap.numFeatures()); - - int requiredHistoryLength = featureMap.historyLength(); - - history = new FrameHistory(requiredHistoryLength); - - requestReset = true; - episodeNumber = 1; - } - - public boolean shouldTerminate() { - // Terminate when we are told to do so by the outside world - return (io.wantsTerminate() || episodeNumber > maxNumEpisodes); - } - - @Override - public long getPauseLength() { - return 0; - } - - @Override - public int selectAction() { - // If reset is requested, send it - if (requestReset) { - firstStep = true; - requestReset = false; - return Actions.map("system_reset"); - } - - // Otherwise send back the action taken by the learner (see rlStep()) - else - return learnerAction; - } - - @Override - public void observe(ScreenMatrix image, ConsoleRAM ram, RLData rlData) { - // Convert the image history to a feature vector - history.addFrame(image); - - rlStep(image, ram, rlData); - } - - /** Take one RL step by observing an image and selecting the next action. - * This is done by invoking the SarsaLearner's agent_ methods. 
- * - * @param image - * @param ram - * @param features - */ - public void rlStep(ScreenMatrix image, ConsoleRAM ram, RLData rlData) { - // Obtain the feature vector for this image - double[] features = featureMap.getFeatures(history); - - if (firstStep) { - // On the first step, no reward is computed - learnerAction = learner.agent_start(features); - - firstStep = false; - } - else { - boolean terminal = rlData.isTerminal; - double reward = rlData.reward; - - // Regular RL step - if (!terminal) - learnerAction = learner.agent_step(reward, features); - // When we receive the terminal signal, we disregard the screen data - // and instead transit to the 'null state' - else - episodeEnd(reward); - } - } - - /** Perform an end-of-episode learning step */ - protected void episodeEnd(double reward) { - learner.agent_end(reward); - // As a sanity check we set learnerAction; this is overriden by the reset - learnerAction = Actions.map("player_a_noop"); - - // We will want to reset, since we have reached the end of the episode - requestReset = true; - - // Print the episode number - System.err.println ("Episode "+episodeNumber); - episodeNumber++; - - if (episodeNumber > maxNumEpisodes) - System.err.println (maxNumEpisodes+" episodes, terminating..."); - } - - public boolean wantsRamData() { - return false; - } - - public boolean wantsRLData() { - return true; - } - - public boolean wantsScreenData() { - return true; - } - - /** Main class for running the RL agent. 
- * - * @param args - */ - public static void main(String[] args) { - // Parameters; default values - boolean useGUI = true; - String namedPipesName = null; - - // Parse arguments - int argIndex = 0; - - boolean doneParsing = (args.length == 0); - - // Loop through the list of arguments - while (!doneParsing) { - // -nogui: do not display the Java GUI - if (args[argIndex].equals("-nogui")) { - useGUI = false; - argIndex++; - } - // -named_pipes : use to communicate with ALE via named pipes - // (instead of stdin/out) - else if (args[argIndex].equals("-named_pipes") && (argIndex + 1) < args.length) { - namedPipesName = args[argIndex+1]; - - argIndex += 2; - } - // If the argument is unrecognized, exit - else { - printUsage(); - System.exit(-1); - } - - // Once we have parsed all arguments, stop - if (argIndex >= args.length) - doneParsing = true; - } - - RLAgent agent = new RLAgent(useGUI, namedPipesName); - - agent.run(); - } - - /** Prints out command-line usage text. - * - */ - public static void printUsage() { - System.err.println ("Invalid argument."); - System.err.println ("Usage: java RLAgent [-nogui] [-named_pipes filename]\n"); - System.err.println ("Example: java RLAgent -named_pipes /tmp/ale_fifo_"); - System.err.println (" Will start an agent that communicates with ALE via named pipes \n"+ - " /tmp/ale_fifo_in and /tmp/ale_fifo_out"); - } -} diff --git a/doc/java-agent/code/src/ale/gui/AbstractUI.java b/doc/java-agent/code/src/ale/gui/AbstractUI.java deleted file mode 100644 index 06a65c354..000000000 --- a/doc/java-agent/code/src/ale/gui/AbstractUI.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import java.awt.image.BufferedImage; - -/** An interface describing a UI. This gets subclassed into a graphical UI, - * a command-line UI, etc... as needed. - * - * @author Marc G. Bellemare - */ -public interface AbstractUI { - /** This method is called to notify the UI that we want to terminate. */ - public void die(); - /** Notifies the UI that it should refresh its display */ - public void refresh(); - - /** Sets the screen image to be displayed in the GUI */ - public void setImage(BufferedImage img); - - /** Provides a string to be displayed (at the bottom of the GUI if using a GUI) */ - public void setCenterString(String s); - public void addMessage(String s); - - /** Obtain an ALE action from the UI, e.g. via the keyboard. - * - * @return - */ - public int getKeyboardAction(); - /** Returns true if the user requested the end of the program, e.g. via a - * keypress. - * @return - */ - public boolean quitRequested(); - - /** A method called to notify the UI that a new frame has been processed. - * Used to display frames per second information. - */ - public void updateFrameCount(); -} diff --git a/doc/java-agent/code/src/ale/gui/AgentGUI.java b/doc/java-agent/code/src/ale/gui/AgentGUI.java deleted file mode 100644 index a72b65c64..000000000 --- a/doc/java-agent/code/src/ale/gui/AgentGUI.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. 
Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import java.awt.image.BufferedImage; -import javax.swing.JFrame; - -/** GUI for the Java ALE agent. - * - * @author Marc G. Bellemare - */ -public final class AgentGUI extends JFrame implements AbstractUI { - /** An object in which we display the screen image */ - protected final ScreenDisplay panel; - /** An object that listens for key presses */ - protected final KeyboardControl keyboard; - - /** Create a new GUI - * - */ - public AgentGUI(){ - // Create the keyboard and image panel - keyboard = new KeyboardControl(); - panel = new ScreenDisplay(); - add(panel); - - this.addKeyListener(keyboard); - this.setSize(panel.getPreferredSize()); - - pack(); - setLocationRelativeTo(null); - - setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - setVisible(true); - } - - /** When die() is called, we want to safely close the GUI */ - public void die() { - this.dispose(); - } - - public void setImage(BufferedImage img) { - panel.setImage(img); - } - - public void setCenterString(String s) { - panel.setCenterString(s); - } - - public void addMessage(String s) { - panel.addMessage(s); - } - - public int getKeyboardAction() { - return keyboard.toALEAction(); - } - - public void updateFrameCount() { - panel.updateFrameCount(); - } - - public boolean quitRequested() { - return (keyboard.quit == true); - } - - public void 
refresh() { - this.repaint(); - } -} diff --git a/doc/java-agent/code/src/ale/gui/KeyboardControl.java b/doc/java-agent/code/src/ale/gui/KeyboardControl.java deleted file mode 100644 index 512113bfe..000000000 --- a/doc/java-agent/code/src/ale/gui/KeyboardControl.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import ale.io.Actions; -import java.awt.event.KeyEvent; -import java.awt.event.KeyListener; - -/** A crude keyboard controller. The following keys are mapped: - * R - reset - * Q - quit - * space - fire - * ASWD, arrow keys - joystick movement - * - * @author Marc G. Bellemare - */ -public class KeyboardControl implements KeyListener { - /** Variables used to keep track of which keys are pressed */ - public boolean up, down; - public boolean left, right; - public boolean fire; - - public boolean reset; - - public boolean quit; - - /** Creates a new keyboard controller. 
- * - */ - public KeyboardControl() { - // Initially all keys are assumed not pressed - up = down = left = right = fire = false; - reset = false; - quit = false; - } - - public void keyTyped(KeyEvent e) { - } - - public void keyPressed(KeyEvent e) { - // Parse different key presses by setting the relevant boolean flags - switch (e.getKeyCode()) { - case KeyEvent.VK_UP: - case KeyEvent.VK_W: - up = true; - break; - case KeyEvent.VK_DOWN: - case KeyEvent.VK_S: - down = true; - break; - case KeyEvent.VK_LEFT: - case KeyEvent.VK_A: - left = true; - break; - case KeyEvent.VK_RIGHT: - case KeyEvent.VK_D: - right = true; - break; - case KeyEvent.VK_SPACE: - fire = true; - break; - case KeyEvent.VK_R: - reset = true; - break; - case KeyEvent.VK_ESCAPE: - quit = true; - break; - } - } - - public void keyReleased(KeyEvent e) { - // Opposite of keyPressed; sets the relevant boolean flag to false - switch (e.getKeyCode()) { - case KeyEvent.VK_UP: - case KeyEvent.VK_W: - up = false; - break; - case KeyEvent.VK_DOWN: - case KeyEvent.VK_S: - down = false; - break; - case KeyEvent.VK_LEFT: - case KeyEvent.VK_A: - left = false; - break; - case KeyEvent.VK_RIGHT: - case KeyEvent.VK_D: - right = false; - break; - case KeyEvent.VK_SPACE: - fire = false; - break; - case KeyEvent.VK_R: - reset = false; - break; - case KeyEvent.VK_ESCAPE: - quit = false; - break; - } - } - - /** An array to map a bit-wise representation of the keypresses to ALE actions. - * 1 = fire, 2 = up, 4 = right, 8 = left, 16 = down - * - * -1 indicate an invalid combination, e.g. left/right or up/down. These should - * be filtered out in toALEAction. - */ - private int[] bitKeysMap = new int[] { - 0, 1, 2, 10, 3, 11, 6, 14, 4, 12, 7, 15, -1, -1, -1, -1, - 5, 13, -1, -1, 8, 16, -1, -1, 9, 17, -1, -1, -1, -1, -1, -1 - }; - - /** Converts the current keypresses to an ALE action (for player A). 
- * - * @return - */ - public int toALEAction() { - int bitfield = 0; - - // Reset overrides everything - if (reset) return Actions.map("system_reset"); - - // Cancel out left/right, up/down; obtain the corresponding bit representation - if (left == right) bitfield |= 0; - else if (left) bitfield |= 0x08; - else if (right) bitfield |= 0x04; - - if (up == down) bitfield |= 0; - else if (up) bitfield |= 0x02; - else if (down) bitfield |= 0x10; - - if (fire) bitfield |= 0x01; - - // Map the bits to an ALE action - return bitKeysMap[bitfield]; - } - -} diff --git a/doc/java-agent/code/src/ale/gui/MessageHistory.java b/doc/java-agent/code/src/ale/gui/MessageHistory.java deleted file mode 100644 index 96111ba2b..000000000 --- a/doc/java-agent/code/src/ale/gui/MessageHistory.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import java.util.LinkedList; -import java.util.List; - -/** Encapsulates a list of messages. Each message is timestamped. - * - * @author Marc G. 
Bellemare - */ -public class MessageHistory { - public class Message { - protected String text; - protected long timeStamp; - - public Message(String text, long timeStamp) { - this.text = text; - this.timeStamp = timeStamp; - } - - public String getText() { return text; } - public long getTimeStamp() { return timeStamp; } - } - - /** A list of messages, with the first element being the oldest */ - protected LinkedList messages; - - public MessageHistory() { - messages = new LinkedList(); - } - - /** Adds a message to our history. The time at which the message was added - * is also recorded. - * - * @param text The message to be added. - */ - public void addMessage(String text) { - long currentTime = System.currentTimeMillis(); - - messages.addLast(new Message(text, currentTime)); - } - - /** Returns a list of current messages */ - public List getMessages() { - return messages; - } - - /** Remove any message which is older than 'maxAge'. The age of a message is - * found by comparing its timestamp with the current time. - * - * @param maxAge The maximum age, in milliseconds, of a message. - */ - public void update(long maxAge) { - long currentTime = System.currentTimeMillis(); - - while (!messages.isEmpty()) { - Message m = messages.getFirst(); - - // Delete this message if it is old enough - long age = currentTime - m.timeStamp; - if (age > maxAge) - messages.removeFirst(); - else - break; // Messages are ordered by timestamp so we can stop - } - } -} diff --git a/doc/java-agent/code/src/ale/gui/NullUI.java b/doc/java-agent/code/src/ale/gui/NullUI.java deleted file mode 100644 index 332995f0f..000000000 --- a/doc/java-agent/code/src/ale/gui/NullUI.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. 
Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import java.awt.image.BufferedImage; - -/** An empty UI for running console experiments. All abstract methods are implemented - * to do nothing. - * - * @author Marc G. Bellemare - */ -public class NullUI implements AbstractUI { - public void die() { - } - - public void setImage(BufferedImage img) { - } - - public void setCenterString(String s) { - } - - public void addMessage(String s) { - } - - public int getKeyboardAction() { - return 0; - } - - public void updateFrameCount() { - } - - public boolean quitRequested() { - return false; - } - - public void refresh() { - } -} diff --git a/doc/java-agent/code/src/ale/gui/ScreenDisplay.java b/doc/java-agent/code/src/ale/gui/ScreenDisplay.java deleted file mode 100644 index aea402918..000000000 --- a/doc/java-agent/code/src/ale/gui/ScreenDisplay.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.gui; - -import java.awt.Color; -import java.awt.Dimension; -import java.awt.Graphics; -import java.awt.Graphics2D; -import java.awt.image.BufferedImage; -import javax.swing.JPanel; - -/** Displays the current Atari frame in a simple GUI. - * - * @author Marc G. Bellemare - */ -public class ScreenDisplay extends JPanel { - - /** The image to be displayed */ - BufferedImage image; - /** The scale at which we want to display (3x normal height) */ - int yScaleFactor = 3; - /** The x-axis scale at which we want to display (6x normal width) */ - int xScaleFactor = 6; - /** The default screen width */ - int defaultWidth = 160; - /** The default screen height */ - int defaultHeight = 210; - /** The height of the status bar at the bottom of the GUI */ - int statusBarHeight = 20; - /** Variables storing some relevant GUI dimensions */ - int statusBarY; - int windowWidth; - int windowHeight; - /** Variables used to compute the GUI frames per second */ - int frameCount = 0; - double fps = 0; - long frameTime = 0; - int updateRate = 5; // How often to update FPS, in hertz - double fpsAlpha = 0.9; - - /** Additional user strings to be displayed */ - String centerString; - MessageHistory messages; - - long maxMessageAge = 3000; - - public ScreenDisplay() { - super(); - - messages = new MessageHistory(); - } - - public Dimension getPreferredSize() { - int width, height; - - statusBarY = defaultHeight * yScaleFactor; - width = defaultWidth * xScaleFactor; - height = statusBarY + statusBarHeight; - - windowWidth = width; - windowHeight = height; - - return new Dimension(width, height); - } - - 
@Override - public void paintComponent(Graphics g) { - super.paintComponent(g); - drawImages(g); - } - - public void setImage(BufferedImage img) { - synchronized (this) { - this.image = img; - } - } - - public void setCenterString(String s) { - synchronized (this) { - centerString = s; - } - } - - public void addMessage(String s) { - synchronized (this) { - messages.addMessage(s); - } - } - - /** This methods calculates how many frames per second are being displayed. - * Exponential averaging is used for smoothness. - */ - public void updateFrameCount() { - synchronized (this) { - frameCount++; - long time = System.currentTimeMillis(); - - // If one second has elapsed, update FPS - if (time - frameTime >= 1000 / updateRate) { - if (fps == 0) { - fps = frameCount; - } else { - // Compute the exact number of (fractional) ticks since FPS update - double ticksSinceUpdate = (time - frameTime) * updateRate / 1000.0; - double alpha = Math.pow(fpsAlpha, ticksSinceUpdate); - - fps = alpha * fps + (1 - alpha) * (frameCount * updateRate / ticksSinceUpdate); - } - - frameCount = 0; - frameTime = time; - } - } - } - - /** Helper method that the display by the given (x,y) factors. 
- * - * @param g - * @param xFactor - * @param yFactor - */ - private void rescale(Graphics g, double xFactor, double yFactor) { - if (g instanceof Graphics2D) { - Graphics2D g2d = (Graphics2D) g; - g2d.scale(xFactor, yFactor); - } - } - - public void drawImages(Graphics g) { - synchronized (this) { - // Do some message cleanup if necessary - messages.update(maxMessageAge); - - // Zoom up on the Atari image - rescale(g, xScaleFactor, yScaleFactor); - // draw the atari image - if (image != null) { - g.drawImage(image, 0, 0, null); - } - - // Zoom out to draw text - rescale(g, 1.0 / xScaleFactor, 1.0 / yScaleFactor); - - int statusBarTextOffset = statusBarY + 15; - - // draw FPS information in the bottom left corner - if (fps > 0) { - g.setColor(Color.BLACK); - double roundedFPS = (Math.round(fps * 10) / 10.0); - g.drawString("FPS: " + roundedFPS, 0, statusBarTextOffset); - } - - // Draw a string center-bottom - if (centerString != null) { - int stringLength = g.getFontMetrics().stringWidth(centerString); - g.drawString(centerString, (windowWidth - stringLength) / 2, statusBarTextOffset); - } - - int textOffset = statusBarY - 4; - - g.setColor(Color.YELLOW); - - // Draw messages in the bottom right corner - for (MessageHistory.Message m : messages.getMessages()) { - // Draw one message - String text = m.getText(); - int stringLength = g.getFontMetrics().stringWidth(text); - g.drawString(text, windowWidth - stringLength - 2, textOffset); - - // Decrement textOffset so that the next (older) message - // is drawn on top of it - textOffset -= g.getFontMetrics().getHeight(); - } - } - } -} diff --git a/doc/java-agent/code/src/ale/io/ALEPipes.java b/doc/java-agent/code/src/ale/io/ALEPipes.java deleted file mode 100644 index dfa424e54..000000000 --- a/doc/java-agent/code/src/ale/io/ALEPipes.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. 
Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.io; - -import ale.screen.ScreenMatrix; -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintStream; - -/** - * Class that communicates with ALE via pipes. - * The protocol used here is ALE 0.3. - * - * @author Marc G. Bellemare - */ -public class ALEPipes { - /** Data structure holding the screen image */ - protected ScreenMatrix screen; - /** Data structure holding the RAM data */ - protected ConsoleRAM ram; - /** Data structure holding RL data */ - protected RLData rlData; - /** Whether termination was requested from the I/O channel */ - protected boolean terminateRequested; - - /** Input object */ - protected final BufferedReader in; - /** Output object */ - protected final PrintStream out; - - /** Flags indicating the kind of data we want to receive from ALE */ - protected boolean updateScreen, updateRam, updateRLData; - /** We will request that ALE sends data every 'frameskip' frames. 
*/ - protected int frameskip; - - /** The action we send for player B (always noop in this case) */ - protected final int playerBAction = Actions.map("player_b_noop"); - - /** A state variable used to track of whether we should receive or send data */ - protected boolean hasObserved; - - protected boolean useRLE = true; - - /** Uses stdin/stdout for communication */ - public ALEPipes() throws IOException { - this(new BufferedReader(new InputStreamReader(System.in)), System.out); - } - - /** Uses named pipes */ - public ALEPipes(String pInfile, String pOutfile) throws IOException { - this(new BufferedReader(new InputStreamReader(new FileInputStream(pInfile))), - new PrintStream(new FileOutputStream(pOutfile))); - } - - /** Initialize the default variables and set the I/O streams. - * - * @param in - * @param out - */ - private ALEPipes(BufferedReader in, PrintStream out) { - updateScreen = true; - updateRam = false; - updateRLData = true; - frameskip = 0; - - this.in = in; - this.out = out; - } - - /** Closes the I/O channel. - * - */ - public void close() { - try { - in.close(); - out.close(); - } - catch (IOException e) { - // Not sure what to do if we can't close streams... - } - } - - public void setUpdateScreen(boolean updateScreen) { - this.updateScreen = updateScreen; - } - - public void setUpdateRam(boolean updateRam) { - this.updateRam = updateRam; - } - - public void setUpdateRL(boolean updateRL) { - this.updateRLData = updateRL; - } - - /** A blocking method that sends initial information to ALE. See the - * documentation for protocol details. 
- * - */ - public void initPipes() throws IOException { - // Read in the width and height of the screen - // Format: -\n - String line = in.readLine(); - String[] tokens = line.split("-"); - int width = Integer.parseInt(tokens[0]); - int height = Integer.parseInt(tokens[1]); - - // Do some error checking - our width and height should be positive - if (width <= 0 || height <= 0) { - throw new RuntimeException("Invalid width/height: "+width+"x"+height); - } - - // Create the data structures used to store received information - screen = new ScreenMatrix(width, height); - ram = new ConsoleRAM(); - rlData = new RLData(); - - // Now send back our preferences - // Format: ,,,\n - out.printf("%d,%d,%d,%d\n", updateScreen? 1:0, updateRam? 1:0, frameskip, - updateRLData? 1:0); - out.flush(); - } - - public int getFrameSkip() { - return frameskip; - } - - public void setFrameSkip(int frameskip) { - this.frameskip = frameskip; - } - - /** Returns the screen matrix from ALE. - * - * @return - */ - public ScreenMatrix getScreen() { - return screen; - } - - /** Returns the RAM from ALE. - * - * @return - */ - public ConsoleRAM getRAM() { - return ram; - } - - public RLData getRLData() { - return rlData; - } - - public boolean wantsTerminate() { - return terminateRequested; - } - - /** A blocking method which will get the next time step from ALE. - * - */ - public boolean observe() { - // Ensure that observe() is not called twice, as it will otherwise block - // as both ALE and the agent wait for data. 
- if (hasObserved) { - throw new RuntimeException("observe() called without subsequent act()."); - } - else - hasObserved = true; - - String line = null; - - // First read in a new line from ALE - try { - line = in.readLine(); - if (line == null) return true; - } - catch (IOException e) { - return true; - } - - // Catch the special keyword 'DIE' - if (line.equals("DIE")) { - terminateRequested = true; - return false; - } - - // Ignore blank lines (still send an action) - if (line.length() > 0) { - // The data format is: - // :::\n - // Some of these elements may be missing, in which case the separating - // colons are not sent. For example, if we only want ram and rl data, - // the format is :: - - String[] tokens = line.split(":"); - - int tokenIndex = 0; - - // If necessary, first read the RAM data - if (updateRam) - readRam(tokens[tokenIndex++]); - - // Then update the screen - if (updateScreen) { - String screenString = tokens[tokenIndex++]; - - if (useRLE) - readScreenRLE(screenString); - else - readScreenMatrix(screenString); - } - - // Finally obtain RL data - if (updateRLData) { - readRLData(tokens[tokenIndex++]); - } - } - - return false; - } - - /** After a call to observe(), send back the necessary action. - * - * @param act - * @return - */ - public boolean act(int act) { - // Ensure that we called observe() last - if (!hasObserved) { - throw new RuntimeException("act() called before observe()."); - } - else - hasObserved = false; - - sendAction(act); - - return false; - } - - /** Helper function to send out an action to ALE */ - public void sendAction(int act) { - // Send player A's action, as well as the NOOP for player B - // Format: ,\n - out.printf("%d,%d\n", act, 18); - out.flush(); - } - - /** Read in RL data from a given line. 
- * - * @param line - */ - public void readRLData(String line) { - // Parse RL data - // Format: :\n - String[] tokens = line.split(","); - - // Parse the terminal bit - rlData.isTerminal = (Integer.parseInt(tokens[0]) == 1); - rlData.reward = Integer.parseInt(tokens[1]); - } - - /** Reads the console RAM from a string - * @param line The RAM-part of the string sent by ALE. - */ - public void readRam(String line) { - int offset = 0; - - // Read in all of the RAM - // Format: ... - // where ri is 2 characters representing an integer between 0 and 0xFF - for (int ptr = 0; ptr < ConsoleRAM.RAM_SIZE; ptr++) { - int v = Integer.parseInt(line.substring(offset, offset + 2), 16); - ram.ram[ptr] = v; - - offset += 2; - } - } - - /** Reads the screen matrix update from a string. The string only contains the - * pixels that differ from the previous frame. - * - * @param line The screen part of the string sent by ALE. - */ - public void readScreenMatrix(String line) { - int ptr = 0; - - // 0.3 protocol - send everything - for (int y = 0; y < screen.height; y++) - for (int x = 0; x < screen.width; x++) { - int v = byteAt(line, ptr); - screen.matrix[x][y] = v; - ptr += 2; - } - } - - /** Parses a hex byte in the given String, at position 'ptr'. */ - private int byteAt(String line, int ptr) { - int ld = line.charAt(ptr+1); - int hd = line.charAt(ptr); - - if (ld >= 'A') ld -= 'A' - 10; - else ld -= '0'; - if (hd >= 'A') hd -= 'A' - 10; - else hd -= '0'; - - return (hd << 4) + ld; - } - - /** Read in a run-length encoded screen. 
ALE 0.3-0.4 */ - public void readScreenRLE(String line) { - int ptr = 0; - - // 0.3 protocol - send everything - int y = 0; - int x = 0; - - while (ptr < line.length()) { - // Read in the next run - int v = byteAt(line, ptr); - int l = byteAt(line, ptr + 2); - ptr += 4; - - for (int i = 0; i < l; i++) { - screen.matrix[x][y] = v; - if (++x >= screen.width) { - x = 0; - y++; - - if (y >= screen.height && i < l - 1) - throw new RuntimeException ("Invalid run length data."); - } - } - } - } -} diff --git a/doc/java-agent/code/src/ale/io/Actions.java b/doc/java-agent/code/src/ale/io/Actions.java deleted file mode 100644 index f05a38658..000000000 --- a/doc/java-agent/code/src/ale/io/Actions.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.io; - -import java.util.HashMap; - -/** A static container for Atari actions. - * - * @author Marc G. 
Bellemare - */ -public class Actions { - /** The number of player actions available to each player */ - public static int numPlayerActions = 18; - - /** A list of all the action names */ - public static String[] actionNames = { - "player_a_noop", - "player_a_fire", - "player_a_up", - "player_a_right", - "player_a_left", - "player_a_down", - "player_a_upright", - "player_a_upleft", - "player_a_downright", - "player_a_downleft", - "player_a_upfire", - "player_a_rightfire", - "player_a_leftfire", - "player_a_downfire", - "player_a_uprightfire", - "player_a_upleftfire", - "player_a_downrightfire", - "player_a_downleftfire", - "player_b_noop", - "player_b_fire", - "player_b_up", - "player_b_right", - "player_b_left", - "player_b_down", - "player_b_upright", - "player_b_upleft", - "player_b_downright", - "player_b_downleft", - "player_b_upfire", - "player_b_rightfire", - "player_b_leftfire", - "player_b_downfire", - "player_b_uprightfire", - "player_b_upleftfire", - "player_b_downrightfire", - "player_b_downleftfire", - "reset", - "undefined", - "random", - // MGB v0.2 actions - "save_state", - "load_state", - "system_reset" - }; - - /** A HashMap mapping action names to action indices */ - public static HashMap actionsMap; - - /** Maps a given action name to its corresponding integer value */ - public static int map(String actionName) { - if (actionsMap == null) makeMap(); - - return actionsMap.get(actionName).intValue(); - } - - /** Construct the map from names to actions */ - public static void makeMap() { - actionsMap = new HashMap(); - - for (int i = 0; i < actionNames.length; i++) { - int v; - - if (i < numPlayerActions * 2) v = i; - // Special actions (not player-related) start at 40 - else { - v = i + 4; - } - actionsMap.put(actionNames[i], new Integer(v)); - } - } -} diff --git a/doc/java-agent/code/src/ale/io/ConsoleRAM.java b/doc/java-agent/code/src/ale/io/ConsoleRAM.java deleted file mode 100644 index 5425d8d99..000000000 --- 
a/doc/java-agent/code/src/ale/io/ConsoleRAM.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.io; - -/** This class encapsulates RAM data. - * - * @author Marc G. Bellemare - */ -public class ConsoleRAM { - public static final int RAM_SIZE = 128; - - public int[] ram; - - public ConsoleRAM() { - ram = new int[RAM_SIZE]; - } -} diff --git a/doc/java-agent/code/src/ale/io/RLData.java b/doc/java-agent/code/src/ale/io/RLData.java deleted file mode 100644 index ca98d71db..000000000 --- a/doc/java-agent/code/src/ale/io/RLData.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.io; - -/** This class encapsulates RL data. - * - * @author Marc G. Bellemare - */ -public class RLData { - /** How much reward was received */ - public int reward; - /** Indicates the end of an episode */ - public boolean isTerminal; -} diff --git a/doc/java-agent/code/src/ale/movie/MovieGenerator.java b/doc/java-agent/code/src/ale/movie/MovieGenerator.java deleted file mode 100644 index 224349895..000000000 --- a/doc/java-agent/code/src/ale/movie/MovieGenerator.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.movie; - -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.text.NumberFormat; -import javax.imageio.ImageIO; - -/** A class for exporting screen images to PNG files. - * - * @author Marc G. 
Bellemare - */ -public class MovieGenerator { - /** How many times to show the same image sequence before moving on to the next */ - protected String baseFilename; - - /** The current frame index (used to obtain the PNG filename) */ - protected int pngIndex = 0; - - /** How many digits to use in generating the filename */ - protected final int indexDigits = 6; - - /** Create a new MovieGenerator that saves images to /tmp/frames/atari_xxxxxx.png - * - */ - public MovieGenerator() { - this("/tmp/frames/atari_"); - } - - /** Create a new MovieGenerator with the specified base filename. To this - * base filename is appended a frame number and ".png" in order to obtain - * the full filename. - * - * @param baseFilename - */ - public MovieGenerator(String baseFilename) { - this.baseFilename = baseFilename; - - // Create the relevant directory if necessary - File fp = new File(baseFilename); - File directory = fp.getParentFile(); - - // Create the directory if necessary; fail if it exists and is not a directory - if (!directory.isDirectory()) { - if (!directory.exists()) - directory.mkdir(); - else - throw new IllegalArgumentException("File "+directory.getAbsolutePath()+" exists, "+ - "is not a directory."); - } - } - - /** This method saves the given image to disk as the next frame. It then - * increments pngIndex. 
- * - * @param image - */ - public void record(BufferedImage image) { - // We need a filename in order to save frames - if (baseFilename == null) - throw new IllegalArgumentException("Base filename is not defined."); - - // Create a formatter to generate 6-digit indices - NumberFormat formatter = NumberFormat.getInstance(); - formatter.setMinimumIntegerDigits(indexDigits); - formatter.setGroupingUsed(false); - - // Obtain a 6-digit character representation of pngIndex - String indexString = formatter.format(pngIndex); - - // Create the full filename - String filename = baseFilename + indexString + ".png"; - - // Save the image to disk - try { - ImageIO.write(image, "png", new File(filename)); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // Increment pngIndex so that the next frame has a different filename - pngIndex++; - } -} diff --git a/doc/java-agent/code/src/ale/rl/FeatureMap.java b/doc/java-agent/code/src/ale/rl/FeatureMap.java deleted file mode 100644 index 86d68e655..000000000 --- a/doc/java-agent/code/src/ale/rl/FeatureMap.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.rl; - -import ale.screen.ScreenMatrix; - -/** A simple RL feature set for Atari agents. The screen is divided into blocks. 
- * Within each block, we encode the presence or absence of a color. The number - * of colors is restricted to reduce the number of active features. - * - * @author Marc G. Bellemare - */ -public class FeatureMap { - /** The number of colors used in our feature set */ - protected final int numColors; - /** The number of columns (y bins) in the quantization */ - protected final int numColumns; - /** The number of rows (x bins) in the quantization */ - protected final int numRows; - - /** Create a new FeatureMap with fixed parameter settings: 16 columns, - * 21 rows and 8 colors (SECAM). - */ - public FeatureMap() { - // Some hardcoded feature set parameters - numColumns = 16; - numRows = 21; - numColors = 8; - } - - /** Returns a quantized version of the last screen. - * - * @param history - * @return - */ - public double[] getFeatures(FrameHistory history) { - // Obtain the last screen - ScreenMatrix screen = history.getLastFrame(0); - - int blockWidth = screen.width / numColumns; - int blockHeight = screen.height / numRows; - - int featuresPerBlock = numColors; - double[] features = new double[numFeatures()]; - - int blockIndex = 0; - - // For each pixel block - for (int by = 0; by < numRows; by++) { - for (int bx = 0; bx < numColumns; bx++) { - boolean[] hasColor = new boolean[numColors]; - int xo = bx * blockWidth; - int yo = by * blockHeight; - - // Determine which colors are present - for (int x = xo; x < xo + blockWidth; x++) - for (int y = yo; y < yo + blockHeight; y++) { - int pixelColor = screen.matrix[x][y]; - hasColor[encode(pixelColor)] = true; - } - - // Add all colors present to our feature set - for (int c = 0; c < numColors; c++) - if (hasColor[c]) - features[c + blockIndex] = 1.0; - - // Increment the feature offset in the big feature vector - blockIndex += featuresPerBlock; - } - } - - return features; - } - - /** SECAM encoding of colors; we end up with 8 possible colors. 
- * - * @param color - * @return - */ - protected int encode(int color) { - return (color & 0xF) >> 1; - } - - /** Returns the number of features in this FeatureMap. - * - * @return - */ - public int numFeatures() { - return numColumns * numRows * numColors; - } - - /** Returns the length of history required to compute features. - * - * @return - */ - public int historyLength() { - return 1; - } -} diff --git a/doc/java-agent/code/src/ale/rl/FrameHistory.java b/doc/java-agent/code/src/ale/rl/FrameHistory.java deleted file mode 100644 index 723e70899..000000000 --- a/doc/java-agent/code/src/ale/rl/FrameHistory.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.rl; - -import ale.screen.ScreenMatrix; -import java.util.LinkedList; - -/** A time-ordered list of frames. - * - * @author Marc G. Bellemare - */ -public class FrameHistory implements Cloneable { - /** The list of recent frames */ - protected LinkedList frames; - - /** The maximum length of history we need to keep */ - protected int maxLength; - - /** Create a new FrameHistory which needs to keep no more than the last - * 'maxLength' frames. 
- * - * @param maxLength - */ - public FrameHistory(int maxLength) { - this.maxLength = maxLength; - frames = new LinkedList(); - } - - /** Append a new frame to the end of the history. - * - * @param frame - */ - public void addFrame(ScreenMatrix frame) { - frames.addLast(frame); - while (frames.size() > maxLength) - frames.removeFirst(); - } - - /** Removes the t-to-last frame. For example, removeLast(0) removes the - * last frame added by addFrame(frame). - */ - public void removeLast(int t) { - frames.remove(frames.size() - t - 1); - } - - public int maxHistoryLength() { - return maxLength; - } - - /** Returns the t-to-last frame. For example, getLastFrame(0) returns the - * last frame added by addFrame(frame). - */ - public ScreenMatrix getLastFrame(int t) { - return frames.get(frames.size() - t - 1); - } - - public Object clone() { - try { - FrameHistory obj = (FrameHistory)super.clone(); - - obj.frames = new LinkedList(); - // Copy over the frames; we do not clone them - for (ScreenMatrix screen : this.frames) { - obj.frames.add(screen); - } - return obj; - } - catch (CloneNotSupportedException e) { - return null; - } - } -} diff --git a/doc/java-agent/code/src/ale/rl/LinearModel.java b/doc/java-agent/code/src/ale/rl/LinearModel.java deleted file mode 100644 index 7a93e12da..000000000 --- a/doc/java-agent/code/src/ale/rl/LinearModel.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.rl; - -//import Jama.Matrix; -import java.io.Serializable; - -/** - * Defines a linear regression model. Uses double[] as its feature representation. - * - * @author Marc G. Bellemare - */ -public class LinearModel implements Serializable, Cloneable { - /** Whether we should use the bias weights as well */ - protected boolean useBias = false; - /** Learning rate for modifying weights */ - protected double alpha = 0.1; - - /** How many features this model expects. */ - protected int numFeatures; - /** The last prediction made by the model */ - protected double prediction; - /** The set of weights used to predict */ - protected double[] weights; - /** The model's bias term */ - protected double bias; - - /** - * Create a new LinearModel. - * - * @param numFeatures The length of the state vector used by this model. - * @param useBias Whether to use a bias term. - */ - public LinearModel(int numFeatures, boolean useBias) { - this.useBias = useBias; - this.numFeatures = numFeatures; - - // Initialize weights to 0 - weights = new double[numFeatures]; - bias = 0; - prediction = 0; - } - - /** Sets the learning rate for this model. - * - * @param alpha - */ - public void setAlpha(double alpha) { - this.alpha = alpha; - } - - /** Returns the learning rate for this model. - * - * @return - */ - public double getAlpha() { - return alpha; - } - - public double[] getWeights() { - return weights; - } - - public boolean getUseBias() { - return useBias; - } - - public double getPrediction() { - return prediction; - } - - /** Makes a prediction for the given feature vector. The prediction is - * the dot product of the weight vector with the feature vector. 
- * - * @param features - * @return - */ - public double predict(double[] features) { - prediction = 0; - - // Dot product - for (int i = 0; i < features.length; i++) { - prediction += weights[i] * features[i]; - } - - // Add bias if so desired - if (useBias) - prediction += bias; - - return prediction; - } - - /** Updates the weights by a 'delta' gradient-ish quantity (e.g., TD - * error). - * - * @param lastFeatures - * @param delta - */ - public void updateWeightsDelta(double[] lastFeatures, double delta) { - // Update the bias - if (useBias) { - bias += alpha * delta; - } - - // Update other weights - for (int index = 0; index < lastFeatures.length; index++) { - double value = lastFeatures[index]; - - weights[index] += alpha * (delta * value); - } - } -} diff --git a/doc/java-agent/code/src/ale/rl/SarsaLearner.java b/doc/java-agent/code/src/ale/rl/SarsaLearner.java deleted file mode 100644 index a84b01232..000000000 --- a/doc/java-agent/code/src/ale/rl/SarsaLearner.java +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.rl; - -import java.util.ArrayList; - -/** - * A class that acts accordingly to an epsilon-greedy policy and learns using - * SARSA(lambda). - * - * @author Marc G. 
Bellemare - */ -public class SarsaLearner { - /** The number of actions considered by the learner */ - protected int numActions; - - /** The usual set of SARSA/epsilon-greedy parameters */ - /** Learning rate */ - protected double alpha = 0.1; - /** Discount factor */ - protected double gamma = 0.999; - /** Eligibility trace parameter */ - protected double lambda = 0.9; - /** Probability of a random action */ - protected double epsilon = 0.05; - - /** A set of variables used to perform the SARSA update */ - protected int lastAction; - protected int action; - protected double[] lastFeatures; - protected double[] features; - - /** Eligibility traces; need separate traces for each action */ - protected double[][] traces; - - /** Q-value models, one per action */ - protected LinearModel[] valueFunction; - - /** The threshold below which we consider traces to be 0 */ - public static final double minTrace = 0.01; - - /** Creates a new SarsaLearner that expects a feature vector of size - * 'numFeatures' and takes one of 'numActions' actions. - * - * @param numFeatures - * @param numActions - */ - public SarsaLearner(int numFeatures, int numActions) { - this.numActions = numActions; - - // Create the linear models - createModels(numActions, numFeatures); - } - - /** Sets the learning rate for the value function approximators. - * - * @param alpha - */ - public void setAlpha(double alpha) { - this.alpha = alpha; - for (int a = 0; a < numActions; a++) - valueFunction[a].setAlpha(alpha); - } - - /** Begins a new episode with the given feature vector. - * - * @param features - * @return - */ - public int agent_start(double[] features) { - lastFeatures = null; - traces = null; - - this.features = (double[])features.clone(); - - return actAndLearn(features, 0.0); - } - - /** Takes one step in the RL environment. Agent_start must be called at - * least once prior to calling agent_step. 
- * - * @param pReward - * @param features - * @return - */ - public int agent_step(double pReward, double[] features) { - lastFeatures = this.features; - this.features = (double[])features.clone(); - - return actAndLearn(this.features, pReward); - } - - /** The very last step, when the agent receives the last reward but not the - * subsequent state (because that state is the terminal state). - * - * @param pReward - */ - public void agent_end(double pReward) { - learn(features, action, pReward, null, 0); - } - - /** Take an action and learn from the option given the current state and - * current observation. - * - * @param features - * @param pObservation - * @return - */ - public int actAndLearn(double[] features, double pReward) { - lastAction = action; - - // Get the next action - action = selectAction(features); - - // If this is not the first step... - if (lastFeatures != null) { - // Perform a SARSA update - learn(lastFeatures, lastAction, pReward, features, action); - } - - return action; - } - - /** The core of the SARSA learning algorithm. See Sutton and Barto (1998). - * - * @param lastFeatures - * @param lastAction - * @param reward - * @param features - * @param action - */ - public void learn(double[] lastFeatures, int lastAction, - double reward, double[] features, int action) { - // Compute Q(s,a) - double oldValue = valueFunction[lastAction].predict(lastFeatures); - - // Early exit for diverging agents - if (Double.isNaN(oldValue) || oldValue >= 10E7) - throw new RuntimeException("Diverged."); - - // Compute Q(s',a') - double newValue; - - // ... 
if s' is null (terminal state), then Q(s',a') is assumed to be 0 - if (features != null) - newValue = valueFunction[action].predict(features); - else - newValue = 0; - - // Compute the TD error - double delta = reward + gamma * newValue - oldValue; - - // Update the eligibility traces - updateTraces(lastFeatures, lastAction); - - // With traces, we update *all* models - for (int a = 0; a < numActions; a++) { - LinearModel model = valueFunction[a]; - // Perform a TD error linear approximation udpate - model.updateWeightsDelta(traces[a], delta); - } - } - - /** Updates the eligibility traces for all actions. The action that was - * actually taken has the current feature vector added to its traces; - * all others are simply decayed. - * - * @param features - * @param lastAction - */ - public void updateTraces(double[] features, int lastAction) { - if (traces == null) { - traces = new double[numActions][]; - traces[lastAction] = (double[])features.clone(); - - for (int a = 0; a < numActions; a++) - if (a != lastAction) - traces[a] = new double[features.length]; - } - else { - for (int a = 0; a < numActions; a++) { - // For the selected action, decay its trace and add the new - // state vector - if (a != lastAction) - decayTraces(traces[a], gamma*lambda); - else - replaceTraces(traces[a], gamma*lambda, features); - } - } - } - - /** Decays the given eligibility traces. - * - * @param traces - * @param factor - */ - protected void decayTraces(double[] traces, double factor) { - for (int f = 0; f < traces.length; f++) - traces[f] *= factor; - } - - /** Replacing traces. This, of course, assumes a sparse feature vector. 
- * - * @param traces - * @param factor - * @param state - */ - protected void replaceTraces(double[] traces, double factor, double[] state) { - for (int f = 0; f < traces.length; f++) { - // If the feature is currently 0, decay its trace - if (state[f] == 0) - traces[f] *= factor; - else - traces[f] = state[f]; - } - } - - /** Epsilon-greedy action selection. - * - * @param pState - * @return - */ - public int selectAction(double[] pState) { - double[] values = new double[numActions]; - - double bestValue = Double.NEGATIVE_INFINITY; - double worstValue = Double.POSITIVE_INFINITY; - - int bestAction = -1; - ArrayList ties = new ArrayList(); - - // E-greedy - if (Math.random() < epsilon) { - int r = (int)(Math.random() * numActions); - return r; - } - - // Greedy selection, with random tie-breaking - for (int a = 0; a < numActions; a++) { - double v = valueFunction[a].predict(pState); - - values[a] = v; - if (v > bestValue) { - bestValue = v; - bestAction = a; - ties.clear(); - ties.add(bestAction); - } - else if (v == bestValue) { - ties.add(a); - } - - if (v < worstValue) - worstValue = v; - } - - // Tie-breaker - if (ties.size() > 1) { - int r = (int)(Math.random() * ties.size()); - bestAction = ties.get(r); - } - - return bestAction; - } - - /** This method constructs the set of models used by this agent. - * - * @param numActions The number of actions available to the agent. - * @param pObservationDim The dimension of the observation vector. 
- */ - protected final void createModels(int numActions, int numFeatures) { - valueFunction = new LinearModel[numActions]; - - for (int a = 0; a < numActions; a++) { - valueFunction[a] = new LinearModel(numFeatures, true); - valueFunction[a].setAlpha(alpha); - } - } -} diff --git a/doc/java-agent/code/src/ale/screen/ColorPalette.java b/doc/java-agent/code/src/ale/screen/ColorPalette.java deleted file mode 100644 index e46d28c55..000000000 --- a/doc/java-agent/code/src/ale/screen/ColorPalette.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.screen; - -import java.awt.Color; - -/** Defines a palette of colors. Up to 256 entries. 0 is always black. - * - * @author Marc G. Bellemare - */ -public abstract class ColorPalette { - /** 256 colors in this palette */ - public static final int MAX_ENTRIES = 256; - - /** A map of screen indices to RGB colors. */ - protected Color[] map; - /** How many entries our map contains. */ - protected int numEntries; - - /** Create a new map, with entry #0 being black. - * - */ - public ColorPalette() { - map = new Color[MAX_ENTRIES]; - // 0 is always black - set(Color.BLACK, 0); - } - - /** Returns how many entries are contained in this color map. 
- * - * @return - */ - public int numEntries() { - return this.numEntries; - } - - /** Adds Color c at index i. - * - * @param c Color - * @param i index - */ - public Color set(Color c, int i) { - Color oldColor = map[i]; - - map[i] = c; - if (oldColor == null) numEntries++; - - return oldColor; - } - - /** Returns the color indexed by i, possibly null. - * - * @param i - * @return - */ - public Color get(int i) { - return map[i]; - } - - /** Returns whether palette index i has an associated color. - * - * @param i - * @return - */ - public boolean hasEntry(int i) { - return (map[i] != null); - } -} diff --git a/doc/java-agent/code/src/ale/screen/NTSCPalette.java b/doc/java-agent/code/src/ale/screen/NTSCPalette.java deleted file mode 100644 index 4e1eb3568..000000000 --- a/doc/java-agent/code/src/ale/screen/NTSCPalette.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.screen; - -import java.awt.Color; - -/** This class defines the NTSC color palette. The entries in this class were - * taken from Stella. - * - * @author Marc G. 
Bellemare - */ -public class NTSCPalette extends ColorPalette { - // 128 unique colors, with odd indices being mapped to the color below - - protected int[] colorData = new int[]{ - 0x000000, 0, 0x4a4a4a, 0, 0x6f6f6f, 0, 0x8e8e8e, 0, - 0xaaaaaa, 0, 0xc0c0c0, 0, 0xd6d6d6, 0, 0xececec, 0, - 0x484800, 0, 0x69690f, 0, 0x86861d, 0, 0xa2a22a, 0, - 0xbbbb35, 0, 0xd2d240, 0, 0xe8e84a, 0, 0xfcfc54, 0, - 0x7c2c00, 0, 0x904811, 0, 0xa26221, 0, 0xb47a30, 0, - 0xc3903d, 0, 0xd2a44a, 0, 0xdfb755, 0, 0xecc860, 0, - 0x901c00, 0, 0xa33915, 0, 0xb55328, 0, 0xc66c3a, 0, - 0xd5824a, 0, 0xe39759, 0, 0xf0aa67, 0, 0xfcbc74, 0, - 0x940000, 0, 0xa71a1a, 0, 0xb83232, 0, 0xc84848, 0, - 0xd65c5c, 0, 0xe46f6f, 0, 0xf08080, 0, 0xfc9090, 0, - 0x840064, 0, 0x97197a, 0, 0xa8308f, 0, 0xb846a2, 0, - 0xc659b3, 0, 0xd46cc3, 0, 0xe07cd2, 0, 0xec8ce0, 0, - 0x500084, 0, 0x68199a, 0, 0x7d30ad, 0, 0x9246c0, 0, - 0xa459d0, 0, 0xb56ce0, 0, 0xc57cee, 0, 0xd48cfc, 0, - 0x140090, 0, 0x331aa3, 0, 0x4e32b5, 0, 0x6848c6, 0, - 0x7f5cd5, 0, 0x956fe3, 0, 0xa980f0, 0, 0xbc90fc, 0, - 0x000094, 0, 0x181aa7, 0, 0x2d32b8, 0, 0x4248c8, 0, - 0x545cd6, 0, 0x656fe4, 0, 0x7580f0, 0, 0x8490fc, 0, - 0x001c88, 0, 0x183b9d, 0, 0x2d57b0, 0, 0x4272c2, 0, - 0x548ad2, 0, 0x65a0e1, 0, 0x75b5ef, 0, 0x84c8fc, 0, - 0x003064, 0, 0x185080, 0, 0x2d6d98, 0, 0x4288b0, 0, - 0x54a0c5, 0, 0x65b7d9, 0, 0x75cceb, 0, 0x84e0fc, 0, - 0x004030, 0, 0x18624e, 0, 0x2d8169, 0, 0x429e82, 0, - 0x54b899, 0, 0x65d1ae, 0, 0x75e7c2, 0, 0x84fcd4, 0, - 0x004400, 0, 0x1a661a, 0, 0x328432, 0, 0x48a048, 0, - 0x5cba5c, 0, 0x6fd26f, 0, 0x80e880, 0, 0x90fc90, 0, - 0x143c00, 0, 0x355f18, 0, 0x527e2d, 0, 0x6e9c42, 0, - 0x87b754, 0, 0x9ed065, 0, 0xb4e775, 0, 0xc8fc84, 0, - 0x303800, 0, 0x505916, 0, 0x6d762b, 0, 0x88923e, 0, - 0xa0ab4f, 0, 0xb7c25f, 0, 0xccd86e, 0, 0xe0ec7c, 0, - 0x482c00, 0, 0x694d14, 0, 0x866a26, 0, 0xa28638, 0, - 0xbb9f47, 0, 0xd2b656, 0, 0xe8cc63, 0, 0xfce070, 0 - }; - - /** Create a new NTSC color palette. 
- * - */ - public NTSCPalette() { - super(); - - // Set the palette as given above - for (int index = 0; index < colorData.length; index++) { - int v = colorData[index & ~0x1]; - int r = (v & 0xFF0000) >> 16; - int g = (v & 0x00FF00) >> 8; - int b = v & 0x0000FF; - - super.set(new Color(r, g, b), index); - } - } -} diff --git a/doc/java-agent/code/src/ale/screen/SECAMPalette.java b/doc/java-agent/code/src/ale/screen/SECAMPalette.java deleted file mode 100644 index ee03bb67f..000000000 --- a/doc/java-agent/code/src/ale/screen/SECAMPalette.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.screen; - -import java.awt.Color; - -/** This class defines the SECAM color palette. The entries in this class were - * taken from Stella. - * - * @author Marc G. 
Bellemare - */ -public class SECAMPalette extends ColorPalette { - // 8 unique colors, with odd indices being mapped to the color below - - protected int[] colorData = new int[]{ - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0, - 0x000000, 0, 0x2121ff, 0, 0xf03c79, 0, 0xff50ff, 0, - 0x7fff00, 0, 0x7fffff, 0, 0xffff3f, 0, 0xffffff, 0 - }; - - /** Creates a new SECAM palette. 
- * - */ - public SECAMPalette() { - super(); - - // Set the palette as given above - for (int index = 0; index < colorData.length; index++) { - int v = colorData[index & ~0x1]; - int r = (v & 0xFF0000) >> 16; - int g = (v & 0x00FF00) >> 8; - int b = v & 0x0000FF; - - super.set(new Color(r, g, b), index); - } - } -} diff --git a/doc/java-agent/code/src/ale/screen/ScreenConverter.java b/doc/java-agent/code/src/ale/screen/ScreenConverter.java deleted file mode 100644 index 37504a748..000000000 --- a/doc/java-agent/code/src/ale/screen/ScreenConverter.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.screen; - -import java.awt.Color; -import java.awt.image.BufferedImage; - -/** Converts a ScreenMatrix to a BufferedImage, using a ColorMap. - * - * @author Marc G. Bellemare - */ -public class ScreenConverter { - /** The map from screen indices to RGB colors */ - protected ColorPalette colorMap; - - /** Create a new ScreenConverter with the desired color palette - * - * @param cMap - */ - public ScreenConverter(ColorPalette cMap) { - colorMap = cMap; - } - - /** Transforms a ScreenMatrix into a BufferedImage. 
- * - * @param m - * @return - */ - public BufferedImage convert(ScreenMatrix m) { - // Create a new image, of the same width and height as the screen matrix - BufferedImage img = new BufferedImage(m.width, m.height, BufferedImage.TYPE_INT_RGB); - - // Map each pixel - for (int x = 0; x < m.width; x++) - for (int y = 0; y < m.height; y++) { - int index = m.matrix[x][y]; - Color c = colorMap.get(index); - img.setRGB(x, y, c.getRGB()); - } - - return img; - } -} diff --git a/doc/java-agent/code/src/ale/screen/ScreenMatrix.java b/doc/java-agent/code/src/ale/screen/ScreenMatrix.java deleted file mode 100644 index cf58c01a9..000000000 --- a/doc/java-agent/code/src/ale/screen/ScreenMatrix.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Java Arcade Learning Environment (A.L.E) Agent - * Copyright (C) 2011-2012 Marc G. Bellemare - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package ale.screen; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintStream; - -/** Encapsulates screen matrix data. Also provides basic save/load operations on - * screen data. - * - * @author Marc G. 
Bellemare - */ -public class ScreenMatrix implements Cloneable { - public int[][] matrix; - public int width; - public int height; - - /** Create a new, blank screen matrix with the given dimensions. - * - * @param w width - * @param h height - */ - public ScreenMatrix(int w, int h) { - matrix = new int[w][h]; - width = w; - height = h; - } - - /** Load a screen from a text file, in ALE format. The first line contains - * , . - * Each subsequent line (210 of them) contains a screen row with comma-separated - * values. - * - * @param filename - */ - public ScreenMatrix(String filename) throws IOException { - // Create a BufferedReader to read in the data - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename))); - - // Obtain the width and height - String line = in.readLine(); - String[] tokens = line.split(","); - - width = Integer.parseInt(tokens[0]); - height = Integer.parseInt(tokens[1]); - - this.matrix = new int[width][height]; - - int rowIndex = 0; - - // Read in the screen row-by-row, each separated by a newline - while ((line = in.readLine()) != null) { - // A row is a comma-separated list of integer values - tokens = line.split(","); - assert (tokens.length == width); - - for (int x = 0; x < tokens.length; x++) { - this.matrix[x][rowIndex] = Integer.parseInt(tokens[x]); - } - - rowIndex++; - } - } - - /** Saves this screen matrix as a text file. Can then be loaded using the - * relevant constructor. - * - * @param filename - * @throws IOException - */ - public void saveData(String filename) throws IOException { - PrintStream out = new PrintStream(new FileOutputStream(filename)); - - // Width,height\n - out.println(width+","+height); - - // Print the matrix, one row per line - for (int y = 0; y < height; y++) { - // Data is comma separated - for (int x = 0; x < width; x++) { - out.print(matrix[x][y]); - if (x < width - 1) out.print(","); - } - - out.println(); - } - } - - /** Clones this screen matrix. Data is copied. 
- * - * @return - */ - @Override - public Object clone() { - try { - ScreenMatrix img = (ScreenMatrix)super.clone(); - - // Create a new matrix which we will fill with the proper data - img.matrix = new int[this.width][this.height]; - - for (int x = 0; x < this.width; x++) { - System.arraycopy(this.matrix[x], 0, img.matrix[x], 0, this.height); - } - return img; - } - catch (CloneNotSupportedException e) { - return null; - } - } -} diff --git a/doc/java-agent/manual/Makefile b/doc/java-agent/manual/Makefile deleted file mode 100644 index ec1c41d27..000000000 --- a/doc/java-agent/manual/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -FILE=java-agent - -all: $(FILE).pdf - -%.pdf: $(FILE).tex - pdflatex $(FILE).tex - pdflatex $(FILE).tex - -clean: - rm -f *.{aux,log} diff --git a/doc/java-agent/manual/java-agent.pdf b/doc/java-agent/manual/java-agent.pdf deleted file mode 100644 index 42a07dd6c..000000000 Binary files a/doc/java-agent/manual/java-agent.pdf and /dev/null differ diff --git a/doc/java-agent/manual/java-agent.tex b/doc/java-agent/manual/java-agent.tex deleted file mode 100644 index cd2a8a634..000000000 --- a/doc/java-agent/manual/java-agent.tex +++ /dev/null @@ -1,185 +0,0 @@ -% A LaTeX file that describes how to install, run and modify the sample java -% agent for ALE 0.5. -% -% Created by Marc G. Bellemare - -\documentclass[12pt]{article} - -\usepackage{fullpage} - -\title{ALE Java Agent Tutorial} -\author{Marc G. Bellemare\\ mgbellemare@ualberta.ca} - -\begin{document} - -\maketitle - -\section{Requirements} - -To run this agent, you will need: - -\begin{itemize} - \item{Java 1.6} - \item{ALE} - \item{At least one ROM file} - \item{Apache Ant} - \item{Perl (optional)} -\end{itemize} - -Although not strictly necessary, we recommend the use of Apache Ant to build -the Java agent. - -This tutorial assumes that you have installed ALE 0.5 in a directory named \verb+ale_0_5+. 
See -the main manual (\verb+/path/to/ale_0_5/doc/manual/manual.pdf+) for ALE installation details. - -\section{Installation/Compilation} - -Installing the Java agent is simple. Assuming you have extracted the Java agent -package to the directory \verb+ale_java_agent+, you may simply use Ant to build the -jar file containing the agent code: - -\begin{verbatim} -> cd ale_java_agent -ale_java_agent> ant jar -\end{verbatim} - -Voil\`a! Your Java agent is compiled and good to go. - -\subsection{Perl script setup}\label{subsec:perl_script} - -For your convenience, we offer a perl script that automates running ALE and -the Java agent. To use this perl script, you should copy or link the ALE -executable, configuration file and roms directory accordingly. Assuming that -your roms are located at \verb+/path/to/atari_roms+ and that you have -installed ALE at \verb+/path/to/ale_0_5+, you may then (on Mac OS X and *NIX system): - -\begin{verbatim} -> cd /path/to/ale_java_agent -/path/to/ale_java_agent> ln -s /path/to/ale_0_5/ale . -/path/to/ale_java_agent> ln -s /path/to/ale_0_5/stellarc . -/path/to/ale_0_5> ln -s /path/to/atari_roms roms -\end{verbatim} - -In particular, if running the Java agent from its package path, i.e. -\begin{center}\verb+/path/to/ale/doc/java-agent+\end{center} you may simply do -\begin{verbatim} -doc/java-agent/code> ln -s ../../../ale . -doc/java-agent/code> ln -s ../../../stellarc . -\end{verbatim} - -Your \verb+ale_java_agent+ directory is now set up. - -\section{Running} - -If you did not perform the optional perl script setup (Section -\ref{subsec:perl_script}), or do not have perl installed, skip to Section -\ref{subsec:named_pipes} for information on running ALE via named pipes. The named pipes -interface is somewhat deprecated and we encourage you to use the perl script if possible. 
The -C++ fifo interface example -\begin{center}\verb+doc/examples/fifoInterfaceExample.cpp+\end{center} -can also easily be adapted to run your Java agent together with ALE. - -Assuming that everything is installed correctly, you should now be able to -run the HumanAgent using the provided perl script: - -\begin{verbatim} -ale_java_agent> perl run_agent.perl space_invaders -\end{verbatim} - -\subsection{Named Pipes}\label{subsec:named_pipes} - -To communicate with ALE via named pipes, you need to start ALE and the Java -agent in separate consoles. First, we create two named pipes: - -\begin{verbatim} -/path/to/ale_0_5> mkfifo ale_fifo_in -/path/to/ale_0_5> mkfifo ale_fifo_out -\end{verbatim} - -Then we run ALE and the Java agent in separate processes. We specify the path -of both named pipes using the \verb+-named_pipes+ option: - -\begin{verbatim} -Terminal 1 -/path/to/ale_0_5> ./ale -game_controller fifo_named - /path/to/atari_roms/beam_rider.bin - -Terminal 2 -ale_java_agent> java -cp dist/ALEJavaAgent.jar ale.agents.HumanAgent - -named_pipes /path/to/ale_0_5/ale_fifo_ -\end{verbatim} - -\section{Recording screen data} - -We provide facilities for recording received screen data to PNG files. We do -so by passing the \verb+-export_frames+ command-line argument to the Java -HumanAgent class. The perl script is already set up to pass all arguments -beyond the first to the Java agent: - -\begin{verbatim} -ale_java_agent> perl run_agent.perl beam_rider -export_frames -\end{verbatim} - -Frames will be saved in the \verb+ale_java_agent/frames+ directory, starting -with \verb+frame_000000.png+ and subsequently incrementing the index. The -equivalent command with named pipes is: - -\begin{verbatim} -ale_java_agent> java -cp dist/ALEJavaAgent.jar ale.agents.HumanAgent - -named_pipes /path/to/ale_0_5/ale_fifo_ -export_frames -\end{verbatim} - -For further information, see the class \verb+ale.movie.MovieGenerator+. 
In -\verb+ale.agents.HumanAgent+, the variable \verb+exportFramesBasename+ defines -the pathname to which frames are saved. - -\section{Code listing} - -To complete this tutorial, we give a list of the classes provided in the -Java agent package, along with a short description. - -\begin{enumerate} - \item{\verb+ale.agents+} - \begin{itemize} - \item{\verb+AbstractAgent+ Abstract class; interfaces with ALE and the GUI.} - \item{\verb+HumanAgent+ extends \verb+AbstractAgent+. Defines an agent controlled by the user via the keyboard.} - \item{\verb+RLAgent+ extends \verb+AbstractAgent+. A simple learning agent that uses SARSA and $\epsilon$-greedy.} - \end{itemize} - \item{\verb+ale.gui+} - \begin{itemize} - \item{\verb+AbstractUI+ Abstract class; defines a user interface.} - \item{\verb+AgentGUI+ extends \verb+AbstractUI+. Defines a graphical user interface.} - \item{\verb+NullUI+ extends \verb+AbstractUI+. The \verb+/dev/null+ of uesr interfaces.} - \item{\verb+KeyboardControl+. Receives keystrokes and converts them to ALE actions.} - \item{\verb+ScreenDisplay+. Responsible for displaying images on the screen.} - \item{\verb+MessageHistory+. A helper class for display messages on-screen.} - \end{itemize} - \item{\verb+ale.io+} - \begin{itemize} - \item{\verb+ALEPipes+. Communicates with ALE via stdin/out or named pipes.} - \item{\verb+Actions+. Helper class mapping action names to integers.} - \item{\verb+ConsoleRAM+. Encapsulates RAM data.} - \item{\verb+RLData+. Encapsulates RL data.} - \end{itemize} - \item{\verb+ale.movie+} - \begin{itemize} - \item{\verb+MovieGenerator+. Helper class to save screen data to PNG files.} - \end{itemize} - \item{\verb+ale.rl+} - \begin{itemize} - \item{\verb+FeatureMap+. Maps screen data to feature vectors.} - \item{\verb+FrameHistory+. Stores a list of recent frames.} - \item{\verb+LinearModel+. A linear regression predictor. Used for approximating value functions.} - \item{\verb+SarsaLearner+. 
The core SARSA algorithm.} - \end{itemize} - \item{\verb+ale.screen+} - \begin{itemize} - \item{\verb+ColorPalette+. Abstract class; defines basic colour palette functionality.} - \item{\verb+NTSCPalette+. Defines the NTSC colour palette (128 colors).} - \item{\verb+SECAMPalette+. Defines the SECAM colour palette (8 colors).} - \item{\verb+ScreenConverter+. Converts ScreenMatrix objects to Java image objects.} - \item{\verb+ScreenMatrix+. Encapsulates screen data.} - \end{itemize} -\end{enumerate} - -\end{document} diff --git a/doc/manual/Makefile b/doc/manual/Makefile deleted file mode 100644 index f4ffb3ae0..000000000 --- a/doc/manual/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -FILE=manual - -all: $(FILE).pdf - -%.pdf: $(FILE).tex - pdflatex $(FILE).tex - pdflatex $(FILE).tex - pdflatex $(FILE).tex - -clean: - rm -f *.{aux,log,toc,out} diff --git a/doc/scripts/test_ale.sh b/doc/scripts/test_ale.sh deleted file mode 100755 index 214457c05..000000000 --- a/doc/scripts/test_ale.sh +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env bash -set +e - -if [ $# -lt 1 ]; then - echo "usage: $0 ROM" - exit -fi -ROM=$1 -LOG="log.txt" -PASSED_TESTS=0 -FAILED_TESTS=0 -SKIPPED_TESTS=0 - -function LOG { - echo "$1" - echo "$1" >> $LOG -} - -# TEST (string command) -function TEST { - LOG "==> TEST: \"$1\"" - $1 >>$LOG 2>&1 - RETVAL=$? - if [ $RETVAL -ne 0 ]; then - LOG "==> FAILURE!!!" 
- FAILED_TESTS=$((FAILED_TESTS+1)) - else - PASSED_TESTS=$((PASSED_TESTS+1)) - fi -} - -# TEST_CMAKE_BUILD (Bool USE_SDL, Bool USE_RLGLUE) -function TEST_CMAKE_BUILD { - echo "=========== [ CMAKE BUILD TESTS ] ===========" - LOG "USE_SDL = $1; USE_RLGLUE = $2;" - TEST "cmake -DUSE_SDL=$1 -DUSE_RLGLUE=$2" - TEST "make clean" - TEST "make -j16" -} - -# TEST_MAKEFILE_BUILD (Bool USE_SDL, Bool USE_RLGLUE) -function TEST_MAKEFILE_BUILD { - LOG "=========== [ MAKEFILE BUILD TESTS ] ===========" - LOG "USE_SDL = $1; USE_RLGLUE = $2;" - ORIG="USE_SDL\ *:= 0" - REPL="USE_SDL := $1" - sed -e "s/$ORIG/$REPL/" makefile.$suffix > makefile.tmp - ORIG="USE_RLGLUE\ *:= 0" - REPL="USE_RLGLUE := $2" - - # Needed because Mac OS X's sed requires a backup extension - if [ "$suffix" = "mac" ]; then - sed -i '' "s/$ORIG/$REPL/" makefile.tmp - else - sed -i "s/$ORIG/$REPL/" makefile.tmp - fi - - TEST "make -f makefile.tmp clean" - TEST "make -f makefile.tmp -j16" - rm makefile.tmp -} - -# TEST_SHARED_LIBRARY_EXAMPLE (Bool USE_SDL) -function TEST_SHARED_LIBRARY_EXAMPLE { - cd doc/examples - LOG "=========== [ SHARED LIBRARY TESTS ] ===========" - ORIG="USE_SDL\ *:= 0" - REPL="USE_SDL := $1" - sed -e "s/$ORIG/$REPL/" Makefile.sharedlibrary > makefile.tmp - TEST "make -f makefile.tmp clean" - TEST "make -f makefile.tmp" - rm makefile.tmp - if [ -f "sharedLibraryInterfaceExample" ]; then - export DYLD_LIBRARY_PATH="../..:." - TEST "./sharedLibraryInterfaceExample $ROM" - else - LOG "Skipping TEST: ./sharedLibraryInterfaceExample" - SKIPPED_TESTS=$((SKIPPED_TESTS+1)) - fi - cd ../.. 
- cat doc/examples/$LOG >> $LOG - rm doc/examples/$LOG -} - -# TEST_RLGLUE_EXAMPLE -function TEST_RLGLUE_EXAMPLE { - LOG "=========== [ RL-GLUE TESTS ] ===========" - cd doc/examples - TEST "make -f Makefile.rlglue clean" - TEST "make -f Makefile.rlglue" - if [[ -f "RLGlueExperiment" && -f "RLGlueAgent" && -f "../../ale" ]]; then - rl_glue &> /dev/null & - ../../ale -game_controller rlglue $ROM &> /dev/null & - ./RLGlueAgent &> /dev/null & - TEST "./RLGlueExperiment" - else - LOG "Skipping TEST: ./RLGlueExperiment" - SKIPPED_TESTS=$((SKIPPED_TESTS+1)) - fi - cd ../.. - cat doc/examples/$LOG >> $LOG - rm doc/examples/$LOG -} - -# TEST_PYTHON_EXAMPLE (Bool USE_SDL) -function TEST_PYTHON_EXAMPLE { - LOG "=========== [ PYTHON EXAMPLE TESTS ] ===========" - INSTALLED=`pip list | grep ale-python-interface` - if [ -z "$INSTALLED" ]; then - LOG "==> ale-python-interface not installed. Skipping this test..." - SKIPPED_TESTS=$((SKIPPED_TESTS+1)) - else - cd doc/examples - if [ $1 -eq 1 ]; then - mv python_example.py tmp.py - ORIG="USE_SDL = False" - REPL="USE_SDL = True" - sed -e "s/$ORIG/$REPL/" tmp.py > python_example.py - TEST "python python_example.py $ROM" - mv tmp.py python_example.py - else - TEST "python python_example.py $ROM" - fi - cd ../.. 
- cat doc/examples/$LOG >> $LOG - rm doc/examples/$LOG - fi -} - -unamestr=`uname -s` -echo `uname -a` >> $LOG -if [[ "$unamestr" == 'Linux' ]]; then - suffix="unix" -elif [[ "$unamestr" == 'Darwin' ]]; then - suffix="mac" -else - echo "Unknown platform: $unamestr" - exit -fi - -# Makefile Test without SDL or RL_Glue -USE_SDL=0 -USE_RLGLUE=0 -TEST_MAKEFILE_BUILD $USE_SDL $USE_RLGLUE -TEST_SHARED_LIBRARY_EXAMPLE $USE_SDL - -# Makefile Test with SDL and RL_Glue -USE_SDL=1 -USE_RLGLUE=1 -TEST_MAKEFILE_BUILD $USE_SDL $USE_RLGLUE -TEST_SHARED_LIBRARY_EXAMPLE $USE_SDL -TEST_RLGLUE_EXAMPLE - -# CMake with no SDL/RL_Glue -USE_SDL=0 -USE_RLGLUE=0 -TEST_CMAKE_BUILD $USE_SDL $USE_RLGLUE -TEST_SHARED_LIBRARY_EXAMPLE $USE_SDL - -# CMake with SDL/RL_Glue -USE_SDL=1 -USE_RLGLUE=1 -TEST_CMAKE_BUILD $USE_SDL $USE_RLGLUE -TEST_SHARED_LIBRARY_EXAMPLE $USE_SDL -TEST_RLGLUE_EXAMPLE - -# Test the python examples -USE_SDL=0 -TEST_PYTHON_EXAMPLE $USE_SDL -USE_SDL=1 -TEST_PYTHON_EXAMPLE $USE_SDL - -LOG "==> All Tests Finished" -LOG "==> Tests Passed: $PASSED_TESTS" -LOG "==> Tests Failed: $FAILED_TESTS" -LOG "==> Tests Skipped: $SKIPPED_TESTS" -LOG "==> See $LOG for output" diff --git a/doc/manual/figures/ale.gif b/docs/manual/figures/ale.gif similarity index 100% rename from doc/manual/figures/ale.gif rename to docs/manual/figures/ale.gif diff --git a/doc/manual/manual.pdf b/docs/manual/manual.pdf similarity index 100% rename from doc/manual/manual.pdf rename to docs/manual/manual.pdf diff --git a/doc/manual/manual.tex b/docs/manual/manual.tex similarity index 100% rename from doc/manual/manual.tex rename to docs/manual/manual.tex diff --git a/examples/cpp-interface/CMakeLists.txt b/examples/cpp-interface/CMakeLists.txt new file mode 100644 index 000000000..f83a1fddb --- /dev/null +++ b/examples/cpp-interface/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.14) + +project(example-cpp-lib) + +find_package(ale REQUIRED) + +add_executable(sharedLibraryInterfaceExample 
sharedLibraryInterfaceExample.cpp) +target_link_libraries(sharedLibraryInterfaceExample ale::ale-lib) + +add_executable(sharedLibraryInterfaceWithModesExample sharedLibraryInterfaceWithModesExample.cpp) +target_link_libraries(sharedLibraryInterfaceWithModesExample ale::ale-lib) diff --git a/doc/examples/sharedLibraryInterfaceExample.cpp b/examples/cpp-interface/sharedLibraryInterfaceExample.cpp similarity index 100% rename from doc/examples/sharedLibraryInterfaceExample.cpp rename to examples/cpp-interface/sharedLibraryInterfaceExample.cpp diff --git a/doc/examples/sharedLibraryInterfaceWithModesExample.cpp b/examples/cpp-interface/sharedLibraryInterfaceWithModesExample.cpp similarity index 100% rename from doc/examples/sharedLibraryInterfaceWithModesExample.cpp rename to examples/cpp-interface/sharedLibraryInterfaceWithModesExample.cpp diff --git a/examples/python-interface/python_example.py b/examples/python-interface/python_example.py new file mode 100755 index 000000000..385f14e44 --- /dev/null +++ b/examples/python-interface/python_example.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# python_example.py +# Author: Ben Goodrich +# +# This is a direct port to python of the shared library example from +# ALE provided in examples/cpp-interface/sharedLibraryInterfaceExample.cpp +import sys +from random import randrange +from ale_py import ALEInterface + +if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} rom_file") + sys.exit() + +ale = ALEInterface() + +# Get & Set the desired settings +ale.setInt("random_seed", 123) + +# Set USE_SDL to true to display the screen. ALE must be compiled +# with SDL enabled for this to work. On OSX, pygame init is used to +# proxy-call SDL_main.
+USE_SDL = False +if USE_SDL: + ale.setBool("sound", True) + ale.setBool("display_screen", True) + +# Load the ROM file +rom_file = sys.argv[1] +ale.loadROM(rom_file) + +# Get the list of legal actions +legal_actions = ale.getLegalActionSet() + +# Play 10 episodes +for episode in range(10): + total_reward = 0 + while not ale.game_over(): + a = legal_actions[randrange(len(legal_actions))] + # Apply an action and get the resulting reward + reward = ale.act(a) + total_reward += reward + print("Episode %d ended with score: %d" % (episode, total_reward)) + ale.reset_game() diff --git a/examples/python-interface/python_example_with_modes.py b/examples/python-interface/python_example_with_modes.py new file mode 100755 index 000000000..62c16fbe9 --- /dev/null +++ b/examples/python-interface/python_example_with_modes.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# python_example_with_modes.py +# Author: Ben Goodrich & Marlos C. Machado +# +# This is a direct port to python of the shared library example from +# ALE provided in examples/cpp-interface/sharedLibraryInterfaceWithModesExample.cpp +import sys +from random import randrange +from ale_py import ALEInterface + +if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} rom_file") + sys.exit() + +ale = ALEInterface() + +# Get & Set the desired settings +ale.setInt("random_seed", 123) +# The default is already 0.25, this is just an example +ale.setFloat("repeat_action_probability", 0.25) + +# Set USE_SDL to true to display the screen. ALE must be compiled +# with SDL enabled for this to work. On OSX, pygame init is used to +# proxy-call SDL_main.
+USE_SDL = False +if USE_SDL: + ale.setBool("sound", True) + ale.setBool("display_screen", True) + +# Load the ROM file +ale.loadROM(sys.argv[1]) + +# Get the list of available modes and difficulties +avail_modes = ale.getAvailableModes() +avail_diff = ale.getAvailableDifficulties() + +print(f"Number of available modes: {len(avail_modes)}") +print(f"Number of available difficulties: {len(avail_diff)}") + +# Get the list of legal actions +legal_actions = ale.getLegalActionSet() + +# Play one episode in each mode and in each difficulty +for mode in avail_modes: + for diff in avail_diff: + + ale.setDifficulty(diff) + ale.setMode(mode) + ale.reset_game() + print(f"Mode {mode} difficulty {diff}:") + + total_reward = 0 + while not ale.game_over(): + a = legal_actions[randrange(len(legal_actions))] + # Apply an action and get the resulting reward + reward = ale.act(a) + total_reward += reward + + print(f"Episode ended with score: {total_reward}") diff --git a/examples/video-recording/CMakeLists.txt b/examples/video-recording/CMakeLists.txt new file mode 100644 index 000000000..c03020564 --- /dev/null +++ b/examples/video-recording/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.14) + +project(video-recording) + +find_package(ale REQUIRED) + +add_executable(videoRecordingExample videoRecordingExample.cpp) +target_link_libraries(videoRecordingExample ale::ale-lib) diff --git a/doc/scripts/videoRecordingExampleJoinMacOSX.sh b/examples/video-recording/macos-join-video.sh similarity index 84% rename from doc/scripts/videoRecordingExampleJoinMacOSX.sh rename to examples/video-recording/macos-join-video.sh index e4ac6a936..c33e02689 100755 --- a/doc/scripts/videoRecordingExampleJoinMacOSX.sh +++ b/examples/video-recording/macos-join-video.sh @@ -5,8 +5,8 @@ # -i record/%06d.png indicates we should use sequentially numbered frames in directory 'record' # -i sound.wav indicates the location of the sound file # -f mov specifies a MOV format -# -c:a mp3 specifies 
the sound codec +# -c:a mp3 specifies the sound codec # -c:v libx264 specifies the video codec # -pix_fmt yuv420p is needed on Mac OS X for playback with QuickTime Player -# -ffmpeg -r 60 -i record/%06d.png -i record/sound.wav -f mov -c:a mp3 -c:v libx264 -pix_fmt yuv420p agent.mov +# +ffmpeg -r 60 -i record/%06d.png -i record/sound.wav -f mov -c:a mp3 -c:v libx264 -pix_fmt yuv420p agent.mov diff --git a/doc/scripts/videoRecordingExampleJoinUnix.sh b/examples/video-recording/nix-join-video.sh similarity index 81% rename from doc/scripts/videoRecordingExampleJoinUnix.sh rename to examples/video-recording/nix-join-video.sh index 5b70dde64..a9c52e1af 100755 --- a/doc/scripts/videoRecordingExampleJoinUnix.sh +++ b/examples/video-recording/nix-join-video.sh @@ -1,19 +1,17 @@ #!/bin/bash -# A script to generate an ALE video with FFMpeg, *nix systems. +# A script to generate an ALE video with FFMpeg, *nix systems. # -r ## specifies the frame rate # -i record/%06d.png indicates we should use sequentially numbered frames in directory 'record' # -i sound.wav indicates the location of the sound file # -f mov specifies a MOV format -# -c:a mp3 specifies the sound codec +# -c:a mp3 specifies the sound codec # -c:v libx264 specifies the video codec -# +# - -# Attempt to use ffmpeg. If this fails, use avconv (fix for Ubuntu 14.04). +# Attempt to use ffmpeg. If this fails, use avconv (fix for Ubuntu 14.04). 
{ ffmpeg -r 60 -i record/%06d.png -i record/sound.wav -f mov -c:a mp3 -c:v libx264 agent.mov } || { avconv -r 60 -i record/%06d.png -i record/sound.wav -f mov -c:a mp3 -c:v libx264 agent.mov } - diff --git a/doc/examples/videoRecordingExample.cpp b/examples/video-recording/videoRecordingExample.cpp similarity index 96% rename from doc/examples/videoRecordingExample.cpp rename to examples/video-recording/videoRecordingExample.cpp index 79c92ea42..3b8a24b35 100644 --- a/doc/examples/videoRecordingExample.cpp +++ b/examples/video-recording/videoRecordingExample.cpp @@ -73,7 +73,7 @@ int main(int argc, char** argv) { std::cout << std::endl; std::cout << "Recording complete. To create a video, you may want to run \n" - " doc/scripts/videoRecordingExampleJoinXXX.sh. See manual for " + " examples/video-recording/*-join-video.sh. See docs for " "details.." << std::endl;