From 66c32a4d7fde3dac2c25a98cc23bfa7546f78c1e Mon Sep 17 00:00:00 2001 From: Edouard360 Date: Fri, 6 Oct 2017 11:10:09 +0200 Subject: [PATCH] Correction of the pylint tests Why this change was necessary: * Norms are important * Camel case instead of dash * for loops instead of enumerate * change some file names * import grouped and ordered correctly * warning about spacing and hyphens --- .pylintrc | 2 +- .travis.yml | 4 +- networking/hlt_networking.py | 28 ++-- networking/pipe_socket_translator.py | 57 ++++---- networking/start_game.py | 53 +++++--- public/MyBot.py | 24 ++-- public/OpponentBot.py | 24 ++-- public/context.py | 5 - public/hlt.py | 17 ++- public/models/agent/Agent.py | 52 ++++++++ .../{vanillaAgent.py => VanillaAgent.py} | 22 +-- public/models/agent/__init__.py | 1 - public/models/agent/agent.py | 42 ------ public/models/bot/Bot.py | 7 + .../bot/{improvedBot.py => ImprovedBot.py} | 10 +- public/models/bot/RandomBot.py | 12 ++ public/models/bot/TrainedBot.py | 47 +++++++ public/models/bot/__init__.py | 1 - public/models/bot/bot.py | 6 - public/models/bot/randomBot.py | 13 -- public/models/bot/trainedBot.py | 42 ------ pylint_checks.txt | 2 - requirements.txt | 2 +- tests/reward_test.py | 27 +++- tests/util.py | 7 +- train/experience.py | 6 +- train/main.py | 34 ++--- train/reward.py | 125 ++++++++++-------- train/worker.py | 46 +++++-- visualize/static/visualizer.js | 2 +- visualize/visualize.py | 43 +++--- 31 files changed, 429 insertions(+), 334 deletions(-) delete mode 100644 public/context.py create mode 100644 public/models/agent/Agent.py rename public/models/agent/{vanillaAgent.py => VanillaAgent.py} (80%) delete mode 100644 public/models/agent/agent.py create mode 100644 public/models/bot/Bot.py rename public/models/bot/{improvedBot.py => ImprovedBot.py} (64%) create mode 100644 public/models/bot/RandomBot.py create mode 100644 public/models/bot/TrainedBot.py delete mode 100644 public/models/bot/__init__.py delete mode 100644 public/models/bot/bot.py delete mode 100644 public/models/bot/randomBot.py delete mode 100644 public/models/bot/trainedBot.py delete mode 100644 pylint_checks.txt diff --git a/.pylintrc b/.pylintrc index 03b76bb..abe13e9 100644 --- a/.pylintrc +++ b/.pylintrc @@ -38,7 +38,7 @@ enable=indexing-exception,old-raise-syntax # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=design,similarities,no-self-use,attribute-defined-outside-init,locally-disabled,star-args,pointless-except,bad-option-value,global-statement,fixme,suppressed-message,useless-suppression,locally-enabled,no-member,no-name-in-module,import-error,unsubscriptable-object,unbalanced-tuple-unpacking,undefined-variable,not-context-manager +disable=invalid-unary-operand-type,design,similarities,no-self-use,attribute-defined-outside-init,locally-disabled,star-args,pointless-except,bad-option-value,global-statement,fixme,suppressed-message,useless-suppression,locally-enabled,no-member,no-name-in-module,import-error,unsubscriptable-object,unbalanced-tuple-unpacking,undefined-variable,not-context-manager # Set the cache size for astng objects. diff --git a/.travis.yml b/.travis.yml index 0beac11..590f538 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,9 +23,7 @@ install: script: # Tests - python -m unittest discover -v - # Style checks - # Temporary workaround - - for i in `cat pylint_checks.txt` ; do pylint $i ;done + - find . -iname "*.py" | xargs pylint # Coverage checks - py.test --cov=train tests/ diff --git a/networking/hlt_networking.py b/networking/hlt_networking.py index e9a404c..3cecc94 100644 --- a/networking/hlt_networking.py +++ b/networking/hlt_networking.py @@ -1,38 +1,40 @@ +"""The HLT class to handle the connection""" import socket from public.hlt import GameMap, translate_cardinal class HLT: + """The HLT class to handle the connection""" def __init__(self, port): - _connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - _connection.connect(('localhost', port)) + connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + connection.connect(('localhost', port)) print('Connected to intermediary on port #' + str(port)) - self._connection = _connection + self.connection = connection def get_string(self): - newString = "" + new_string = "" buffer = '\0' while True: - buffer = self._connection.recv(1).decode('ascii') + buffer = self.connection.recv(1).decode('ascii') if buffer != '\n': - newString += str(buffer) + new_string += str(buffer) else: - return newString + return new_string - def sendString(self, s): + def send_string(self, s): s += '\n' - self._connection.sendall(bytes(s, 'ascii')) + self.connection.sendall(bytes(s, 'ascii')) def get_init(self): - myID = int(self.get_string()) + my_id = int(self.get_string()) game_map = GameMap(self.get_string(), self.get_string(), self.get_string()) - return myID, game_map + return my_id, game_map def send_init(self, name): - self.sendString(name) + self.send_string(name) def send_frame(self, moves): - self.sendString(' '.join( + self.send_string(' '.join( str(move.square.x) + ' ' + str(move.square.y) + ' ' + str(translate_cardinal(move.direction)) for move in moves)) diff --git a/networking/pipe_socket_translator.py b/networking/pipe_socket_translator.py index df3a7a9..2f11d16 100644 --- a/networking/pipe_socket_translator.py +++ b/networking/pipe_socket_translator.py @@ -1,11 +1,11 @@ +""" +To be launched by the Halite program as an intermediary, +in order to enable a pipe player to join. +""" import socket import sys -# logging.basicConfig(filename='example.log', level=logging.DEBUG) - try: - # Connect - # logging.warning("connecting") socket_ = socket.socket(socket.AF_INET, socket.SOCK_STREAM) socket_.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) socket_.bind(('localhost', int(sys.argv[1]))) # This is where the port is selected @@ -13,50 +13,47 @@ connection, _ = socket_.accept() - # IO Functions - def sendStringPipe(toBeSent): - sys.stdout.write(toBeSent + '\n') + def send_string_pipe(to_be_sent): + sys.stdout.write(to_be_sent + '\n') sys.stdout.flush() - def getStringPipe(): - str = sys.stdin.readline().rstrip('\n') - return (str) + def get_string_pipe(): + str_pipe = sys.stdin.readline().rstrip('\n') + return str_pipe - def sendStringSocket(toBeSent): - global connection - toBeSent += '\n' - connection.sendall(bytes(toBeSent, 'ascii')) + def send_string_socket(to_be_sent): + to_be_sent += '\n' + connection.sendall(bytes(to_be_sent, 'ascii')) - def getStringSocket(): - global connection - newString = "" + def get_string_socket(): + new_string = "" buffer = '\0' while True: buffer = connection.recv(1).decode('ascii') if buffer != '\n': - newString += str(buffer) + new_string += str(buffer) else: - return newString + return new_string while True: # Handle Init IO - sendStringSocket(getStringPipe()) # Player ID - sendStringSocket(getStringPipe()) # Map Dimensions - sendStringSocket(getStringPipe()) # Productions - sendStringSocket(getStringPipe()) # Starting Map - sendStringPipe(getStringSocket()) # Player Name / Ready Response + send_string_socket(get_string_pipe()) # Player ID + send_string_socket(get_string_pipe()) # Map Dimensions + send_string_socket(get_string_pipe()) # Productions + send_string_socket(get_string_pipe()) # Starting Map + send_string_pipe(get_string_socket()) # Player Name / Ready Response # Run Frame Loop - while (getStringPipe() == 'Get map and play!'): # while True: - sendStringSocket('Get map and play!') - sendStringSocket(getStringPipe()) # Frame Map - sendStringPipe(getStringSocket()) # Move List - sendStringSocket('Stop playing!') + while get_string_pipe() == 'Get map and play!': # while True: + send_string_socket('Get map and play!') + send_string_socket(get_string_pipe()) # Frame Map + send_string_pipe(get_string_socket()) # Move List + send_string_socket('Stop playing!') -except Exception as e: +except ConnectionError as e: # logging.warning(traceback.format_exc()) pass diff --git a/networking/start_game.py b/networking/start_game.py index 7ff8b48..cee7983 100644 --- a/networking/start_game.py +++ b/networking/start_game.py @@ -1,10 +1,16 @@ +"""The start_game function to launch the halite.exe""" import subprocess import argparse import os -def start_game(port=2000, width=10,height=10, max_strength=25, max_turn=25, max_game=1, silent_bool=True, timeout=True, quiet=True, - n_pipe_players=1, slave_players=[]): +def start_game(port=2000, width=10, height=10, max_strength=25, max_turn=25, max_game=1, + silent_bool=True, timeout=True, quiet=True, + n_pipe_players=1, slave_players=None): + """ + The start_game function to launch the halite.exe. + Execute with the -h option for help. + """ path_to_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) for i in range(n_pipe_players): subprocess.call([path_to_root + "/networking/kill.sh", str(port + i)]) # Free the necessary ports @@ -24,7 +30,7 @@ def start_game(port=2000, width=10,height=10, max_strength=25, max_turn=25, max_ ] slave_players = [ "python3 " + path_to_root + "/public/" + slave_player + ' slave' for slave_player in slave_players - ] # slave is the slave argument + ] if slave_players is not None else [] # slave is the slave argument players = pipe_players + slave_players # "python3 " + path_to_root + "/networking/pipe_socket_translator.py " + str(port+1) n_player = '' if len(players) > 1 else '-n 1 ' @@ -40,19 +46,36 @@ def start_game(port=2000, width=10,height=10, max_strength=25, max_turn=25, max_ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("-p", "--port", type=int, help="the port for the simulation - Useless if there are no pipe_players", default=2000) - parser.add_argument("-t", "--timeout", help="Doens't timeout if you set this flag is set", action="store_true", default=False) - parser.add_argument("-j", "--silent", help="Doesn't print *.hlt file", action="store_true", default=False) - parser.add_argument("-q", "--quiet", help="Doesn't output information to the console", action="store_true", default=False) - parser.add_argument("-s", "--strength", help="The max strength of the squares, if needed", type=int, default=25) - parser.add_argument("-dw", "--width", help="The width of the game", type=int, default=10) - parser.add_argument("-dh", "--height", help="The height of the game", type=int, default=10) - parser.add_argument("-m", "--maxturn", help="The total number of turns per game (maximum)", type=int, default=25) - parser.add_argument("-g", "--maxgame", help="The total number of games to play", type=int, default=1) # -1 for infinite game - parser.add_argument("-pp", "--n_pipe_players",help="The number of pipe players. You need to handle these players yourself. Each of them has a port assigned.", type=int, default=0) - parser.add_argument("-sp", "--slave_players", help="The slave players. Handled by the halite.exe. You should write one of these two strings: 'MyBot.py' or 'OpponentBot.py' (multiple time if desired) ",nargs='+', default=[]) + parser.add_argument("-p", "--port", type=int, + help="the port for the simulation - Useless if there are no pipe_players", + default=2000) + parser.add_argument("-t", "--timeout", help="Doens't timeout if you set this flag is set", + action="store_true", default=False) + parser.add_argument("-j", "--silent", help="Doesn't print *.hlt file", + action="store_true", default=False) + parser.add_argument("-q", "--quiet", help="Doesn't output information to the console", + action="store_true", default=False) + parser.add_argument("-s", "--strength", help="The max strength of the squares, if needed", + type=int, default=25) + parser.add_argument("-dw", "--width", help="The width of the game", + type=int, default=10) + parser.add_argument("-dh", "--height", help="The height of the game", + type=int, default=10) + parser.add_argument("-m", "--maxturn", help="The total number of turns per game (maximum)", + type=int, default=25) + parser.add_argument("-g", "--maxgame", help="The total number of games to play", + type=int, default=1) # -1 for infinite game + parser.add_argument("-pp", "--n_pipe_players", + help="The number of pipe players. You need to handle these players yourself. " + "Each of them has a port assigned.", + type=int, default=0) + parser.add_argument("-sp", "--slave_players", + help="The slave players. Handled by the halite.exe. " + "You should write one of these two strings: " + "'MyBot.py' or 'OpponentBot.py' (multiple time if desired) ", + nargs='+', default=[]) args = parser.parse_args() - start_game(port=args.port, width=args.width,height=args.height, max_strength=args.strength, max_turn=args.maxturn, + start_game(port=args.port, width=args.width, height=args.height, max_strength=args.strength, max_turn=args.maxturn, silent_bool=args.silent, timeout=args.timeout, max_game=args.maxgame, quiet=args.quiet, n_pipe_players=args.n_pipe_players, slave_players=args.slave_players) diff --git a/public/MyBot.py b/public/MyBot.py index 9429dac..ecadf7c 100644 --- a/public/MyBot.py +++ b/public/MyBot.py @@ -1,27 +1,29 @@ -import sys +"""The MyBot.py file that executes the TrainedBot.py""" import os +import sys + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +try: + from public.models.bot.TrainedBot import TrainedBot + from networking.hlt_networking import HLT +except: + raise mode = 'server' if (len(sys.argv) == 1) else 'local' - -if mode == 'server' or sys.argv[1]=='slave': # 'server' mode +if mode == 'server' or sys.argv[1] == 'slave': # 'server' mode import hlt else: # 'local' mode - import context - port = int(sys.argv[1]) if len(sys.argv) > 1 else 2000 - hlt = context.HLT(port=port) - -from public.models.bot.trainedBot import TrainedBot + hlt = HLT(port=port) bot = TrainedBot() while True: - myID, game_map = hlt.get_init() + my_id, game_map = hlt.get_init() hlt.send_init("MyBot") - bot.setID(myID) + bot.set_id(my_id) - while (mode == 'server' or hlt.get_string() == 'Get map and play!'): + while mode == 'server' or hlt.get_string() == 'Get map and play!': game_map.get_frame(hlt.get_string()) moves = bot.compute_moves(game_map) hlt.send_frame(moves) diff --git a/public/OpponentBot.py b/public/OpponentBot.py index 32ca428..67344c3 100644 --- a/public/OpponentBot.py +++ b/public/OpponentBot.py @@ -1,27 +1,29 @@ -import sys +"""The Opponent.py file that executes the ImprovedBot.py""" import os +import sys + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +try: + from public.models.bot.ImprovedBot import ImprovedBot + from networking.hlt_networking import HLT +except: + raise mode = 'server' if (len(sys.argv) == 1) else 'local' - -if mode == 'server' or sys.argv[1]=='slave': # 'server' mode +if mode == 'server' or sys.argv[1] == 'slave': # 'server' mode import hlt else: # 'local' mode - import context - port = int(sys.argv[1]) if len(sys.argv) > 1 else 2000 - hlt = context.HLT(port=port) - -from public.models.bot.improvedBot import ImprovedBot + hlt = HLT(port=port) bot = ImprovedBot() while True: - myID, game_map = hlt.get_init() + my_id, game_map = hlt.get_init() hlt.send_init("OpponentBot") - bot.setID(myID) + bot.set_id(my_id) - while (mode == 'server' or hlt.get_string() == 'Get map and play!'): + while mode == 'server' or hlt.get_string() == 'Get map and play!': game_map.get_frame(hlt.get_string()) moves = bot.compute_moves(game_map) hlt.send_frame(moves) diff --git a/public/context.py b/public/context.py deleted file mode 100644 index 6aa2548..0000000 --- a/public/context.py +++ /dev/null @@ -1,5 +0,0 @@ -import sys -import os - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from networking.hlt_networking import HLT diff --git a/public/hlt.py b/public/hlt.py index 26093ed..acc0a1d 100644 --- a/public/hlt.py +++ b/public/hlt.py @@ -1,3 +1,4 @@ +"""The original but corrected hlt.py file for communication with halite.""" import sys from collections import namedtuple from itertools import chain, zip_longest @@ -24,6 +25,8 @@ def opposite_cardinal(direction): class GameMap: + """The GameMap on which to play.""" + def __init__(self, size_string, production_string, map_string=None): self.width, self.height = tuple(map(int, size_string.split())) self.production = tuple( @@ -57,12 +60,14 @@ def __iter__(self): return chain.from_iterable(self.contents) def neighbors(self, square, n=1, include_self=False): - "Iterable over the n-distance neighbors of a given square. For single-step neighbors, the enumeration index provides the direction associated with the neighbor." + """Iterable over the n-distance neighbors of a given square. + For single-step neighbors, the enumeration index provides + the direction associated with the neighbor. + """ assert isinstance(include_self, bool) assert isinstance(n, int) and n > 0 if n == 1: - combos = ((0, -1), (1, 0), (0, 1), (-1, 0), (0, - 0)) # NORTH, EAST, SOUTH, WEST, STILL ... matches indices provided by enumerate(game_map.neighbors(square)) + combos = ((0, -1), (1, 0), (0, 1), (-1, 0), (0, 0)) else: combos = ((dx, dy) for dy in range(-n, n + 1) for dx in range(-n, n + 1) if abs(dx) + abs(dy) <= n) return (self.contents[(square.y + dy) % self.height][(square.x + dx) % self.width] for dx, dy in combos if @@ -96,9 +101,9 @@ def get_string(): def get_init(): - playerID = int(get_string()) + player_id = int(get_string()) m = GameMap(get_string(), get_string()) - return playerID, m + return player_id, m def send_init(name): @@ -106,7 +111,7 @@ def send_init(name): def translate_cardinal(direction): - "Translate direction constants used by this Python-based bot framework to that used by the official Halite game environment." + "Beware the direction are changed! Important for visualization" return (direction + 1) % 5 diff --git a/public/models/agent/Agent.py b/public/models/agent/Agent.py new file mode 100644 index 0000000..b41ee37 --- /dev/null +++ b/public/models/agent/Agent.py @@ -0,0 +1,52 @@ +"""The Agent general class""" +import os + +import numpy as np + +from train.reward import local_state_from_global, normalize_game_state + + +class Agent: + """The Agent general class""" + + def __init__(self, name, experience): + self.name = name + self.experience = experience + if self.experience is not None: + try: + self.experience.metric = np.load(os.path.abspath( + os.path.join(os.path.dirname(__file__), '..')) + + '/variables/' + self.name + '/' + + self.name + '.npy') + except FileNotFoundError: + print("Metric file not found") + self.experience.metric = np.array([]) + + def get_policies(self, sess, game_state): + policies = np.zeros(game_state[0].shape + (5,)) + for y in range(len(game_state[0])): + for x in range(len(game_state[0][0])): + if game_state[0][y][x] == 1: + policies[y][x] = self.get_policy(sess, + normalize_game_state(local_state_from_global(game_state, x, y))) + return policies + + def get_policy(self, sess, state): + pass + + def choose_actions(self, sess, game_state, debug=False): + # Here the state is not yet normalized ! + moves = np.zeros_like(game_state[0], dtype=np.int64) - 1 + for y in range(len(game_state[0])): + for x in range(len(game_state[0][0])): + if game_state[0][y][x] == 1: + moves[y][x] = self.choose_action(sess, + normalize_game_state(local_state_from_global(game_state, x, y)), + debug=debug) + return moves + + def choose_action(self, sess, state, frac_progress=1.0, debug=False): + pass + + def update_agent(self, sess): + pass diff --git a/public/models/agent/vanillaAgent.py b/public/models/agent/VanillaAgent.py similarity index 80% rename from public/models/agent/vanillaAgent.py rename to public/models/agent/VanillaAgent.py index 77a03c8..8779dab 100644 --- a/public/models/agent/vanillaAgent.py +++ b/public/models/agent/VanillaAgent.py @@ -1,12 +1,14 @@ +"""The Vanilla Agent""" import numpy as np import tensorflow as tf import tensorflow.contrib.slim as slim -from public.models.agent.agent import Agent +from public.models.agent.Agent import Agent class VanillaAgent(Agent): - def __init__(self, experience, lr = 1e-2, s_size = 9 * 3, a_size = 5, h_size = 50): # all these are optional ? + """The Vanilla Agent""" + def __init__(self, experience=None, lr=1e-2, s_size=9 * 3, a_size=5, h_size=50): # all these are optional ? super(VanillaAgent, self).__init__('vanilla-cin', experience) # These lines established the feed-forward part of the network. The agent takes a state and produces an action. @@ -30,22 +32,22 @@ def __init__(self, experience, lr = 1e-2, s_size = 9 * 3, a_size = 5, h_size = 5 self.tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=tf.get_variable_scope().name) self.gradients = tf.gradients(loss, self.tvars) - self.gradientHolders = [] - for idx, var in enumerate(self.tvars): + self.gradient_holders = [] + for idx in range(len(self.tvars)): placeholder = tf.placeholder(tf.float32, name=str(idx) + '_holder') - self.gradientHolders.append(placeholder) + self.gradient_holders.append(placeholder) global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global') optimizer = tf.train.AdamOptimizer(learning_rate=lr) - self.updateGlobal = optimizer.apply_gradients(zip(self.gradientHolders, global_vars)) # self.tvars + self.update_global = optimizer.apply_gradients(zip(self.gradient_holders, global_vars)) # self.tvars - def get_policy(self,sess, state): + def get_policy(self, sess, state): return sess.run(self.policy, feed_dict={self.state_in: [state.reshape(-1)]}) def choose_action(self, sess, state, frac_progress=1.0, debug=False): # it only a state, not the game state... # Here the state is normalized ! - if (np.random.uniform() >= frac_progress): + if np.random.uniform() >= frac_progress: a = np.random.choice(range(5)) else: a_dist = sess.run(self.policy, feed_dict={self.state_in: [state.reshape(-1)]}) @@ -64,5 +66,5 @@ def update_agent(self, sess): self.action_holder: moves, self.reward_holder: rewards} grads = sess.run(self.gradients, feed_dict=feed_dict) - feed_dict = dict(zip(self.gradientHolders, grads)) - _ = sess.run(self.updateGlobal, feed_dict=feed_dict) + feed_dict = dict(zip(self.gradient_holders, grads)) + _ = sess.run(self.update_global, feed_dict=feed_dict) diff --git a/public/models/agent/__init__.py b/public/models/agent/__init__.py index 849b75f..e69de29 100644 --- a/public/models/agent/__init__.py +++ b/public/models/agent/__init__.py @@ -1 +0,0 @@ -# TODO: import via the agent package diff --git a/public/models/agent/agent.py b/public/models/agent/agent.py deleted file mode 100644 index 563b647..0000000 --- a/public/models/agent/agent.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -import os -from train.reward import localStateFromGlobal, normalizeGameState - - -class Agent: - def __init__(self, name, experience): - self.name = name - self.experience = experience - if self.experience is not None: - try: - self.experience.metric = np.load(os.path.abspath(os.path.join(os.path.dirname(__file__), - '..')) + '/variables/' + self.name + '/' + self.name + '.npy') - except: - print("Metric file not found") - self.experience.metric = np.array([]) - - def get_policies(self,sess, game_state): - policies = np.zeros(game_state[0].shape + (5,)) - for y in range(len(game_state[0])): - for x in range(len(game_state[0][0])): - if (game_state[0][y][x] == 1): - policies[y][x] = self.get_policy(sess, normalizeGameState(localStateFromGlobal(game_state, x, y))) - return policies - - def get_policy(self,sess, state): - pass - - def choose_actions(self, sess, game_state, debug=False): - # Here the state is not yet normalized ! - moves = np.zeros_like(game_state[0], dtype=np.int64) - 1 - for y in range(len(game_state[0])): - for x in range(len(game_state[0][0])): - if (game_state[0][y][x] == 1): - moves[y][x] = self.choose_action(sess, normalizeGameState(localStateFromGlobal(game_state, x, y)), debug=debug) - return moves - - def choose_action(self, sess, state, frac_progress=1.0, debug=False): - pass - - def update_agent(self, sess): - pass diff --git a/public/models/bot/Bot.py b/public/models/bot/Bot.py new file mode 100644 index 0000000..f553835 --- /dev/null +++ b/public/models/bot/Bot.py @@ -0,0 +1,7 @@ +"""The General Bot class""" +class Bot: + def compute_moves(self, game_map): + pass + + def set_id(self, my_id): + self.my_id = my_id diff --git a/public/models/bot/improvedBot.py b/public/models/bot/ImprovedBot.py similarity index 64% rename from public/models/bot/improvedBot.py rename to public/models/bot/ImprovedBot.py index b56a706..c655eea 100644 --- a/public/models/bot/improvedBot.py +++ b/public/models/bot/ImprovedBot.py @@ -1,16 +1,18 @@ +"""The Improved Bot""" import random from public.hlt import Move, NORTH, STILL, WEST -from public.models.bot.bot import Bot +from public.models.bot.Bot import Bot class ImprovedBot(Bot): - def compute_moves(self, game_map, sess=None): + def compute_moves(self, game_map): + """Compute the moves given a game_map""" moves = [] for square in game_map: - if square.owner == self.myID: + if square.owner == self.my_id: for direction, neighbor in enumerate(game_map.neighbors(square)): - if neighbor.owner != self.myID and neighbor.strength < square.strength: + if neighbor.owner != self.my_id and neighbor.strength < square.strength: moves += [Move(square, direction)] if square.strength < 5 * square.production: moves += [Move(square, STILL)] diff --git a/public/models/bot/RandomBot.py b/public/models/bot/RandomBot.py new file mode 100644 index 0000000..1827185 --- /dev/null +++ b/public/models/bot/RandomBot.py @@ -0,0 +1,12 @@ +"""The Random Bot""" +import random + +from public.hlt import EAST, Move, NORTH, SOUTH, STILL, WEST +from public.models.bot.Bot import Bot + + +class RandomBot(Bot): + def compute_moves(self, game_map): + """Compute the moves given a game_map""" + return [Move(square, random.choice((NORTH, EAST, SOUTH, WEST, STILL))) for square in game_map if + square.owner == self.my_id] diff --git a/public/models/bot/TrainedBot.py b/public/models/bot/TrainedBot.py new file mode 100644 index 0000000..f02205f --- /dev/null +++ b/public/models/bot/TrainedBot.py @@ -0,0 +1,47 @@ +"""The Trained Bot""" +import os + +import tensorflow as tf + +from public.models.agent.VanillaAgent import VanillaAgent +from public.models.bot.Bot import Bot +from train.reward import format_moves, get_game_state + + +class TrainedBot(Bot): + """The trained bot""" + + def __init__(self): + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + tf.reset_default_graph() + + with tf.device("/cpu:0"): + with tf.variable_scope('global'): + self.agent = VanillaAgent() + + global_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='global') + saver = tf.train.Saver(global_variables) + init = tf.global_variables_initializer() + + self.sess = tf.Session() + self.sess.run(init) + try: + saver.restore(self.sess, os.path.abspath( + os.path.join(os.path.dirname(__file__), '..')) + + '/variables/' + self.agent.name + '/' + + self.agent.name) + except FileNotFoundError: + print("Model not found - initiating new one") + + def compute_moves(self, game_map): + """Compute the moves given a game_map""" + game_state = get_game_state(game_map, self.my_id) + return format_moves(game_map, self.agent.choose_actions(self.sess, game_state, debug=True)) + + def get_policies(self, game_state): + """Compute the policies given a game_state""" + return self.agent.get_policies(self.sess, game_state) + + def close(self): + """Close the tensorflow session""" + self.sess.close() diff --git a/public/models/bot/__init__.py b/public/models/bot/__init__.py deleted file mode 100644 index 1a1aa6a..0000000 --- a/public/models/bot/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: import via the bot package diff --git a/public/models/bot/bot.py b/public/models/bot/bot.py deleted file mode 100644 index 4fa4cd5..0000000 --- a/public/models/bot/bot.py +++ /dev/null @@ -1,6 +0,0 @@ -class Bot: - def compute_moves(self, game_map): - pass - - def setID(self, myID): - self.myID = myID diff --git a/public/models/bot/randomBot.py b/public/models/bot/randomBot.py deleted file mode 100644 index be16972..0000000 --- a/public/models/bot/randomBot.py +++ /dev/null @@ -1,13 +0,0 @@ -import random - -from public.hlt import EAST, Move, NORTH, SOUTH, STILL, WEST -from public.models.bot.bot import Bot - - -class RandomBot(Bot): - def __init__(self, myID): - super(RandomBot, self).__init__(myID) - - def compute_moves(self, game_map, sess=None): - [Move(square, random.choice((NORTH, EAST, SOUTH, WEST, STILL))) for square in game_map if - square.owner == self.myID] diff --git a/public/models/bot/trainedBot.py b/public/models/bot/trainedBot.py deleted file mode 100644 index 2ea9ba5..0000000 --- a/public/models/bot/trainedBot.py +++ /dev/null @@ -1,42 +0,0 @@ -from public.models.agent.vanillaAgent import VanillaAgent -from public.models.bot.bot import Bot -from train.reward import formatMoves, getGameState -import tensorflow as tf -import os - - -class TrainedBot(Bot): - def __init__(self): - lr = 5*1e-3; - s_size = 9 * 3; - a_size = 5; - h_size = 50 - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - tf.reset_default_graph() - - with tf.device("/cpu:0"): - with tf.variable_scope('global'): - self.agent = VanillaAgent(None, lr, s_size, a_size, h_size) - - global_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='global') - saver = tf.train.Saver(global_variables) - init = tf.global_variables_initializer() - - self.sess = tf.Session() - self.sess.run(init) - try: - saver.restore(self.sess, os.path.abspath(os.path.join(os.path.dirname(__file__), - '..')) + '/variables/' + self.agent.name + '/' + self.agent.name) - except Exception: - print("Model not found - initiating new one") - - def compute_moves(self, game_map): - game_state = getGameState(game_map, self.myID) - return formatMoves(game_map, self.agent.choose_actions(self.sess, game_state, debug=True)) - - def get_policies(self, game_state): - # Warning this is not hereditary - return self.agent.get_policies(self.sess, game_state) - - def close(self): - self.sess.close() diff --git a/pylint_checks.txt b/pylint_checks.txt deleted file mode 100644 index eecdc7e..0000000 --- a/pylint_checks.txt +++ /dev/null @@ -1,2 +0,0 @@ -train/__init__.py -train/experience.py diff --git a/requirements.txt b/requirements.txt index 8b7417f..0a080ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ coverage>=3.6 pytest-cov pytest-xdist coveralls -pylint +pylint>=1.6 flask \ No newline at end of file diff --git a/tests/reward_test.py b/tests/reward_test.py index 6398ff0..56e5815 100644 --- a/tests/reward_test.py +++ b/tests/reward_test.py @@ -1,28 +1,38 @@ """ Tests the reward function """ -from train.reward import discount_rewards, rawRewards, allRewards -from train.experience import ExperienceVanilla -from train.worker import Worker import unittest import numpy as np + from tests.util import game_states_from_url +from train.experience import ExperienceVanilla +from train.reward import discount_rewards, raw_rewards_function, all_rewards_function +from train.worker import Worker class TestReward(unittest.TestCase): + """ + Tests the reward function + """ def test_length_discount_rewards(self): + """ + Test the length of the discount reward + """ self.assertTrue(len(discount_rewards(np.array([1]))) == 1) self.assertTrue(len(discount_rewards(np.array([1, 3]))) == 2) def test_reward(self): - GAME_URL = 'https://s3.eu-central-1.amazonaws.com/halite-python-rl/hlt-games/trained-bot.hlt' - game_states, moves = game_states_from_url(GAME_URL) + """ + Test the length of the discount reward + """ + game_url = 'https://s3.eu-central-1.amazonaws.com/halite-python-rl/hlt-games/trained-bot.hlt' + game_states, moves = game_states_from_url(game_url) - raw_rewards = rawRewards(game_states) + raw_rewards = raw_rewards_function(game_states) self.assertTrue(len(raw_rewards) == len(game_states) - 1) - all_states, all_moves, all_rewards = allRewards(game_states, moves) + all_states, all_moves, all_rewards = all_rewards_function(game_states, moves) self.assertTrue(len(all_states) >= len(game_states) - 1) self.assertTrue(len(all_moves) >= len(moves)) self.assertTrue(len(all_rewards) == len(all_moves) and len(all_states) == len(all_moves)) @@ -34,6 +44,9 @@ def test_reward(self): self.assertTrue(len(batch_rewards) == len(batch_moves) and len(batch_states) == len(batch_moves)) def test_worker(self): + """ + Test if the worker port initiate and terminate with good port + """ worker = Worker(2000, 2, None) self.assertTrue(worker.port == 2002) worker.p.terminate() diff --git a/tests/util.py b/tests/util.py index 6681bb4..3e4137e 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,15 +1,16 @@ +"""Importing the game from aws""" import json import urllib.request import numpy as np -def game_states_from_url(GAME_URL): +def game_states_from_url(game_url): """ We host known games on aws server and we run the tests according to these games, from which we know the output - :param GAME_URL: The url of the game on the server (string). + :param game_url: The url of the game on the server (string). :return: """ - game = json.loads(urllib.request.urlopen(GAME_URL).readline().decode("utf-8")) + game = json.loads(urllib.request.urlopen(game_url).readline().decode("utf-8")) owner_frames = np.array(game["frames"])[:, :, :, 0][:, np.newaxis, :, :] strength_frames = np.array(game["frames"])[:, :, :, 1][:, np.newaxis, :, :] diff --git a/train/experience.py b/train/experience.py index 146933d..49704b5 100644 --- a/train/experience.py +++ b/train/experience.py @@ -3,7 +3,7 @@ """ import numpy as np -from train.reward import allRewards, rawRewardsMetric +from train.reward import all_rewards_function, raw_rewards_metric class Experience: @@ -24,7 +24,7 @@ def batch(self, size): pass def compute_metric(self, game_states): - production_increments = np.sum(np.sum(rawRewardsMetric(game_states), axis=2), axis=1) + production_increments = np.sum(np.sum(raw_rewards_metric(game_states), axis=2), axis=1) self.metric = np.append(self.metric, production_increments.dot(np.linspace(2.0, 1.0, num=len(game_states) - 1))) def save_metric(self, name): @@ -42,7 +42,7 @@ def __init__(self): def add_episode(self, game_states, moves): self.compute_metric(game_states) - all_states, all_moves, all_rewards = allRewards(game_states, moves) + all_states, all_moves, all_rewards = all_rewards_function(game_states, moves) self.states = np.concatenate((self.states, all_states.reshape(-1, 27)), axis=0) self.moves = np.concatenate((self.moves, all_moves)) diff --git a/train/main.py b/train/main.py index 1739bb3..e693ffb 100644 --- a/train/main.py +++ b/train/main.py @@ -1,40 +1,39 @@ -import multiprocessing +"""This main.py file runs the training.""" import threading import os import sys import tensorflow as tf +from public.models.agent.VanillaAgent import VanillaAgent +from train.experience import ExperienceVanilla +from train.worker import Worker + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from public.models.agent.vanillaAgent import VanillaAgent -from train.experience import ExperienceVanilla -from train.worker import Worker port = int(sys.argv[1]) if len(sys.argv) > 1 else 2000 tf.reset_default_graph() # Clear the Tensorflow graph. with tf.device("/cpu:0"): - lr = 1e-3; - s_size = 9 * 3; - a_size = 5; + lr = 1e-3 + s_size = 9 * 3 + a_size = 5 h_size = 50 with tf.variable_scope('global'): master_experience = ExperienceVanilla() master_agent = VanillaAgent(master_experience, lr, s_size, a_size, h_size) - num_workers = 5 # multiprocessing.cpu_count()# (2) Maybe set max number of workers / number of available CPU threads + num_workers = 5 n_simultations = 500 workers = [] if num_workers > 1: for i in range(num_workers): with tf.variable_scope('worker_' + str(i)): - experience = ExperienceVanilla() - agent = VanillaAgent(experience, lr, s_size, a_size, h_size) - workers.append(Worker(port, i, agent)) + workers.append(Worker(port, i, VanillaAgent(ExperienceVanilla(), lr, s_size, a_size, h_size))) else: workers.append(Worker(port, 0, master_agent)) # We need only to save the global @@ -46,19 +45,20 @@ with tf.Session() as sess: sess.run(init) try: - saver.restore(sess, os.path.abspath(os.path.dirname(__file__))+'/../public/models/variables/' + master_agent.name+'/'+master_agent.name) - except Exception: + saver.restore(sess, os.path.abspath( + os.path.dirname(__file__)) + '/../public/models/variables/' + master_agent.name + '/' + master_agent.name) + except FileNotFoundError: print("Model not found - initiating new one") coord = tf.train.Coordinator() worker_threads = [] - print("I'm the main thread running on CPU #%s" % multiprocessing.current_process().name) + print("I'm the main thread running on CPU") - if (num_workers == 1): - workers[0].work(sess, coord, saver, n_simultations) + if num_workers == 1: + workers[0].work(sess, saver, n_simultations) else: for worker in workers: - worker_work = lambda: worker.work(sess, coord, saver, n_simultations) + worker_work = lambda worker=worker: worker.work(sess, saver, n_simultations) t = threading.Thread(target=(worker_work)) # Process instead of threading.Thread multiprocessing.Process t.start() worker_threads.append(t) diff --git a/train/reward.py b/train/reward.py index 36a8233..4ef8143 100644 --- a/train/reward.py +++ b/train/reward.py @@ -1,33 +1,34 @@ +"""The reward.py file to compute the reward""" import numpy as np -from public.hlt import NORTH, EAST, SOUTH, WEST, STILL, Move +from public.hlt import NORTH, EAST, SOUTH, WEST, Move STRENGTH_SCALE = 255 PRODUCTION_SCALE = 10 -def getGameState(game_map, myID): +def get_game_state(game_map, my_id): game_state = np.reshape( - [[(square.owner == myID) + 0, square.strength, square.production] for square in game_map], + [[(square.owner == my_id) + 0, square.strength, square.production] for square in game_map], [game_map.height, game_map.width, 3]) return np.swapaxes(np.swapaxes(game_state, 2, 0), 1, 2) -def normalizeGameState(game_state): +def normalize_game_state(game_state): return game_state / np.array([1, STRENGTH_SCALE, PRODUCTION_SCALE])[:, np.newaxis, np.newaxis] -def getGameProd(game_state): +def get_game_prod(game_state): return np.sum(game_state[0] * game_state[2]) -def getStrength(game_state): +def get_strength(game_state): return np.sum(game_state[0] * game_state[1]) - # np.sum([square.strength for square in game_map if square.owner == myID]) + # np.sum([square.strength for square in game_map if square.owner == my_id]) -def getNumber(game_state): +def get_number(game_state): return np.sum(game_state[0]) - # np.sum([square.strength for square in game_map if square.owner == myID]) + # np.sum([square.strength for square in game_map if square.owner == my_id]) def discount_rewards(r, gamma=0.8): @@ -39,107 +40,117 @@ def discount_rewards(r, gamma=0.8): discounted_r[t] = running_add return discounted_r -def take_surrounding_square(game_state, x, y, size = 1): + +def take_surrounding_square(game_state, x, y, size=1): return np.take(np.take(game_state, range(y - size, y + size + 1), axis=1, mode='wrap'), range(x - size, x + size + 1), axis=2, mode='wrap') -def take_surrounding_losange(game_state, x, y, size = 2): - np.take(np.take(game_state, y, axis=1, mode='wrap'), - range(x - 2, x + 2 + 1), axis=2, mode='wrap') - np.take(np.take(game_state, y+1, axis=1, mode='wrap'), - range(x - 1, x + 1 + 1), axis=2, mode='wrap') - np.take(np.take(game_state, y-1, axis=1, mode='wrap'), - range(x - 1, x + 1 + 1), axis=2, mode='wrap') - np.take(np.take(game_state, y+2, axis=1, mode='wrap'), - x, axis=2, mode='wrap') - np.take(np.take(game_state, y-2, axis=1, mode='wrap'), - x, axis=2, mode='wrap') - -def localStateFromGlobal(game_state, x, y, size=1): + +def local_state_from_global(game_state, x, y, size=1): # TODO: for now we still take a square, but a more complex shape could be better. return np.take(np.take(game_state, range(y - size, y + size + 1), axis=1, mode='wrap'), range(x - size, x + size + 1), axis=2, mode='wrap') -def rawRewardsMetric(game_states): +def raw_rewards_metric(game_states): return np.array([game_states[i + 1][0] * game_states[i + 1][2] - game_states[i][0] * game_states[i][2] for i in range(len(game_states) - 1)]) -def rawRewards(game_states): - return np.array([0.0001*np.power(game_states[i + 1][0] * game_states[i + 1][2] - game_states[i][0] * game_states[i][2],4) - for i in range(len(game_states) - 1)]) +def raw_rewards_function(game_states): + return np.array( + [0.0001 * np.power(game_states[i + 1][0] * game_states[i + 1][2] - game_states[i][0] * game_states[i][2], 4) + for i in range(len(game_states) - 1)]) -def strengthRewards(game_states): - return np.array([(getStrength(game_states[i + 1]) - getStrength(game_states[i])) + +def strength_rewards(game_states): + return np.array([(get_strength(game_states[i + 1]) - get_strength(game_states[i])) for i in range(len(game_states) - 1)]) -def discountedReward(next_reward, move_before, strength_before, discount_factor=1.0): +def discounted_reward_function(next_reward, move_before, strength_before, discount_factor=1.0): + """ + Given all the below arguments, return the discounted reward. + :param next_reward: + :param move_before: + :param strength_before: + :param discount_factor: + :return: + """ reward = np.zeros_like(next_reward) def take_value(matrix, x, y): return np.take(np.take(matrix, x, axis=1, mode='wrap'), y, axis=0, mode='wrap') - for y in range(len(reward)): - for x in range(len(reward[0])): - d = move_before[y][x] - if d != -1: - dy = (-1 if d == NORTH else 1) if (d == SOUTH or d == NORTH) else 0 - dx = (-1 if d == WEST else 1) if (d == WEST or d == EAST) else 0 - reward[y][x] = discount_factor * take_value(next_reward, x + dx, y + dy) if strength_before[y][ - x] >= take_value( - strength_before, x + dx, y + dy) else 0 - + for (y, x), d in np.ndenumerate(move_before): + if d != -1: + dy = (-1 if d == NORTH else 1) if (d == SOUTH or d == NORTH) else 0 + dx = (-1 if d == WEST else 1) if (d == WEST or d == EAST) else 0 + reward[y][x] = discount_factor * take_value(next_reward, x + dx, y + dy) \ + if strength_before[y][x] >= take_value(strength_before, x + dx, y + dy) \ + else 0 return reward -def discountedRewards(game_states, moves): - raw_rewards = rawRewards(game_states) - # strength_rewards = strengthRewards(game_states) +def discounted_rewards_function(game_states, moves): + """ + Compute height*width matrices of rewards - not yet individualized + :param game_states: The list of game states + :param moves: The list of moves + :return: + """ + raw_rewards = raw_rewards_function(game_states) + # strength_rewards = strength_rewards(game_states) discounted_rewards = np.zeros_like(raw_rewards, dtype=np.float64) running_reward = np.zeros_like(raw_rewards[0], dtype=np.float64) - for t in reversed(range(0, len(raw_rewards))): - running_reward = discountedReward(running_reward, moves[t], game_states[t][1], - discount_factor=0.6) + discountedReward( - raw_rewards[t], moves[t], game_states[t][1]) + for t, (raw_reward, move, game_state) in reversed(list(enumerate(zip(raw_rewards, moves, game_states)))): + running_reward = discounted_reward_function(running_reward, move, game_state[1], + discount_factor=0.6) + \ + discounted_reward_function(raw_reward, move, game_state[1]) discounted_rewards[t] = running_reward - ##TODO : HERE FOR STRENGTH ! INDEPENDENT return discounted_rewards -def individualStatesAndRewards(game_state, move, discounted_reward): +def individual_states_and_rewards(game_state, move, discounted_reward): + """ + Return the triplet states, moves, rewards for each of the square in one frame. + :param game_state: One game state - still a 3*3*3 matrix + :param move: The move for the given square + :param discounted_reward: The global matrix of discounted reward at time t, + from we we extract one frame + :return: + """ states = [] moves = [] rewards = [] for y in range(len(game_state[0])): for x in range(len(game_state[0][0])): - if (game_state[0][y][x] == 1): - states += [normalizeGameState(localStateFromGlobal(game_state, x, y))] + if game_state[0][y][x] == 1: + states += [normalize_game_state(local_state_from_global(game_state, x, y))] moves += [move[y][x]] rewards += [discounted_reward[y][x]] return states, moves, rewards -def allIndividualStatesAndRewards(game_states, moves, discounted_rewards): +def all_individual_states_and_rewards(game_states, moves, discounted_rewards): all_states = [] all_moves = [] all_rewards = [] for game_state, move, discounted_reward in zip(game_states, moves, discounted_rewards): - states_, moves_, rewards_ = individualStatesAndRewards(game_state, move, discounted_reward) + states_, moves_, rewards_ = individual_states_and_rewards(game_state, move, discounted_reward) all_states += states_ all_moves += moves_ all_rewards += rewards_ return np.array(all_states), np.array(all_moves), np.array(all_rewards) -def allRewards(game_states, moves): +def all_rewards_function(game_states, moves): # game_states n+1, moves n - discounted_rewards = discountedRewards(game_states, moves) - return allIndividualStatesAndRewards(game_states[:-1], moves, discounted_rewards) + discounted_rewards = discounted_rewards_function(game_states, moves) + return all_individual_states_and_rewards(game_states[:-1], moves, discounted_rewards) -def formatMoves(game_map, moves): +def format_moves(game_map, moves): moves_to_send = [] for y in range(len(game_map.contents)): for x in range(len(game_map.contents[0])): diff --git a/train/worker.py b/train/worker.py index 9bafd89..708d8c7 100644 --- a/train/worker.py +++ b/train/worker.py @@ -1,12 +1,13 @@ +"""The worker class for training and parallel operations""" import multiprocessing import time import os import tensorflow as tf -from networking.hlt_networking import HLT -from train.reward import formatMoves, getGameState +from train.reward import format_moves, get_game_state from networking.start_game import start_game +from networking.hlt_networking import HLT def update_target_graph(from_scope, to_scope): from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, from_scope) @@ -19,51 +20,68 @@ def update_target_graph(from_scope, to_scope): class Worker(): + """ + The Worker class for training. Each worker has an individual port, number, and agent. + Each of them work with the global session, and use the global saver. + """ def __init__(self, port, number, agent): self.name = 'worker_' + str(number) self.number = number self.port = port + number def worker(): - start_game(self.port, quiet=True, max_game=-1) # Infinite games + start_game(self.port, quiet=True, max_game=-1) # Infinite games self.p = multiprocessing.Process(target=worker) self.p.start() time.sleep(1) - self.hlt = HLT(port=self.port) + self.hlt = HLT(port=self.port) # Launching the pipe operation self.agent = agent self.update_local_ops = update_target_graph('global', self.name) - def work(self, sess, coord, saver, n_simultations): + def work(self, sess, saver, n_simultations): + """ + Using the pipe operation launched at initialization, + the worker works `n_simultations` games to train the + agent + :param sess: The global session + :param saver: The saver + :param n_simultations: Number of max simulations to run. + Afterwards the process is stopped. + :return: + """ print("Starting worker " + str(self.number)) with sess.as_default(), sess.graph.as_default(): for i in range(n_simultations): # while not coord.should_stop(): - if (i % 10 == 1 and self.number == 0): + if i % 10 == 1 and self.number == 0: print("Simulation: " + str(i)) # self.port) sess.run(self.update_local_ops) # GET THE WORK DONE FROM OTHER - myID, game_map = self.hlt.get_init() + my_id, game_map = self.hlt.get_init() self.hlt.send_init("MyPythonBot") moves = [] game_states = [] - while (self.hlt.get_string() == 'Get map and play!'): + while self.hlt.get_string() == 'Get map and play!': game_map.get_frame(self.hlt.get_string()) - game_states += [getGameState(game_map, myID)] + game_states += [get_game_state(game_map, my_id)] moves += [self.agent.choose_actions(sess, game_states[-1])] - self.hlt.send_frame(formatMoves(game_map, moves[-1])) + self.hlt.send_frame(format_moves(game_map, moves[-1])) self.agent.experience.add_episode(game_states, moves) self.agent.update_agent(sess) if self.number == 0: - directory = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))+'/public/models/variables/'+self.agent.name+'/' + directory = os.path.abspath( + os.path.join(os.path.dirname(__file__), '..')) \ + + '/public/models/variables/' \ + + self.agent.name + '/' if not os.path.exists(directory): - print("Creating directory for agent :"+self.agent.name) + print("Creating directory for agent :" + self.agent.name) os.makedirs(directory) - saver.save(sess, directory+self.agent.name) - self.agent.experience.save_metric(directory+self.agent.name) + saver.save(sess, directory + self.agent.name) + self.agent.experience.save_metric(directory + self.agent.name) self.p.terminate() diff --git a/visualize/static/visualizer.js b/visualize/static/visualizer.js index 29c530a..8b50fca 100755 --- a/visualize/static/visualizer.js +++ b/visualize/static/visualizer.js @@ -553,7 +553,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal textPolicy[a][b][i].text = '' //(value==0)?'':value.toString() } - //console.log(discountedRewards[frame][Math.floor(loc / game.width)][loc % game.width]) + //console.log(discounted_rewards_function[frame][Math.floor(loc / game.width)][loc % game.width]) var pw = rw * Math.sqrt(site.strength > 0 ? site.strength / 255 : 0.1) / 2 var ph = rh * Math.sqrt(site.strength > 0 ? site.strength / 255 : 0.1) / 2; var direction = frame < game.moves.length ? game.moves[frame][Math.floor(loc / game.width)][loc % game.width] : 0; diff --git a/visualize/visualize.py b/visualize/visualize.py index fdd1b7f..6f2b352 100755 --- a/visualize/visualize.py +++ b/visualize/visualize.py @@ -1,19 +1,24 @@ +"""The visualize main file to launch the server""" import json import os import sys from io import BytesIO -import matplotlib.pyplot as plt import numpy as np +import pandas as pd from flask import Flask, render_template, request, make_response +import matplotlib.pyplot as plt from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from matplotlib.figure import Figure -app = Flask(__name__) - sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from train.reward import discountedRewards -from public.models.bot.trainedBot import TrainedBot +try: + from train.reward import discounted_rewards_function + from public.models.bot.trainedBot import TrainedBot +except: + raise + +app = Flask(__name__) @app.route("/") @@ -26,9 +31,12 @@ def home(): @app.route("/performance.png") def performance_plot(): + """ + Plot the performance at this address + :return: + """ fig = Figure() sub1 = fig.add_subplot(111) - import pandas as pd path_to_variables = os.path.abspath(os.path.dirname(__file__)) + '/../public/models/variables/' list_variables = [name for name in os.listdir(path_to_variables) if name != "README.md"] path_to_npy = [path_to_variables + name + '/' + name + '.npy' for name in list_variables] @@ -36,8 +44,8 @@ def performance_plot(): rewards = [np.load(path) for path in path_to_npy] max_len = max([len(reward) for reward in rewards]) - for i in range(len(rewards)): - rewards[i] = np.append(rewards[i], np.repeat(np.nan, max_len - len(rewards[i]))) + for i, reward in enumerate(rewards): + rewards[i] = np.append(reward, np.repeat(np.nan, max_len - len(reward))) pd.DataFrame(np.array(rewards).T, columns=list_variables).rolling(100).mean().plot( title="Weighted reward at each game. (Rolling average)", ax=sub1) @@ -51,7 +59,12 @@ def performance_plot(): return response -def convert(request): +def convert(r): + """ + Convert the r to the game_states/moves tuple. + :param r: + :return: + """ def get_owner(square): return square['owner'] @@ -60,13 +73,13 @@ def get_strength(square): get_owner = np.vectorize(get_owner) get_strength = np.vectorize(get_strength) - owner_frames = get_owner(request.json["frames"])[:, np.newaxis, :, :] - strength_frames = get_strength(request.json["frames"])[:, np.newaxis, :, :] - production_frames = np.repeat(np.array(request.json["productions"])[np.newaxis, np.newaxis, :, :], + owner_frames = get_owner(r.json["frames"])[:, np.newaxis, :, :] + strength_frames = get_strength(r.json["frames"])[:, np.newaxis, :, :] + production_frames = np.repeat(np.array(r.json["productions"])[np.newaxis, np.newaxis, :, :], len(owner_frames), axis=0) - moves = np.array(request.json['moves']) + moves = np.array(r.json['moves']) game_states = np.concatenate(([owner_frames, strength_frames, production_frames]), axis=1) @@ -77,8 +90,8 @@ def get_strength(square): @app.route('/post_discounted_rewards', methods=['POST']) def post_discounted_rewards(): game_states, moves = convert(request) - discounted_rewards = discountedRewards(game_states, moves) - return json.dumps({'discountedRewards': discounted_rewards.tolist()}) + discounted_rewards = discounted_rewards_function(game_states, moves) + return json.dumps({'discounted_rewards_function': discounted_rewards.tolist()}) @app.route('/post_policies', methods=['POST'])