Skip to content

Commit

Permalink
Handier start_game and visualizer, plus minor bug corrections:
Browse files Browse the repository at this point in the history
This change addresses the need by:
* For trained Bot, act greedily (no more randomness)
* start_game can now specify width and height independently
* Resolve bug related to width/height transfer from game_map to game_state
* Rewards: separated rawRewardsMetric (for comparing models) from rawReward, which differs according to the agent.
* Visualizer even handier
* Convenience for the server
  • Loading branch information
Edouard360 committed Oct 4, 2017
1 parent 9631907 commit 3ce1211
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 20 deletions.
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,8 @@ clear-agent:

.PHONY: server
server:
cd visualize;export FLASK_APP=visualize.py;flask run
cd visualize;export FLASK_APP=visualize.py;flask run

.PHONY: debug-server
debug-server:
cd visualize;FLASK_APP=visualize.py FLASK_DEBUG=1 python -m flask run
1 change: 1 addition & 0 deletions docs/.config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
theme: jekyll-theme-cayman
9 changes: 5 additions & 4 deletions networking/start_game.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import os


def start_game(port=2000, dim=10, max_strength=25, max_turn=25, max_game=1, silent_bool=True, timeout=True, quiet=True,
def start_game(port=2000, width=10,height=10, max_strength=25, max_turn=25, max_game=1, silent_bool=True, timeout=True, quiet=True,
n_pipe_players=1, slave_players=[]):
path_to_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
for i in range(n_pipe_players):
subprocess.call([path_to_root + "/networking/kill.sh", str(port + i)]) # Free the necessary ports
# subprocess.call([path_to_root + "/networking/kill.sh", str(port+1)]) # TODO automatic call to subprocess
halite = path_to_root + '/public/halite '
dimensions = '-d "' + str(dim) + ' ' + str(dim) + '" '
dimensions = '-d "' + str(height) + ' ' + str(width) + '" '

max_strength = '-z ' + str(max_strength) + ' '
max_turn = '-x ' + str(max_turn) + ' '
Expand Down Expand Up @@ -45,13 +45,14 @@ def start_game(port=2000, dim=10, max_strength=25, max_turn=25, max_game=1, sile
parser.add_argument("-j", "--silent", help="Doesn't print *.hlt file", action="store_true", default=False)
parser.add_argument("-q", "--quiet", help="quiet", action="store_true", default=False)
parser.add_argument("-s", "--strength", help="max strength", type=int, default=25)
parser.add_argument("-d", "--dimension", help="max dimension", type=int, default=10)
parser.add_argument("-dw", "--width", help="max width", type=int, default=10)
parser.add_argument("-dh", "--height", help="max height", type=int, default=10)
parser.add_argument("-m", "--maxturn", help="max turn", type=int, default=25)
parser.add_argument("-g", "--maxgame", help="max game", type=int, default=1) # -1 for infinite game
parser.add_argument("-pp", "--n_pipe_players", type=int, default=0)
parser.add_argument("-sp", "--slave_players", nargs='+', default=[])
args = parser.parse_args()
start_game(port=args.port, dim=args.dimension, max_strength=args.strength, max_turn=args.maxturn,
start_game(port=args.port, width=args.width,height=args.height, max_strength=args.strength, max_turn=args.maxturn,
silent_bool=args.silent, timeout=args.timeout, max_game=args.maxgame, quiet=args.quiet,
n_pipe_players=args.n_pipe_players,
slave_players=args.slave_players)
4 changes: 2 additions & 2 deletions public/models/agent/vanillaAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@


class VanillaAgent(Agent):
def __init__(self, experience, lr = 1e-3, s_size = 9 * 3, a_size = 5, h_size = 50): # all these are optional ?
super(VanillaAgent, self).__init__('vanilla-ter', experience)
def __init__(self, experience, lr = 1e-2, s_size = 9 * 3, a_size = 5, h_size = 50): # all these are optional ?
super(VanillaAgent, self).__init__('vanilla-cin', experience)

# These lines established the feed-forward part of the network. The agent takes a state and produces an action.
self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
Expand Down
4 changes: 2 additions & 2 deletions public/models/bot/trainedBot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TrainedBot(Bot):
def __init__(self):
lr = 1e-3;
lr = 5*1e-3;
s_size = 9 * 3;
a_size = 5;
h_size = 50
Expand All @@ -32,7 +32,7 @@ def __init__(self):

def compute_moves(self, game_map):
    """Translate the current map into a list of moves using the trained agent.

    Builds the channel-first game state for this player, asks the agent for
    actions (debug=True enables the agent's debug path), and formats them
    into engine moves.
    """
    state = getGameState(game_map, self.myID)
    actions = self.agent.choose_actions(self.sess, state, debug=True)
    return formatMoves(game_map, actions)

def get_policies(self, game_state):
# Warning this is not hereditary
Expand Down
4 changes: 2 additions & 2 deletions train/experience.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import numpy as np

from train.reward import allRewards, rawRewards
from train.reward import allRewards, rawRewardsMetric


class Experience:
Expand All @@ -24,7 +24,7 @@ def batch(self, size):
pass

def compute_metric(self, game_states):
    """Append one game's comparison metric to ``self.metric``.

    The metric is the board-wide production increment per turn, weighted so
    that early turns count more (weights decay linearly from 2.0 to 1.0).
    """
    # Board-wide sum of the raw production increments, one value per turn.
    per_turn = np.sum(rawRewardsMetric(game_states), axis=(1, 2))
    weights = np.linspace(2.0, 1.0, num=len(game_states) - 1)
    self.metric = np.append(self.metric, per_turn.dot(weights))

def save_metric(self, name):
Expand Down
30 changes: 24 additions & 6 deletions train/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
def getGameState(game_map, myID):
    """Build a channel-first (3, height, width) state tensor from the map.

    Channel 0: ownership mask (1 where the square belongs to ``myID``),
    channel 1: strength, channel 2: production.
    """
    per_square = [[int(square.owner == myID), square.strength, square.production]
                  for square in game_map]
    grid = np.reshape(per_square, [game_map.height, game_map.width, 3])
    # (height, width, channels) -> (channels, height, width)
    return np.transpose(grid, (2, 0, 1))


Expand Down Expand Up @@ -39,17 +39,36 @@ def discount_rewards(r, gamma=0.8):
discounted_r[t] = running_add
return discounted_r

def take_surrounding_square(game_state, x, y, size=1):
    """Return the (2*size+1)-wide square patch of ``game_state`` centred on (x, y).

    ``game_state`` is channel-first (channels, height, width); indices wrap
    around the board edges (toroidal map).
    """
    row_idx = range(y - size, y + size + 1)
    col_idx = range(x - size, x + size + 1)
    rows = np.take(game_state, row_idx, axis=1, mode='wrap')
    return np.take(rows, col_idx, axis=2, mode='wrap')

def take_surrounding_losange(game_state, x, y, size=2):
    """Return the diamond (losange) neighbourhood of radius ``size`` around (x, y).

    ``game_state`` is channel-first (channels, height, width); indices wrap
    around the board edges. Rows are gathered top to bottom, each row spanning
    ``size - |dy|`` cells either side of x, and concatenated per channel into a
    (channels, cells) array — 13 cells for the default size of 2.

    Bug fix: the original issued five ``np.take`` expressions whose results
    were discarded (the function returned ``None``), and the inner take used
    ``axis=2`` on an array whose axis 1 had already been dropped by a scalar
    index, so every call raised ``AxisError``. The ``size`` parameter was also
    ignored; the generalized loop reproduces the hard-coded size-2 diamond.
    """
    channels = game_state.shape[0]
    rows = []
    for dy in range(-size, size + 1):
        half = size - abs(dy)
        # [y + dy] (a list, not a scalar) keeps axis 1 so axis 2 stays valid.
        row = np.take(np.take(game_state, [y + dy], axis=1, mode='wrap'),
                      range(x - half, x + half + 1), axis=2, mode='wrap')
        rows.append(row.reshape(channels, -1))
    return np.concatenate(rows, axis=1)

def localStateFromGlobal(game_state, x, y, size=1):
# TODO: for now we still take a square, but a more complex shape could be better.
return np.take(np.take(game_state, range(y - size, y + size + 1), axis=1, mode='wrap'),
range(x - size, x + size + 1), axis=2, mode='wrap')


def rawRewards(game_states):
def rawRewardsMetric(game_states):
    """Per-turn increment of owned production, used to compare models.

    Each state is channel-first: channel 0 is the ownership mask and
    channel 2 the production map. Returns an array of
    ``len(game_states) - 1`` board-shaped increment maps.
    """
    owned_production = [state[0] * state[2] for state in game_states]
    return np.array([after - before
                     for before, after in zip(owned_production[:-1], owned_production[1:])])

def rawRewards(game_states):
    """Agent-facing raw reward: scaled fourth power of the production increment.

    NOTE(review): the even power discards the sign of a production *decrease*
    (losses reward as much as gains) — confirm this is intended.
    """
    increments = [game_states[i + 1][0] * game_states[i + 1][2]
                  - game_states[i][0] * game_states[i][2]
                  for i in range(len(game_states) - 1)]
    return np.array([0.0001 * np.power(delta, 4) for delta in increments])


def strengthRewards(game_states):
return np.array([(getStrength(game_states[i + 1]) - getStrength(game_states[i]))
Expand All @@ -68,7 +87,6 @@ def take_value(matrix, x, y):
if d != -1:
dy = (-1 if d == NORTH else 1) if (d == SOUTH or d == NORTH) else 0
dx = (-1 if d == WEST else 1) if (d == WEST or d == EAST) else 0
discount_factor = discount_factor if (d != STILL or discount_factor == 1.0) else 0.9
reward[y][x] = discount_factor * take_value(next_reward, x + dx, y + dy) if strength_before[y][
x] >= take_value(
strength_before, x + dx, y + dy) else 0
Expand All @@ -80,12 +98,12 @@ def discountedRewards(game_states, moves):
raw_rewards = rawRewards(game_states)
# strength_rewards = strengthRewards(game_states)
discounted_rewards = np.zeros_like(raw_rewards, dtype=np.float64)
running_reward = np.zeros_like(raw_rewards[0])
running_reward = np.zeros_like(raw_rewards[0], dtype=np.float64)
for t in reversed(range(0, len(raw_rewards))):
running_reward = discountedReward(running_reward, moves[t], game_states[t][1],
discount_factor=0.2) + discountedReward(
discount_factor=0.6) + discountedReward(
raw_rewards[t], moves[t], game_states[t][1])
discounted_rewards[t] = running_reward # + 0.2*(moves[t]==STILL)*(game_states[t][2])
discounted_rewards[t] = running_reward
##TODO : HERE FOR STRENGTH ! INDEPENDENT
return discounted_rewards

Expand Down
12 changes: 9 additions & 3 deletions visualize/static/visualizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,12 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPossess[a][b].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
textPossess[a][b].style.fill = "#ffffff";

textReward[a][b] = new PIXI.Text(site.owner.toString(),sty)
var style_1 = new PIXI.TextStyle({
fontFamily: 'Roboto',
fontSize: 20
});

textReward[a][b] = new PIXI.Text(site.owner.toString(),style_1)
textReward[a][b].anchor = new PIXI.Point(0.5, 0.5);
textReward[a][b].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
textReward[a][b].style.fill = "#ffffff";
Expand All @@ -257,6 +262,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
fontFamily: 'Roboto',
fontSize: 10
});

for(var j = 0; j < 5; j++){
textPolicy[a][b][j] = new PIXI.Text(site.owner.toString(),style_2)
textPolicy[a][b][j].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
Expand Down Expand Up @@ -537,7 +543,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPossess[a][b].text = site.owner.toString()
textProd[a][b].style.fill = (site.owner.toString()=="1")?"#04e6f2":"#ffffff";

textReward[a][b].text =(pressed[65] && discountedRewards!= undefined && frame!=lastFrame && site.owner.toString()=="1")?discountedRewards[frame][Math.floor(loc / game.width)][loc % game.width]:'';
textReward[a][b].text =(pressed[65] && discountedRewards!= undefined && frame!=lastFrame && site.owner.toString()=="1")?discountedRewards[frame][Math.floor(loc / game.width)][loc % game.width].toPrecision(2):'';


//policies[a][b].text = policies[frame][a][b] In fact there are five...
Expand Down Expand Up @@ -633,7 +639,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPolicy[y][x][i].text = (value == 0) ? '' : value.toString()
}
if(pressed[85]){//u pressed
textReward[y][x].text =(discountedRewards!= undefined && frame!=lastFrame)?discountedRewards[frame][y][x]:'';
textReward[y][x].text =(discountedRewards!= undefined && frame!=lastFrame)?discountedRewards[frame][y][x].toPrecision(2):'';
}


Expand Down

0 comments on commit 3ce1211

Please sign in to comment.