Skip to content

Commit

Permalink
Handier start_game and visualizer, plus minor bug corrections:
Browse files Browse the repository at this point in the history
This change addresses the need by:
* For trained Bot, act greedily (no more randomness)
* start_game can now specify width and height independently
* Resolve bug related to width/height transfer from game_map to game_state
* Rewards: separated rawRewardsMetric (for comparing models) from rawReward, which differs according to the agent.
* Visualizer even handier
* Convenience for the server
  • Loading branch information
Edouard360 committed Oct 4, 2017
1 parent 9631907 commit 3ce1211
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 20 deletions.
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,8 @@ clear-agent:

.PHONY: server
server:
cd visualize;export FLASK_APP=visualize.py;flask run
cd visualize;export FLASK_APP=visualize.py;flask run

.PHONY: debug-server
debug-server:
cd visualize;FLASK_APP=visualize.py FLASK_DEBUG=1 python -m flask run
1 change: 1 addition & 0 deletions docs/.config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
theme: jekyll-theme-cayman
9 changes: 5 additions & 4 deletions networking/start_game.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import os


def start_game(port=2000, dim=10, max_strength=25, max_turn=25, max_game=1, silent_bool=True, timeout=True, quiet=True,
def start_game(port=2000, width=10,height=10, max_strength=25, max_turn=25, max_game=1, silent_bool=True, timeout=True, quiet=True,
n_pipe_players=1, slave_players=[]):
path_to_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
for i in range(n_pipe_players):
subprocess.call([path_to_root + "/networking/kill.sh", str(port + i)]) # Free the necessary ports
# subprocess.call([path_to_root + "/networking/kill.sh", str(port+1)]) # TODO automatic call to subprocess
halite = path_to_root + '/public/halite '
dimensions = '-d "' + str(dim) + ' ' + str(dim) + '" '
dimensions = '-d "' + str(height) + ' ' + str(width) + '" '

max_strength = '-z ' + str(max_strength) + ' '
max_turn = '-x ' + str(max_turn) + ' '
Expand Down Expand Up @@ -45,13 +45,14 @@ def start_game(port=2000, dim=10, max_strength=25, max_turn=25, max_game=1, sile
parser.add_argument("-j", "--silent", help="Doesn't print *.hlt file", action="store_true", default=False)
parser.add_argument("-q", "--quiet", help="quiet", action="store_true", default=False)
parser.add_argument("-s", "--strength", help="max strength", type=int, default=25)
parser.add_argument("-d", "--dimension", help="max dimension", type=int, default=10)
parser.add_argument("-dw", "--width", help="max width", type=int, default=10)
parser.add_argument("-dh", "--height", help="max height", type=int, default=10)
parser.add_argument("-m", "--maxturn", help="max turn", type=int, default=25)
parser.add_argument("-g", "--maxgame", help="max game", type=int, default=1) # -1 for infinite game
parser.add_argument("-pp", "--n_pipe_players", type=int, default=0)
parser.add_argument("-sp", "--slave_players", nargs='+', default=[])
args = parser.parse_args()
start_game(port=args.port, dim=args.dimension, max_strength=args.strength, max_turn=args.maxturn,
start_game(port=args.port, width=args.width,height=args.height, max_strength=args.strength, max_turn=args.maxturn,
silent_bool=args.silent, timeout=args.timeout, max_game=args.maxgame, quiet=args.quiet,
n_pipe_players=args.n_pipe_players,
slave_players=args.slave_players)
4 changes: 2 additions & 2 deletions public/models/agent/vanillaAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@


class VanillaAgent(Agent):
def __init__(self, experience, lr = 1e-3, s_size = 9 * 3, a_size = 5, h_size = 50): # all these are optional ?
super(VanillaAgent, self).__init__('vanilla-ter', experience)
def __init__(self, experience, lr = 1e-2, s_size = 9 * 3, a_size = 5, h_size = 50): # all these are optional ?
super(VanillaAgent, self).__init__('vanilla-cin', experience)

# These lines established the feed-forward part of the network. The agent takes a state and produces an action.
self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
Expand Down
4 changes: 2 additions & 2 deletions public/models/bot/trainedBot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TrainedBot(Bot):
def __init__(self):
lr = 1e-3;
lr = 5*1e-3;
s_size = 9 * 3;
a_size = 5;
h_size = 50
Expand All @@ -32,7 +32,7 @@ def __init__(self):

def compute_moves(self, game_map):
    """Translate the current map into a list of moves using the trained agent.

    Builds the channel-first game state for this player, asks the agent for
    actions (debug=True enables the agent's debug path), and formats them
    into engine moves.
    """
    state = getGameState(game_map, self.myID)
    actions = self.agent.choose_actions(self.sess, state, debug=True)
    return formatMoves(game_map, actions)

def get_policies(self, game_state):
# Warning this is not hereditary
Expand Down
4 changes: 2 additions & 2 deletions train/experience.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import numpy as np

from train.reward import allRewards, rawRewards
from train.reward import allRewards, rawRewardsMetric


class Experience:
Expand All @@ -24,7 +24,7 @@ def batch(self, size):
pass

def compute_metric(self, game_states):
    """Append one game's comparison metric to ``self.metric``.

    The metric is the board-wide production increment per turn, weighted so
    that early turns count more (weights decay linearly from 2.0 to 1.0).
    """
    # Board-wide sum of the raw production increments, one value per turn.
    per_turn = np.sum(rawRewardsMetric(game_states), axis=(1, 2))
    weights = np.linspace(2.0, 1.0, num=len(game_states) - 1)
    self.metric = np.append(self.metric, per_turn.dot(weights))

def save_metric(self, name):
Expand Down
30 changes: 24 additions & 6 deletions train/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
def getGameState(game_map, myID):
    """Build a channel-first (3, height, width) state tensor from the map.

    Channel 0: ownership mask (1 where the square belongs to ``myID``),
    channel 1: strength, channel 2: production.
    """
    per_square = [[int(square.owner == myID), square.strength, square.production]
                  for square in game_map]
    grid = np.reshape(per_square, [game_map.height, game_map.width, 3])
    # (height, width, channels) -> (channels, height, width)
    return np.transpose(grid, (2, 0, 1))


Expand Down Expand Up @@ -39,17 +39,36 @@ def discount_rewards(r, gamma=0.8):
discounted_r[t] = running_add
return discounted_r

def take_surrounding_square(game_state, x, y, size=1):
    """Return the (2*size+1)-wide square patch of ``game_state`` centred on (x, y).

    ``game_state`` is channel-first (channels, height, width); indices wrap
    around the board edges (toroidal map).
    """
    row_idx = range(y - size, y + size + 1)
    col_idx = range(x - size, x + size + 1)
    rows = np.take(game_state, row_idx, axis=1, mode='wrap')
    return np.take(rows, col_idx, axis=2, mode='wrap')

def take_surrounding_losange(game_state, x, y, size=2):
    """Return the diamond (losange) neighbourhood of radius ``size`` around (x, y).

    ``game_state`` is channel-first (channels, height, width); indices wrap
    around the board edges. Rows are gathered top to bottom, each row spanning
    ``size - |dy|`` cells either side of x, and concatenated per channel into a
    (channels, cells) array — 13 cells for the default size of 2.

    Bug fix: the original issued five ``np.take`` expressions whose results
    were discarded (the function returned ``None``), and the inner take used
    ``axis=2`` on an array whose axis 1 had already been dropped by a scalar
    index, so every call raised ``AxisError``. The ``size`` parameter was also
    ignored; the generalized loop reproduces the hard-coded size-2 diamond.
    """
    channels = game_state.shape[0]
    rows = []
    for dy in range(-size, size + 1):
        half = size - abs(dy)
        # [y + dy] (a list, not a scalar) keeps axis 1 so axis 2 stays valid.
        row = np.take(np.take(game_state, [y + dy], axis=1, mode='wrap'),
                      range(x - half, x + half + 1), axis=2, mode='wrap')
        rows.append(row.reshape(channels, -1))
    return np.concatenate(rows, axis=1)

def localStateFromGlobal(game_state, x, y, size=1):
# TODO: for now we still take a square, but a more complex shape could be better.
return np.take(np.take(game_state, range(y - size, y + size + 1), axis=1, mode='wrap'),
range(x - size, x + size + 1), axis=2, mode='wrap')


def rawRewards(game_states):
def rawRewardsMetric(game_states):
    """Per-turn increment of owned production, used to compare models.

    Each state is channel-first: channel 0 is the ownership mask and
    channel 2 the production map. Returns an array of
    ``len(game_states) - 1`` board-shaped increment maps.
    """
    owned_production = [state[0] * state[2] for state in game_states]
    return np.array([after - before
                     for before, after in zip(owned_production[:-1], owned_production[1:])])

def rawRewards(game_states):
    """Agent-facing raw reward: scaled fourth power of the production increment.

    NOTE(review): the even power discards the sign of a production *decrease*
    (losses reward as much as gains) — confirm this is intended.
    """
    increments = [game_states[i + 1][0] * game_states[i + 1][2]
                  - game_states[i][0] * game_states[i][2]
                  for i in range(len(game_states) - 1)]
    return np.array([0.0001 * np.power(delta, 4) for delta in increments])


def strengthRewards(game_states):
return np.array([(getStrength(game_states[i + 1]) - getStrength(game_states[i]))
Expand All @@ -68,7 +87,6 @@ def take_value(matrix, x, y):
if d != -1:
dy = (-1 if d == NORTH else 1) if (d == SOUTH or d == NORTH) else 0
dx = (-1 if d == WEST else 1) if (d == WEST or d == EAST) else 0
discount_factor = discount_factor if (d != STILL or discount_factor == 1.0) else 0.9
reward[y][x] = discount_factor * take_value(next_reward, x + dx, y + dy) if strength_before[y][
x] >= take_value(
strength_before, x + dx, y + dy) else 0
Expand All @@ -80,12 +98,12 @@ def discountedRewards(game_states, moves):
raw_rewards = rawRewards(game_states)
# strength_rewards = strengthRewards(game_states)
discounted_rewards = np.zeros_like(raw_rewards, dtype=np.float64)
running_reward = np.zeros_like(raw_rewards[0])
running_reward = np.zeros_like(raw_rewards[0], dtype=np.float64)
for t in reversed(range(0, len(raw_rewards))):
running_reward = discountedReward(running_reward, moves[t], game_states[t][1],
discount_factor=0.2) + discountedReward(
discount_factor=0.6) + discountedReward(
raw_rewards[t], moves[t], game_states[t][1])
discounted_rewards[t] = running_reward # + 0.2*(moves[t]==STILL)*(game_states[t][2])
discounted_rewards[t] = running_reward
##TODO : HERE FOR STRENGTH ! INDEPENDENT
return discounted_rewards

Expand Down
12 changes: 9 additions & 3 deletions visualize/static/visualizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,12 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPossess[a][b].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
textPossess[a][b].style.fill = "#ffffff";

textReward[a][b] = new PIXI.Text(site.owner.toString(),sty)
var style_1 = new PIXI.TextStyle({
fontFamily: 'Roboto',
fontSize: 20
});

textReward[a][b] = new PIXI.Text(site.owner.toString(),style_1)
textReward[a][b].anchor = new PIXI.Point(0.5, 0.5);
textReward[a][b].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
textReward[a][b].style.fill = "#ffffff";
Expand All @@ -257,6 +262,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
fontFamily: 'Roboto',
fontSize: 10
});

for(var j = 0; j < 5; j++){
textPolicy[a][b][j] = new PIXI.Text(site.owner.toString(),style_2)
textPolicy[a][b][j].position = new PIXI.Point(rw * (sX+0.5) , rh * (sY+0.5));
Expand Down Expand Up @@ -537,7 +543,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPossess[a][b].text = site.owner.toString()
textProd[a][b].style.fill = (site.owner.toString()=="1")?"#04e6f2":"#ffffff";

textReward[a][b].text =(pressed[65] && discountedRewards!= undefined && frame!=lastFrame && site.owner.toString()=="1")?discountedRewards[frame][Math.floor(loc / game.width)][loc % game.width]:'';
textReward[a][b].text =(pressed[65] && discountedRewards!= undefined && frame!=lastFrame && site.owner.toString()=="1")?discountedRewards[frame][Math.floor(loc / game.width)][loc % game.width].toPrecision(2):'';


//policies[a][b].text = policies[frame][a][b] In fact there are five...
Expand Down Expand Up @@ -633,7 +639,7 @@ function showGame(game, $container, maxWidth, maxHeight, showmovement, isminimal
textPolicy[y][x][i].text = (value == 0) ? '' : value.toString()
}
if(pressed[85]){//u pressed
textReward[y][x].text =(discountedRewards!= undefined && frame!=lastFrame)?discountedRewards[frame][y][x]:'';
textReward[y][x].text =(discountedRewards!= undefined && frame!=lastFrame)?discountedRewards[frame][y][x].toPrecision(2):'';
}


Expand Down

0 comments on commit 3ce1211

Please sign in to comment.