In [1]:
# Notebook setup: enable the autoreload extension in mode 2 (re-import edited
# modules before each cell runs) and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

**10**

In [13]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation settings; this dict is handed to KarpathyGame below.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    'objects': ['prey', 'pred', 'cue'],
    # Three-component colour per object type (presumably RGB -- confirm in tf_rl).
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([
        ('prey', 5),
        ('pred', 5),
        ('cue', 1),
    ]),
    # "Active" objects are the ones that are learning.
    'num_objects_active': OrderedDict([
        ('prey', 0),
        ('pred', 5),
    ]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # Only relevant when the preys/predators are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Base names of the pickles written under saved_graphs/ at the end of the run.
# (The 'collsions' spelling is kept as-is so the output file names do not change.)
ELAPSE_NAME = 'elapse-5preds10-load-100weight-u75'
REWARDS_NAME = 'rewards-5preds10-load-100weight-u75'
COLLISIONS_NAME = 'collsions-5preds10-load-100weight-u75'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload.
MODEL_RE = '5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt'
REPLAY_RE = '5preds-10w/replay-5pred-2cues-500trials-10weight-2603626.pkl'

# Instantiate the simulator from the settings above.
g = KarpathyGame(current_settings)

# Start from an empty default graph before any network is constructed.
tf.reset_default_graph()

# Per-network handles; index i in every list refers to the same network.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph per learning predator, or a single shared one when
# 'network_pred' is 'one'. Nothing is built when no predator is active.
if current_settings['num_objects_active']['pred'] != 0:
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        # Each network lives in its own variable scope: 'pred0', 'pred1', ...
        name = 'pred' + str(i)
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Frame rate and action cadence passed to simulate().
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair passed to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers handed to simulate() and inspected/pickled after the run.
elapsed, rewards = [], []
timesteps = [0]
    
# Initialise (and optionally restore) the networks, run the simulation, then
# pickle the per-run statistics under saved_graphs/.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target network.
    U.initialize()
    # `all_update` holds one target-update function per network that was
    # built. Iterate over it rather than calling the bare `update_target`
    # name, which only refers to the network built last and would leave the
    # other networks' targets unsynced.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
#         # when only restoring a subset of variables
#         restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
#         saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)
        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only load replay buffers that were written by a trusted run.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

#         # remember to append buffer if restoring a subset of variables
#         all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # A manual interrupt is the expected way to stop early; the results
        # gathered so far are still saved below.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Every output file name is suffixed with timesteps[0].

    # Save trial times
    elapse_name = 'saved_graphs/' + ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = 'saved_graphs/' + REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = 'saved_graphs/' + COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 13:49:39,905] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:39,974] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:40,497] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:40,597] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:41,119] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:41,198] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:41,746] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:41,831] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:42,424] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:49:42,494] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt


[2017-07-26 13:49:44,596] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt


0
Interrupted
graphics shut down


**20**

In [3]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation settings; this dict is handed to KarpathyGame below.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    'objects': ['prey', 'pred', 'cue'],
    # Three-component colour per object type (presumably RGB -- confirm in tf_rl).
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([
        ('prey', 5),
        ('pred', 5),
        ('cue', 1),
    ]),
    # "Active" objects are the ones that are learning.
    'num_objects_active': OrderedDict([
        ('prey', 0),
        ('pred', 5),
    ]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # Only relevant when the preys/predators are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Base names of the pickles written under saved_graphs/ at the end of the run.
# (The 'collsions' spelling is kept as-is so the output file names do not change.)
ELAPSE_NAME = 'elapse-5preds20-load-100weight-u75'
REWARDS_NAME = 'rewards-5preds20-load-100weight-u75'
COLLISIONS_NAME = 'collsions-5preds20-load-100weight-u75'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload.
MODEL_RE = '5preds-20w/model-5pred-2cues-500trials-20weight-1561231.ckpt'
REPLAY_RE = '5preds-20w/replay-5pred-2cues-500trials-20weight-1561231.pkl'

# Instantiate the simulator from the settings above.
g = KarpathyGame(current_settings)

# Start from an empty default graph before any network is constructed.
tf.reset_default_graph()

# Per-network handles; index i in every list refers to the same network.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph per learning predator, or a single shared one when
# 'network_pred' is 'one'. Nothing is built when no predator is active.
if current_settings['num_objects_active']['pred'] != 0:
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        # Each network lives in its own variable scope: 'pred0', 'pred1', ...
        name = 'pred' + str(i)
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Frame rate and action cadence passed to simulate().
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair passed to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers handed to simulate() and inspected/pickled after the run.
elapsed, rewards = [], []
timesteps = [0]
    
# Initialise (and optionally restore) the networks, run the simulation, then
# pickle the per-run statistics under saved_graphs/.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target network.
    U.initialize()
    # `all_update` holds one target-update function per network that was
    # built. Iterate over it rather than calling the bare `update_target`
    # name, which only refers to the network built last and would leave the
    # other networks' targets unsynced.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
#         # when only restoring a subset of variables
#         restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
#         saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)
        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only load replay buffers that were written by a trusted run.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

#         # remember to append buffer if restoring a subset of variables
#         all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # A manual interrupt is the expected way to stop early; the results
        # gathered so far are still saved below.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Every output file name is suffixed with timesteps[0].

    # Save trial times
    elapse_name = 'saved_graphs/' + ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = 'saved_graphs/' + REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = 'saved_graphs/' + COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 10:35:33,619] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:33,704] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:34,151] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:34,218] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:34,651] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:34,719] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:35,309] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:35,379] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:35,857] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:35:35,925] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-20w/model-5pred-2cues-500trials-20weight-1561231.ckpt


[2017-07-26 10:35:38,046] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-20w/model-5pred-2cues-500trials-20weight-1561231.ckpt


0
3
9
11
15
18
24
29
37
42
47
48
55
59
60
69
76
89
96
96
done


**30**

In [4]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation settings; this dict is handed to KarpathyGame below.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    'objects': ['prey', 'pred', 'cue'],
    # Three-component colour per object type (presumably RGB -- confirm in tf_rl).
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([
        ('prey', 5),
        ('pred', 5),
        ('cue', 1),
    ]),
    # "Active" objects are the ones that are learning.
    'num_objects_active': OrderedDict([
        ('prey', 0),
        ('pred', 5),
    ]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # Only relevant when the preys/predators are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Base names of the pickles written under saved_graphs/ at the end of the run.
# (The 'collsions' spelling is kept as-is so the output file names do not change.)
ELAPSE_NAME = 'elapse-5preds30-load-100weight-u75'
REWARDS_NAME = 'rewards-5preds30-load-100weight-u75'
COLLISIONS_NAME = 'collsions-5preds30-load-100weight-u75'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload.
MODEL_RE = '5preds-30w/model-5pred-2cues-500trials-30weight-903616.ckpt'
REPLAY_RE = '5preds-30w/replay-5pred-2cues-500trials-30weight-903616.pkl'

# Instantiate the simulator from the settings above.
g = KarpathyGame(current_settings)

# Start from an empty default graph before any network is constructed.
tf.reset_default_graph()

# Per-network handles; index i in every list refers to the same network.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph per learning predator, or a single shared one when
# 'network_pred' is 'one'. Nothing is built when no predator is active.
if current_settings['num_objects_active']['pred'] != 0:
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        # Each network lives in its own variable scope: 'pred0', 'pred1', ...
        name = 'pred' + str(i)
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Frame rate and action cadence passed to simulate().
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair passed to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers handed to simulate() and inspected/pickled after the run.
elapsed, rewards = [], []
timesteps = [0]
    
# Initialise (and optionally restore) the networks, run the simulation, then
# pickle the per-run statistics under saved_graphs/.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target network.
    U.initialize()
    # `all_update` holds one target-update function per network that was
    # built. Iterate over it rather than calling the bare `update_target`
    # name, which only refers to the network built last and would leave the
    # other networks' targets unsynced.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
#         # when only restoring a subset of variables
#         restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
#         saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)
        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only load replay buffers that were written by a trusted run.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

#         # remember to append buffer if restoring a subset of variables
#         all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # A manual interrupt is the expected way to stop early; the results
        # gathered so far are still saved below.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Every output file name is suffixed with timesteps[0].

    # Save trial times
    elapse_name = 'saved_graphs/' + ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = 'saved_graphs/' + REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = 'saved_graphs/' + COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 10:56:40,686] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:40,762] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:41,253] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:41,326] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:41,805] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:41,875] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:42,336] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:42,420] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:42,878] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 10:56:42,950] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-30w/model-5pred-2cues-500trials-30weight-903616.ckpt


[2017-07-26 10:56:45,033] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-30w/model-5pred-2cues-500trials-30weight-903616.ckpt


0
2
8
13
19
29
38
56
79
92
done


**40**

In [5]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation settings; this dict is handed to KarpathyGame below.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    'objects': ['prey', 'pred', 'cue'],
    # Three-component colour per object type (presumably RGB -- confirm in tf_rl).
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([
        ('prey', 5),
        ('pred', 5),
        ('cue', 1),
    ]),
    # "Active" objects are the ones that are learning.
    'num_objects_active': OrderedDict([
        ('prey', 0),
        ('pred', 5),
    ]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # Only relevant when the preys/predators are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Base names of the pickles written under saved_graphs/ at the end of the run.
# (The 'collsions' spelling is kept as-is so the output file names do not change.)
ELAPSE_NAME = 'elapse-5preds40-load-100weight-u75'
REWARDS_NAME = 'rewards-5preds40-load-100weight-u75'
COLLISIONS_NAME = 'collsions-5preds40-load-100weight-u75'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload.
MODEL_RE = '5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt'
REPLAY_RE = '5preds-40w/replay-5pred-2cues-500trials-40weight-579358.pkl'

# Instantiate the simulator from the settings above.
g = KarpathyGame(current_settings)

# Start from an empty default graph before any network is constructed.
tf.reset_default_graph()

# Per-network handles; index i in every list refers to the same network.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph per learning predator, or a single shared one when
# 'network_pred' is 'one'. Nothing is built when no predator is active.
if current_settings['num_objects_active']['pred'] != 0:
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        # Each network lives in its own variable scope: 'pred0', 'pred1', ...
        name = 'pred' + str(i)
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Frame rate and action cadence passed to simulate().
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair passed to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers handed to simulate() and inspected/pickled after the run.
elapsed, rewards = [], []
timesteps = [0]
    
# Initialise (and optionally restore) the networks, run the simulation, then
# pickle the per-run statistics under saved_graphs/.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target network.
    U.initialize()
    # `all_update` holds one target-update function per network that was
    # built. Iterate over it rather than calling the bare `update_target`
    # name, which only refers to the network built last and would leave the
    # other networks' targets unsynced.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
#         # when only restoring a subset of variables
#         restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
#         saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)
        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # only load replay buffers that were written by a trusted run.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

#         # remember to append buffer if restoring a subset of variables
#         all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # A manual interrupt is the expected way to stop early; the results
        # gathered so far are still saved below.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Every output file name is suffixed with timesteps[0].

    # Save trial times
    elapse_name = 'saved_graphs/' + ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = 'saved_graphs/' + REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = 'saved_graphs/' + COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl'
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 11:08:18,286] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:18,357] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:18,776] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:18,852] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:19,272] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:19,351] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:19,768] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:19,837] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:20,265] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:08:20,333] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt


[2017-07-26 11:08:22,459] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt


0
10
31
47
62
74
91
done


**50**

In [6]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration (passed to KarpathyGame further down in this cell)
current_settings = {
    # order matters: earlier objects are eaten by later objects (pred eats prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "active" objects are the ones that are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # (only really matters for kinds that have active objects)
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Name prefixes for the files this run saves (trial times, rewards, collisions).
# The 'collsions' spelling is kept as-is because it becomes part of the file name.
ELAPSE_NAME, REWARDS_NAME, COLLISIONS_NAME = (
    'elapse-5preds50-load-100weight-u75',
    'rewards-5preds50-load-100weight-u75',
    'collsions-5preds50-load-100weight-u75',
)

# Model checkpoint and replay-buffer files to reload (relative to saved_graphs/)
MODEL_RE, REPLAY_RE = (
    '5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt',
    '5preds-50w/replay-5pred-2cues-500trials-50weight-516364.pkl',
)

# Instantiate the simulator from the settings above
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# One entry per network, appended in network order
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph (plus replay buffer) per predator network
num_active_pred = current_settings['num_objects_active']['pred']
if num_active_pred != 0:
    # 'one' -> a single shared network; otherwise one network per active predator
    network_pred = 1 if current_settings['network_pred'] == 'one' else num_active_pred

    for net_idx in range(network_pred):
        # each network lives in its own variable scope: pred0, pred1, ...
        with tf.variable_scope('pred' + str(net_idx)):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # file this network's handles into the per-network lists
        for handle_list, handle in (
            (all_replay, replay_buffer),
            (all_act, act),
            (all_train, train),
            (all_update, update_target),
            (all_debug, debug),
        ):
            handle_list.append(handle)
        
# Values passed to simulate() as fps / action_every
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers passed to simulate(); elapsed and rewards are pickled after the
# run, and timesteps[0] is used in the saved file names
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving results
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    # Each network has its own update function in all_update, so every one of
    # them is called exactly once. Calling the bare `update_target` would only
    # ever synchronise the network that was built last.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, use instead:
        #   restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #   saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)

        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code; only load replay files
        # that were produced by a trusted run of this project.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

        # When restoring only a subset of variables, remember to append the
        # buffer as well:
        #   all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # a manual interrupt still falls through to the saving code below
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions.
    # Files are written to saved_graphs/<prefix>-<timesteps[0]>.pkl
    for prefix, payload in (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    ):
        out_name = os.path.join('saved_graphs', prefix + '-' + str(timesteps[0]) + '.pkl')
        with open(out_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 11:16:30,342] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:30,453] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:31,094] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:31,170] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:31,769] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:31,864] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:32,535] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:32,619] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:33,239] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:16:33,332] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt


[2017-07-26 11:16:36,366] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt


0
15
36
53
70
85
done


**60**

In [7]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration (passed to KarpathyGame further down in this cell)
current_settings = {
    # order matters: earlier objects are eaten by later objects (pred eats prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "active" objects are the ones that are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # (only really matters for kinds that have active objects)
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Name prefixes for the files this run saves (trial times, rewards, collisions).
# The 'collsions' spelling is kept as-is because it becomes part of the file name.
ELAPSE_NAME, REWARDS_NAME, COLLISIONS_NAME = (
    'elapse-5preds60-load-100weight-u75',
    'rewards-5preds60-load-100weight-u75',
    'collsions-5preds60-load-100weight-u75',
)

# Model checkpoint and replay-buffer files to reload (relative to saved_graphs/)
MODEL_RE, REPLAY_RE = (
    '5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt',
    '5preds-60w/replay-5pred-2cues-500trials-60weight-343942.pkl',
)

# Instantiate the simulator from the settings above
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# One entry per network, appended in network order
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph (plus replay buffer) per predator network
num_active_pred = current_settings['num_objects_active']['pred']
if num_active_pred != 0:
    # 'one' -> a single shared network; otherwise one network per active predator
    network_pred = 1 if current_settings['network_pred'] == 'one' else num_active_pred

    for net_idx in range(network_pred):
        # each network lives in its own variable scope: pred0, pred1, ...
        with tf.variable_scope('pred' + str(net_idx)):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # file this network's handles into the per-network lists
        for handle_list, handle in (
            (all_replay, replay_buffer),
            (all_act, act),
            (all_train, train),
            (all_update, update_target),
            (all_debug, debug),
        ):
            handle_list.append(handle)
        
# Values passed to simulate() as fps / action_every
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers passed to simulate(); elapsed and rewards are pickled after the
# run, and timesteps[0] is used in the saved file names
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving results
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    # Each network has its own update function in all_update, so every one of
    # them is called exactly once. Calling the bare `update_target` would only
    # ever synchronise the network that was built last.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, use instead:
        #   restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #   saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)

        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code; only load replay files
        # that were produced by a trusted run of this project.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

        # When restoring only a subset of variables, remember to append the
        # buffer as well:
        #   all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # a manual interrupt still falls through to the saving code below
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions.
    # Files are written to saved_graphs/<prefix>-<timesteps[0]>.pkl
    for prefix, payload in (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    ):
        out_name = os.path.join('saved_graphs', prefix + '-' + str(timesteps[0]) + '.pkl')
        with open(out_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 11:23:38,444] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:38,519] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:39,262] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:39,350] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:39,906] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:39,984] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:40,646] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:40,738] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:41,333] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:23:41,404] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt


[2017-07-26 11:23:43,427] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt


0
13
33
41
57
74
86
done


**70**

In [8]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration (passed to KarpathyGame further down in this cell)
current_settings = {
    # order matters: earlier objects are eaten by later objects (pred eats prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "active" objects are the ones that are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # (only really matters for kinds that have active objects)
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Name prefixes for the files this run saves (trial times, rewards, collisions).
# The 'collsions' spelling is kept as-is because it becomes part of the file name.
ELAPSE_NAME, REWARDS_NAME, COLLISIONS_NAME = (
    'elapse-5preds70-load-100weight-u75',
    'rewards-5preds70-load-100weight-u75',
    'collsions-5preds70-load-100weight-u75',
)

# Model checkpoint and replay-buffer files to reload (relative to saved_graphs/)
MODEL_RE, REPLAY_RE = (
    '5preds-70w/model-5pred-2cues-500trials-70weight-271102.ckpt',
    '5preds-70w/replay-5pred-2cues-500trials-70weight-271102.pkl',
)

# Instantiate the simulator from the settings above
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# One entry per network, appended in network order
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph (plus replay buffer) per predator network
num_active_pred = current_settings['num_objects_active']['pred']
if num_active_pred != 0:
    # 'one' -> a single shared network; otherwise one network per active predator
    network_pred = 1 if current_settings['network_pred'] == 'one' else num_active_pred

    for net_idx in range(network_pred):
        # each network lives in its own variable scope: pred0, pred1, ...
        with tf.variable_scope('pred' + str(net_idx)):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # file this network's handles into the per-network lists
        for handle_list, handle in (
            (all_replay, replay_buffer),
            (all_act, act),
            (all_train, train),
            (all_update, update_target),
            (all_debug, debug),
        ):
            handle_list.append(handle)
        
# Values passed to simulate() as fps / action_every
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers passed to simulate(); elapsed and rewards are pickled after the
# run, and timesteps[0] is used in the saved file names
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving results
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    # Each network has its own update function in all_update, so every one of
    # them is called exactly once. Calling the bare `update_target` would only
    # ever synchronise the network that was built last.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, use instead:
        #   restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #   saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)

        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code; only load replay files
        # that were produced by a trusted run of this project.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

        # When restoring only a subset of variables, remember to append the
        # buffer as well:
        #   all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # a manual interrupt still falls through to the saving code below
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions.
    # Files are written to saved_graphs/<prefix>-<timesteps[0]>.pkl
    for prefix, payload in (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    ):
        out_name = os.path.join('saved_graphs', prefix + '-' + str(timesteps[0]) + '.pkl')
        with open(out_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 11:31:54,696] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:54,765] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:55,203] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:55,271] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:55,734] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:55,809] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:56,476] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:56,563] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:57,225] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:31:57,322] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-70w/model-5pred-2cues-500trials-70weight-271102.ckpt


[2017-07-26 11:32:00,339] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-70w/model-5pred-2cues-500trials-70weight-271102.ckpt


0
13
30
47
71
93
done


**80**

In [9]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration (passed to KarpathyGame further down in this cell)
current_settings = {
    # order matters: earlier objects are eaten by later objects (pred eats prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "active" objects are the ones that are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> one DQN per prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # (only really matters for kinds that have active objects)
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode:
#   'new'  -> create a new sim with the values above
#   'load' -> load a previously trained graph
RUN = 'load'

# Name prefixes for the files this run saves (trial times, rewards, collisions).
# The 'collsions' spelling is kept as-is because it becomes part of the file name.
ELAPSE_NAME, REWARDS_NAME, COLLISIONS_NAME = (
    'elapse-5preds80-load-100weight-u75',
    'rewards-5preds80-load-100weight-u75',
    'collsions-5preds80-load-100weight-u75',
)

# Model checkpoint and replay-buffer files to reload (relative to saved_graphs/)
MODEL_RE, REPLAY_RE = (
    '5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt',
    '5preds-80w/replay-5pred-2cues-500trials-80weight-238297.pkl',
)

# Instantiate the simulator from the settings above
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# One entry per network, appended in network order
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one DQN graph (plus replay buffer) per predator network
num_active_pred = current_settings['num_objects_active']['pred']
if num_active_pred != 0:
    # 'one' -> a single shared network; otherwise one network per active predator
    network_pred = 1 if current_settings['network_pred'] == 'one' else num_active_pred

    for net_idx in range(network_pred):
        # each network lives in its own variable scope: pred0, pred1, ...
        with tf.variable_scope('pred' + str(net_idx)):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # file this network's handles into the per-network lists
        for handle_list, handle in (
            (all_replay, replay_buffer),
            (all_act, act),
            (all_train, train),
            (all_update, update_target),
            (all_debug, debug),
        ):
            handle_list.append(handle)
        
# Values passed to simulate() as fps / action_every
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (WAIT, VISUALIZE_EVERY) pair
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Containers passed to simulate(); elapsed and rewards are pickled after the
# run, and timesteps[0] is used in the saved file names
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving results
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    # Each network has its own update function in all_update, so every one of
    # them is called exactly once. Calling the bare `update_target` would only
    # ever synchronise the network that was built last.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, use instead:
        #   restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #   saver = tf.train.Saver(restore)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)

        # reload replay buffers
        # NOTE: pickle.load can execute arbitrary code; only load replay files
        # that were produced by a trusted run of this project.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

        # When restoring only a subset of variables, remember to append the
        # buffer as well:
        #   all_replay.append(replay_buffer)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        # a manual interrupt still falls through to the saving code below
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions.
    # Files are written to saved_graphs/<prefix>-<timesteps[0]>.pkl
    for prefix, payload in (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    ):
        out_name = os.path.join('saved_graphs', prefix + '-' + str(timesteps[0]) + '.pkl')
        with open(out_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 11:39:16,007] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:16,130] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:16,877] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:17,015] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:17,545] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:17,625] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:18,141] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:18,227] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:19,033] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:39:19,207] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt


[2017-07-26 11:39:22,350] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt


0
24
40
60
75
90
done


**90**

In [None]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame when the simulator is created.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "Active" objects are the ones that are learning.
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> a separate DQN for each prey/predator,
    # 'one'      -> a single DQN shared by all preys/predators.
    # Only really matters when the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# 'new'  -> create a new sim with the values above
# 'load' -> load a previously trained graph
RUN = "load"

# Prefixes of the result files (elapsed times, rewards, collisions) pickled at
# the end of the run; '-<timesteps>.pkl' is appended when saving.
ELAPSE_NAME = "elapse-5preds90-load-100weight-u75"
REWARDS_NAME = "rewards-5preds90-load-100weight-u75"
COLLISIONS_NAME = "collsions-5preds90-load-100weight-u75"

# Checkpoint and replay-buffer pickle reloaded when RUN == 'load'
# (paths are relative to ./saved_graphs/).
MODEL_RE = "5preds-90w/model-5pred-2cues-500trials-90weight-180703.ckpt"
REPLAY_RE = "5preds-90w/replay-5pred-2cues-500trials-90weight-180703.pkl"

# Instantiate the game simulator from the settings above.
g = KarpathyGame(current_settings)

# Clear the default graph before any network is built.
tf.reset_default_graph()

# Parallel lists: position k holds the pieces that belong to network 'pred<k>'.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; anything else -> one network per active predator.
    network_pred = (
        1
        if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # Record everything that belongs to this network.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Passed to simulate() as fps / action_every.
FPS, ACTION_EVERY = 30, 3

# Passed to simulate() as wait / visualize_every: fast mode turns waiting off
# and raises the visualization interval from 1 to 100.
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers handed to simulate() and pickled after the run.
# timesteps is a one-element list; timesteps[0] ends up in the output file
# names (presumably updated in place by simulate() -- confirm in tf_rl).
elapsed, rewards = [], []
timesteps = [0]
    
# Initialize or reload the networks, run the simulation, then pickle the results.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # Every deepq.build_train() call returned its own update_target function,
    # collected in all_update, so each one has to be called.  The previous
    # loops called the bare name `update_target` (whichever network was built
    # last) once per agent, so the other networks were never synced here.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, build the saver from a scoped
        # collection instead, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a replay buffer for every network that is not
        # restored (all_replay.append(replay_buffer)).

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)

        # reload replay buffers (replaces the fresh buffers created above)
        # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        # A manual stop is not an error: close the graphics and still save
        # whatever was collected so far.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, one pickle each, named
    # saved_graphs/<prefix>-<timesteps>.pkl
    results = (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    )
    for prefix, payload in results:
        result_name = 'saved_graphs/' + prefix + '-' + str(timesteps[0]) + '.pkl'
        with open(result_name, "wb") as f:
            pickle.dump(payload, f)

**100**

In [11]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame when the simulator is created.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "Active" objects are the ones that are learning.
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> a separate DQN for each prey/predator,
    # 'one'      -> a single DQN shared by all preys/predators.
    # Only really matters when the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# 'new'  -> create a new sim with the values above
# 'load' -> load a previously trained graph
RUN = "load"

# Prefixes of the result files (elapsed times, rewards, collisions) pickled at
# the end of the run; '-<timesteps>.pkl' is appended when saving.
ELAPSE_NAME = "elapse-5preds100-load-100weight-u75"
REWARDS_NAME = "rewards-5preds100-load-100weight-u75"
COLLISIONS_NAME = "collsions-5preds100-load-100weight-u75"

# Checkpoint and replay-buffer pickle reloaded when RUN == 'load'
# (paths are relative to ./saved_graphs/).
MODEL_RE = "5preds-free/model-5pred-2cues-500trials-189205.ckpt"
REPLAY_RE = "5preds-free/replay-5pred-2cues-500trials-189205.pkl"

# Instantiate the game simulator from the settings above.
g = KarpathyGame(current_settings)

# Clear the default graph before any network is built.
tf.reset_default_graph()

# Parallel lists: position k holds the pieces that belong to network 'pred<k>'.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; anything else -> one network per active predator.
    network_pred = (
        1
        if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)

        # Record everything that belongs to this network.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# Passed to simulate() as fps / action_every.
FPS, ACTION_EVERY = 30, 3

# Passed to simulate() as wait / visualize_every: fast mode turns waiting off
# and raises the visualization interval from 1 to 100.
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers handed to simulate() and pickled after the run.
# timesteps is a one-element list; timesteps[0] ends up in the output file
# names (presumably updated in place by simulate() -- confirm in tf_rl).
elapsed, rewards = [], []
timesteps = [0]
    
# Initialize or reload the networks, run the simulation, then pickle the results.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # Every deepq.build_train() call returned its own update_target function,
    # collected in all_update, so each one has to be called.  The previous
    # loops called the bare name `update_target` (whichever network was built
    # last) once per agent, so the other networks were never synced here.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables, build the saver from a scoped
        # collection instead, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a replay buffer for every network that is not
        # restored (all_replay.append(replay_buffer)).

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = current_dir + '/saved_graphs/' + MODEL_RE
        saver.restore(sess, model_name)

        # reload replay buffers (replaces the fresh buffers created above)
        # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
        replay_name = current_dir + '/saved_graphs/' + REPLAY_RE
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        # A manual stop is not an error: close the graphics and still save
        # whatever was collected so far.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, one pickle each, named
    # saved_graphs/<prefix>-<timesteps>.pkl
    results = (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    )
    for prefix, payload in results:
        result_name = 'saved_graphs/' + prefix + '-' + str(timesteps[0]) + '.pkl'
        with open(result_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 11:51:26,376] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:26,442] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:26,867] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:26,937] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:27,370] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:27,446] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:27,870] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:27,939] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:28,362] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 11:51:28,431] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-free/model-5pred-2cues-500trials-189205.ckpt


[2017-07-26 11:51:30,228] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-free/model-5pred-2cues-500trials-189205.ckpt


0
20
40
60
75
96
done


**indep**

In [None]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()

# Simulation configuration; passed to KarpathyGame when the simulator is created.
current_settings = {
    # Order matters: earlier objects are eaten by later objects (pred eat prey).
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # "Active" objects are the ones that are learning.
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # 'multiple' -> a separate DQN for each prey/predator,
    # 'one'      -> a single DQN shared by all preys/predators.
    # Only really matters when the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# 'new'  -> create a new sim with the values above
# 'load' -> load a previously trained graph
RUN = "load"

# Prefixes of the result files (elapsed times, rewards, collisions) pickled at
# the end of the run; '-<timesteps>.pkl' is appended when saving.
ELAPSE_NAME = "elapse-5indeps-100weight-u75"
REWARDS_NAME = "rewards-5indeps-100weight-u75"
COLLISIONS_NAME = "collsions-5indeps-load-100weight-u75"

# One checkpoint / replay-buffer pickle pair per predator network
# (suffix _k is restored into scope 'pred<k>'); paths are relative to
# ./saved_graphs/.
MODEL_RE_0 = "independent-0/model-1pred-2cues-500trials-340708.ckpt"
REPLAY_RE_0 = "independent-0/replay-1pred-2cues-500trials-340708.pkl"
MODEL_RE_1 = "independent-1/model-1pred-2cues-500trials-1-361393.ckpt"
REPLAY_RE_1 = "independent-1/replay-1pred-2cues-500trials-1-361393.pkl"
MODEL_RE_2 = "independent-2/model-1pred-2cues-500trials-2-327760.ckpt"
REPLAY_RE_2 = "independent-2/replay-1pred-2cues-500trials-2-327760.pkl"
MODEL_RE_3 = "independent-3/model-1pred-2cues-500trials-3-209602.ckpt"
REPLAY_RE_3 = "independent-3/replay-1pred-2cues-500trials-3-209602.pkl"
MODEL_RE_4 = "independent-4/model-1pred-2cues-500trials-4-361147.ckpt"
REPLAY_RE_4 = "independent-4/replay-1pred-2cues-500trials-4-361147.pkl"

# create the game simulator
g = KarpathyGame(current_settings)

# Clear the default graph before any network is built.
tf.reset_default_graph()

# Parallel lists: position k holds the pieces that belong to network 'pred<k>'.
all_act = []
all_train = []
all_update = []
all_debug = []
all_replay = []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; anything else -> one network per active predator.
    if current_settings['network_pred'] == 'one':
        network_pred = 1
    else:
        network_pred = current_settings['num_objects_active']['pred']

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Give every network a fresh replay buffer so that all_replay lines up
        # with the other all_* lists when RUN != 'load' (it used to stay empty).
        # On RUN == 'load' the list is rebuilt from the pickled buffers.
        replay_buffer = ReplayBuffer(50000)
        all_replay.append(replay_buffer)
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        
# Passed to simulate() as fps / action_every.
FPS, ACTION_EVERY = 30, 3

# Passed to simulate() as wait / visualize_every: fast mode turns waiting off
# and raises the visualization interval from 1 to 100.
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers handed to simulate() and pickled after the run.
# timesteps is a one-element list; timesteps[0] ends up in the output file
# names (presumably updated in place by simulate() -- confirm in tf_rl).
elapsed, rewards = [], []
timesteps = [0]
    
# Initialize the networks, reload each predator from its own independently
# trained run, run the simulation, then pickle the results.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # Every deepq.build_train() call returned its own update_target function,
    # collected in all_update, so each one has to be called.  The previous
    # loops called the bare name `update_target` (whichever network was built
    # last) once per agent, so the other networks were never synced here.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        current_dir = os.getcwd()
        all_replay = []

        # (checkpoint, replay pickle) pairs; entry k is restored into 'pred<k>'.
        saved_runs = [
            (MODEL_RE_0, REPLAY_RE_0),
            (MODEL_RE_1, REPLAY_RE_1),
            (MODEL_RE_2, REPLAY_RE_2),
            (MODEL_RE_3, REPLAY_RE_3),
            (MODEL_RE_4, REPLAY_RE_4),
        ]
        for i, (model_re, replay_re) in enumerate(saved_runs):
            # Restore only the variables collected under this predator's scope.
            restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred' + str(i))
            saver = tf.train.Saver(restore)
            model_name = current_dir + '/saved_graphs/' + model_re
            saver.restore(sess, model_name)

            # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
            replay_name = current_dir + '/saved_graphs/' + replay_re
            with open(replay_name, 'rb') as f:
                replay_buffer = pickle.load(f)
            # Only element 0 of the unpickled object is used (presumably the
            # single-predator run's list of buffers -- confirm against the
            # code that wrote these files).
            all_replay.append(replay_buffer[0])

    # Run simulation
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        # A manual stop is not an error: close the graphics and still save
        # whatever was collected so far.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, one pickle each, named
    # saved_graphs/<prefix>-<timesteps>.pkl
    results = (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    )
    for prefix, payload in results:
        result_name = 'saved_graphs/' + prefix + '-' + str(timesteps[0]) + '.pkl'
        with open(result_name, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 13:51:46,735] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:46,909] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:47,932] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:48,071] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:49,049] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:49,203] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:50,191] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:50,346] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:51,360] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 13:51:51,495] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-0/model-1pred-2cues-500trials-340708.ckpt


[2017-07-26 13:51:53,943] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-0/model-1pred-2cues-500trials-340708.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-1/model-1pred-2cues-500trials-1-361393.ckpt


[2017-07-26 13:51:55,547] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-1/model-1pred-2cues-500trials-1-361393.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-2/model-1pred-2cues-500trials-2-327760.ckpt


[2017-07-26 13:51:57,804] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-2/model-1pred-2cues-500trials-2-327760.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-3/model-1pred-2cues-500trials-3-209602.ckpt


[2017-07-26 13:52:00,075] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-3/model-1pred-2cues-500trials-3-209602.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-4/model-1pred-2cues-500trials-4-361147.ckpt


[2017-07-26 13:52:02,080] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-4/model-1pred-2cues-500trials-4-361147.ckpt


0
36
61
Interrupted
graphics shut down
