In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# ---- Simulation configuration -------------------------------------------
current_settings = {
    # Order matters: an object type is eaten by the types listed after it
    # (predators eat prey).
    'objects': ['prey', 'pred', 'cue'],
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([('prey', 5), ('pred', 5), ('cue', 1)]),
    # "Active" objects are the ones that learn.
    'num_objects_active': OrderedDict([('prey', 0), ('pred', 5)]),
    # 'multiple' -> one DQN per prey/predator; 'one' -> a single shared DQN.
    # Only relevant for object types that are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> start a fresh simulation with the settings above
#   'load' -> restore a previously trained graph and its replay buffers
RUN = 'load'

# Base names for the files written by this run (five in total).
# MODEL_NAME / REPLAY_NAME are not referenced anywhere else in this cell.
# NOTE: the 'collsions' spelling is left untouched because it determines the
# name of the collisions file written at the end of the cell.
MODEL_NAME = 'model-5preds10-load-100weight-c'
REPLAY_NAME = 'replay-5preds10-load-100weight-c'
ELAPSE_NAME = 'elapse-5preds10-load-100weight-c'
REWARDS_NAME = 'rewards-5preds10-load-100weight-c'
COLLISIONS_NAME = 'collsions-5preds10-load-100weight-c'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload
# when RUN == 'load'.
MODEL_RE = '5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt'
REPLAY_RE = '5preds-10w/replay-5pred-2cues-500trials-10weight-2603626.pkl'

# Instantiate the game simulator from the settings.
g = KarpathyGame(current_settings)

# Start from an empty default TensorFlow graph before building the networks.
tf.reset_default_graph()

# Per-network handles, filled in by the graph-building step (index i <-> 'pred<i>').
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one deep-Q graph per predator network and collect its handles.
if current_settings['num_objects_active']['pred'] != 0:
    # A shared network ('one') needs a single graph; otherwise build one
    # graph per active predator.
    network_pred = (1 if current_settings['network_pred'] == 'one'
                    else current_settings['num_objects_active']['pred'])

    def _make_obs_placeholder(name):
        """Create the observation placeholder sized to the game's observation vector."""
        return U.BatchInput((g.observation_size,), name=name)

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=_make_obs_placeholder,
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Every network gets its own replay buffer.
        replay_buffer = ReplayBuffer(50000)

        # Keep the per-network handles aligned by index across all lists.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# ---- Pacing / display parameters handed to simulate() -------------------
FPS = 30
ACTION_EVERY = 3

fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers passed to simulate(). timesteps is a one-element list;
# its first entry is read after the run to tag the output file names
# (presumably simulate() updates it in place -- confirm in tf_rl).
elapsed = []
rewards = []
timesteps = [0]

# Directory used both for reloading checkpoints and for writing results.
saved_dir = os.path.join(os.getcwd(), 'saved_graphs')

# Initialise or reload variables, run the simulation, then save the results.
with U.make_session(2) as sess:

    # Initialise all variables, then copy the parameters into the target
    # network of EVERY graph that was built. Each network has its own update
    # function stored in all_update; the bare name `update_target` only
    # refers to the last network built, so it must not be used here.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # Restore all variables from the checkpoint. To restore only a subset,
        # build the Saver from
        # tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        # and append a fresh replay buffer for every network not restored.
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(saved_dir, MODEL_RE))

        # Reload the replay buffers saved alongside the checkpoint.
        # SECURITY: pickle.load can execute arbitrary code -- only load files
        # produced by your own earlier runs.
        with open(os.path.join(saved_dir, REPLAY_RE), 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation; Ctrl-C / kernel interrupt stops it but still falls
    # through to the saving code below.
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, each tagged with the final
    # timestep count: saved_graphs/<NAME>-<timesteps>.pkl
    run_tag = '-' + str(timesteps[0])
    for prefix, payload in ((ELAPSE_NAME, elapsed),
                            (REWARDS_NAME, rewards),
                            (COLLISIONS_NAME, g.collisions)):
        with open(os.path.join(saved_dir, prefix + run_tag + '.pkl'), "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 03:39:03,058] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:03,132] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:03,653] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:03,817] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:04,282] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:04,353] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:04,858] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:04,944] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:05,518] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:39:05,590] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt


[2017-07-26 03:39:07,629] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-10w/model-5pred-2cues-500trials-10weight-2603626.ckpt


0
6
16
26
36
43
51
61
65
70
76
86
96
done


In [3]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# ---- Simulation configuration -------------------------------------------
current_settings = {
    # Order matters: an object type is eaten by the types listed after it
    # (predators eat prey).
    'objects': ['prey', 'pred', 'cue'],
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([('prey', 5), ('pred', 5), ('cue', 1)]),
    # "Active" objects are the ones that learn.
    'num_objects_active': OrderedDict([('prey', 0), ('pred', 5)]),
    # 'multiple' -> one DQN per prey/predator; 'one' -> a single shared DQN.
    # Only relevant for object types that are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> start a fresh simulation with the settings above
#   'load' -> restore a previously trained graph and its replay buffers
RUN = 'load'

# Base names for the files written by this run (five in total).
# MODEL_NAME / REPLAY_NAME are not referenced anywhere else in this cell.
# NOTE: the 'collsions' spelling is left untouched because it determines the
# name of the collisions file written at the end of the cell.
MODEL_NAME = 'model-5preds40-load-100weight-c'
REPLAY_NAME = 'replay-5preds40-load-100weight-c'
ELAPSE_NAME = 'elapse-5preds40-load-100weight-c'
REWARDS_NAME = 'rewards-5preds40-load-100weight-c'
COLLISIONS_NAME = 'collsions-5preds40-load-100weight-c'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload
# when RUN == 'load'.
MODEL_RE = '5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt'
REPLAY_RE = '5preds-40w/replay-5pred-2cues-500trials-40weight-579358.pkl'

# Instantiate the game simulator from the settings.
g = KarpathyGame(current_settings)

# Start from an empty default TensorFlow graph before building the networks.
tf.reset_default_graph()

# Per-network handles, filled in by the graph-building step (index i <-> 'pred<i>').
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one deep-Q graph per predator network and collect its handles.
if current_settings['num_objects_active']['pred'] != 0:
    # A shared network ('one') needs a single graph; otherwise build one
    # graph per active predator.
    network_pred = (1 if current_settings['network_pred'] == 'one'
                    else current_settings['num_objects_active']['pred'])

    def _make_obs_placeholder(name):
        """Create the observation placeholder sized to the game's observation vector."""
        return U.BatchInput((g.observation_size,), name=name)

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=_make_obs_placeholder,
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Every network gets its own replay buffer.
        replay_buffer = ReplayBuffer(50000)

        # Keep the per-network handles aligned by index across all lists.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# ---- Pacing / display parameters handed to simulate() -------------------
FPS = 30
ACTION_EVERY = 3

fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers passed to simulate(). timesteps is a one-element list;
# its first entry is read after the run to tag the output file names
# (presumably simulate() updates it in place -- confirm in tf_rl).
elapsed = []
rewards = []
timesteps = [0]

# Directory used both for reloading checkpoints and for writing results.
saved_dir = os.path.join(os.getcwd(), 'saved_graphs')

# Initialise or reload variables, run the simulation, then save the results.
with U.make_session(2) as sess:

    # Initialise all variables, then copy the parameters into the target
    # network of EVERY graph that was built. Each network has its own update
    # function stored in all_update; the bare name `update_target` only
    # refers to the last network built, so it must not be used here.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # Restore all variables from the checkpoint. To restore only a subset,
        # build the Saver from
        # tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        # and append a fresh replay buffer for every network not restored.
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(saved_dir, MODEL_RE))

        # Reload the replay buffers saved alongside the checkpoint.
        # SECURITY: pickle.load can execute arbitrary code -- only load files
        # produced by your own earlier runs.
        with open(os.path.join(saved_dir, REPLAY_RE), 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation; Ctrl-C / kernel interrupt stops it but still falls
    # through to the saving code below.
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, each tagged with the final
    # timestep count: saved_graphs/<NAME>-<timesteps>.pkl
    run_tag = '-' + str(timesteps[0])
    for prefix, payload in ((ELAPSE_NAME, elapsed),
                            (REWARDS_NAME, rewards),
                            (COLLISIONS_NAME, g.collisions)):
        with open(os.path.join(saved_dir, prefix + run_tag + '.pkl'), "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 03:52:35,111] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:35,183] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:35,592] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:35,665] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:36,083] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:36,161] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:36,721] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:36,793] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:37,210] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:52:37,279] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt


[2017-07-26 03:52:39,202] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-40w/model-5pred-2cues-500trials-40weight-579358.ckpt


0
22
46
73
96
done


In [4]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# ---- Simulation configuration -------------------------------------------
current_settings = {
    # Order matters: an object type is eaten by the types listed after it
    # (predators eat prey).
    'objects': ['prey', 'pred', 'cue'],
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([('prey', 5), ('pred', 5), ('cue', 1)]),
    # "Active" objects are the ones that learn.
    'num_objects_active': OrderedDict([('prey', 0), ('pred', 5)]),
    # 'multiple' -> one DQN per prey/predator; 'one' -> a single shared DQN.
    # Only relevant for object types that are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> start a fresh simulation with the settings above
#   'load' -> restore a previously trained graph and its replay buffers
RUN = 'load'

# Base names for the files written by this run (five in total).
# MODEL_NAME / REPLAY_NAME are not referenced anywhere else in this cell.
# NOTE: the 'collsions' spelling is left untouched because it determines the
# name of the collisions file written at the end of the cell.
MODEL_NAME = 'model-5preds50-load-100weight-c'
REPLAY_NAME = 'replay-5preds50-load-100weight-c'
ELAPSE_NAME = 'elapse-5preds50-load-100weight-c'
REWARDS_NAME = 'rewards-5preds50-load-100weight-c'
COLLISIONS_NAME = 'collsions-5preds50-load-100weight-c'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload
# when RUN == 'load'.
MODEL_RE = '5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt'
REPLAY_RE = '5preds-50w/replay-5pred-2cues-500trials-50weight-516364.pkl'

# Instantiate the game simulator from the settings.
g = KarpathyGame(current_settings)

# Start from an empty default TensorFlow graph before building the networks.
tf.reset_default_graph()

# Per-network handles, filled in by the graph-building step (index i <-> 'pred<i>').
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one deep-Q graph per predator network and collect its handles.
if current_settings['num_objects_active']['pred'] != 0:
    # A shared network ('one') needs a single graph; otherwise build one
    # graph per active predator.
    network_pred = (1 if current_settings['network_pred'] == 'one'
                    else current_settings['num_objects_active']['pred'])

    def _make_obs_placeholder(name):
        """Create the observation placeholder sized to the game's observation vector."""
        return U.BatchInput((g.observation_size,), name=name)

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=_make_obs_placeholder,
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Every network gets its own replay buffer.
        replay_buffer = ReplayBuffer(50000)

        # Keep the per-network handles aligned by index across all lists.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# ---- Pacing / display parameters handed to simulate() -------------------
FPS = 30
ACTION_EVERY = 3

fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers passed to simulate(). timesteps is a one-element list;
# its first entry is read after the run to tag the output file names
# (presumably simulate() updates it in place -- confirm in tf_rl).
elapsed = []
rewards = []
timesteps = [0]

# Directory used both for reloading checkpoints and for writing results.
saved_dir = os.path.join(os.getcwd(), 'saved_graphs')

# Initialise or reload variables, run the simulation, then save the results.
with U.make_session(2) as sess:

    # Initialise all variables, then copy the parameters into the target
    # network of EVERY graph that was built. Each network has its own update
    # function stored in all_update; the bare name `update_target` only
    # refers to the last network built, so it must not be used here.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # Restore all variables from the checkpoint. To restore only a subset,
        # build the Saver from
        # tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        # and append a fresh replay buffer for every network not restored.
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(saved_dir, MODEL_RE))

        # Reload the replay buffers saved alongside the checkpoint.
        # SECURITY: pickle.load can execute arbitrary code -- only load files
        # produced by your own earlier runs.
        with open(os.path.join(saved_dir, REPLAY_RE), 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation; Ctrl-C / kernel interrupt stops it but still falls
    # through to the saving code below.
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, each tagged with the final
    # timestep count: saved_graphs/<NAME>-<timesteps>.pkl
    run_tag = '-' + str(timesteps[0])
    for prefix, payload in ((ELAPSE_NAME, elapsed),
                            (REWARDS_NAME, rewards),
                            (COLLISIONS_NAME, g.collisions)):
        with open(os.path.join(saved_dir, prefix + run_tag + '.pkl'), "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 03:57:01,962] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:02,028] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:02,446] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:02,511] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:02,935] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:03,007] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:03,425] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:03,499] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:03,930] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 03:57:04,012] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt


[2017-07-26 03:57:05,995] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-50w/model-5pred-2cues-500trials-50weight-516364.ckpt


0
17
50
75
done


In [5]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# ---- Simulation configuration -------------------------------------------
current_settings = {
    # Order matters: an object type is eaten by the types listed after it
    # (predators eat prey).
    'objects': ['prey', 'pred', 'cue'],
    'colors': {
        'prey': [212, 211, 208],
        'pred': [100, 37, 0],
        'cue': [0, 0, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1, 'cue': 0.0},
        'pred': {'prey': 1.0, 'pred': -1.0, 'cue': 0.0},
    },
    'hero_bounces_off_walls': False,
    'world_size': (500, 300),
    'maximum_velocity': {'prey': 0, 'pred': 50},
    'object_radius': 10.0,
    'cue_types': 2,
    'num_objects': OrderedDict([('prey', 5), ('pred', 5), ('cue', 1)]),
    # "Active" objects are the ones that learn.
    'num_objects_active': OrderedDict([('prey', 0), ('pred', 5)]),
    # 'multiple' -> one DQN per prey/predator; 'one' -> a single shared DQN.
    # Only relevant for object types that are active.
    'network_prey': 'one',
    'network_pred': 'multiple',
    'num_observation_lines': 32,
    'observation_line_length': 75.0,
    'tolerable_distance_to_wall': 50,
    'wall_distance_penalty': -1.0,
    'delta_v': 50,
}

# Run mode:
#   'new'  -> start a fresh simulation with the settings above
#   'load' -> restore a previously trained graph and its replay buffers
RUN = 'load'

# Base names for the files written by this run (five in total).
# MODEL_NAME / REPLAY_NAME are not referenced anywhere else in this cell.
# NOTE: the 'collsions' spelling is left untouched because it determines the
# name of the collisions file written at the end of the cell.
MODEL_NAME = 'model-5preds60-load-100weight-c'
REPLAY_NAME = 'replay-5preds60-load-100weight-c'
ELAPSE_NAME = 'elapse-5preds60-load-100weight-c'
REWARDS_NAME = 'rewards-5preds60-load-100weight-c'
COLLISIONS_NAME = 'collsions-5preds60-load-100weight-c'

# Checkpoint and replay-buffer files (relative to saved_graphs/) to reload
# when RUN == 'load'.
MODEL_RE = '5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt'
REPLAY_RE = '5preds-60w/replay-5pred-2cues-500trials-60weight-343942.pkl'

# Instantiate the game simulator from the settings.
g = KarpathyGame(current_settings)

# Start from an empty default TensorFlow graph before building the networks.
tf.reset_default_graph()

# Per-network handles, filled in by the graph-building step (index i <-> 'pred<i>').
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build one deep-Q graph per predator network and collect its handles.
if current_settings['num_objects_active']['pred'] != 0:
    # A shared network ('one') needs a single graph; otherwise build one
    # graph per active predator.
    network_pred = (1 if current_settings['network_pred'] == 'one'
                    else current_settings['num_objects_active']['pred'])

    def _make_obs_placeholder(name):
        """Create the observation placeholder sized to the game's observation vector."""
        return U.BatchInput((g.observation_size,), name=name)

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=_make_obs_placeholder,
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Every network gets its own replay buffer.
        replay_buffer = ReplayBuffer(50000)

        # Keep the per-network handles aligned by index across all lists.
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)
        
# ---- Pacing / display parameters handed to simulate() -------------------
FPS = 30
ACTION_EVERY = 3

fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Result containers passed to simulate(). timesteps is a one-element list;
# its first entry is read after the run to tag the output file names
# (presumably simulate() updates it in place -- confirm in tf_rl).
elapsed = []
rewards = []
timesteps = [0]

# Directory used both for reloading checkpoints and for writing results.
saved_dir = os.path.join(os.getcwd(), 'saved_graphs')

# Initialise or reload variables, run the simulation, then save the results.
with U.make_session(2) as sess:

    # Initialise all variables, then copy the parameters into the target
    # network of EVERY graph that was built. Each network has its own update
    # function stored in all_update; the bare name `update_target` only
    # refers to the last network built, so it must not be used here.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # Restore all variables from the checkpoint. To restore only a subset,
        # build the Saver from
        # tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred0')
        # and append a fresh replay buffer for every network not restored.
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(saved_dir, MODEL_RE))

        # Reload the replay buffers saved alongside the checkpoint.
        # SECURITY: pickle.load can execute arbitrary code -- only load files
        # produced by your own earlier runs.
        with open(os.path.join(saved_dir, REPLAY_RE), 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation; Ctrl-C / kernel interrupt stops it but still falls
    # through to the saving code below.
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions, each tagged with the final
    # timestep count: saved_graphs/<NAME>-<timesteps>.pkl
    run_tag = '-' + str(timesteps[0])
    for prefix, payload in ((ELAPSE_NAME, elapsed),
                            (REWARDS_NAME, rewards),
                            (COLLISIONS_NAME, g.collisions)):
        with open(os.path.join(saved_dir, prefix + run_tag + '.pkl'), "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 04:00:56,327] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:56,393] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:56,810] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:56,876] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:57,291] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:57,363] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:57,792] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:57,860] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:58,277] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:00:58,345] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt


[2017-07-26 04:01:00,407] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-60w/model-5pred-2cues-500trials-60weight-343942.ckpt


0
17
40
81
done


In [None]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame further down in this cell.
current_settings = {
    # earlier objects are eaten by later objects (pred eat prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # active means that the objects are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # "multiple": one DQN per prey/predator; "one": a single DQN shared by all
    # preys/predators. Only really matters if the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode: "new" to create a new sim with the values above,
# "load" to load a previously trained graph.
RUN = "load"

# File-name stems for this run's outputs. ELAPSE/REWARDS/COLLISIONS are used
# below as 'saved_graphs/<stem>-<timesteps>.pkl'; MODEL_NAME and REPLAY_NAME are
# not referenced again in this cell (simulate is called with save_path=None).
MODEL_NAME = "model-5preds70-load-100weight-c"
REPLAY_NAME = "replay-5preds70-load-100weight-c"
ELAPSE_NAME = "elapse-5preds70-load-100weight-c"
REWARDS_NAME = "rewards-5preds70-load-100weight-c"
# NOTE(review): "collsions" looks like a typo for "collisions"; spelling kept
# because it ends up in the saved file name -- confirm before renaming.
COLLISIONS_NAME = "collsions-5preds70-load-100weight-c"

# Model checkpoint / replay-buffer pickle to reload, relative to saved_graphs/.
MODEL_RE = "5preds-70w/model-5pred-2cues-500trials-70weight-271102.ckpt"
REPLAY_RE = "5preds-70w/replay-5pred-2cues-500trials-70weight-271102.pkl"

# Simulator instance; the graph builders below read its observation_size and
# num_actions, and it is later handed to simulate().
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# Per-network handles. The lists are appended to together, so entry k of each
# list belongs to the k-th network built in the loop below.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; any other value -> one network per active pred.
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)

# Values forwarded to simulate() as fps= and action_every=.
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (wait, visualize_every) pair forwarded to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Mutable containers passed to simulate() and pickled / read back afterwards.
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving the logs.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # all_update holds one target-sync callable per network built above, so
    # call each exactly once. The bare `update_target` name only refers to the
    # network built last; calling it in a loop would leave every other target
    # network unsynchronised.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables instead, build the saver from
        # a scoped collection, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a buffer afterwards:
        #     all_replay.append(replay_buffer)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)
        # reload replay buffers (replaces the freshly created all_replay list)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point REPLAY_RE at files you trust.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    # timesteps, elapsed and rewards are passed as mutable lists and read back
    # below -- presumably simulate() fills them in place; confirm in tf_rl.
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # The saves below also run after a KeyboardInterrupt, because the handler
    # above does not re-raise. timesteps[0] is used as the file-name suffix.

    # Save trial times
    elapse_name = os.path.join('saved_graphs', ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = os.path.join('saved_graphs', REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = os.path.join('saved_graphs', COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)

In [6]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame further down in this cell.
current_settings = {
    # earlier objects are eaten by later objects (pred eat prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # active means that the objects are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # "multiple": one DQN per prey/predator; "one": a single DQN shared by all
    # preys/predators. Only really matters if the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode: "new" to create a new sim with the values above,
# "load" to load a previously trained graph.
RUN = "load"

# File-name stems for this run's outputs. ELAPSE/REWARDS/COLLISIONS are used
# below as 'saved_graphs/<stem>-<timesteps>.pkl'; MODEL_NAME and REPLAY_NAME are
# not referenced again in this cell (simulate is called with save_path=None).
MODEL_NAME = "model-5preds80-load-100weight-c"
REPLAY_NAME = "replay-5preds80-load-100weight-c"
ELAPSE_NAME = "elapse-5preds80-load-100weight-c"
REWARDS_NAME = "rewards-5preds80-load-100weight-c"
# NOTE(review): "collsions" looks like a typo for "collisions"; spelling kept
# because it ends up in the saved file name -- confirm before renaming.
COLLISIONS_NAME = "collsions-5preds80-load-100weight-c"

# Model checkpoint / replay-buffer pickle to reload, relative to saved_graphs/.
MODEL_RE = "5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt"
REPLAY_RE = "5preds-80w/replay-5pred-2cues-500trials-80weight-238297.pkl"

# Simulator instance; the graph builders below read its observation_size and
# num_actions, and it is later handed to simulate().
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# Per-network handles. The lists are appended to together, so entry k of each
# list belongs to the k-th network built in the loop below.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; any other value -> one network per active pred.
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)

# Values forwarded to simulate() as fps= and action_every=.
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (wait, visualize_every) pair forwarded to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Mutable containers passed to simulate() and pickled / read back afterwards.
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving the logs.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # all_update holds one target-sync callable per network built above, so
    # call each exactly once. The bare `update_target` name only refers to the
    # network built last; calling it in a loop would leave every other target
    # network unsynchronised.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables instead, build the saver from
        # a scoped collection, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a buffer afterwards:
        #     all_replay.append(replay_buffer)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)
        # reload replay buffers (replaces the freshly created all_replay list)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point REPLAY_RE at files you trust.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    # timesteps, elapsed and rewards are passed as mutable lists and read back
    # below -- presumably simulate() fills them in place; confirm in tf_rl.
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # The saves below also run after a KeyboardInterrupt, because the handler
    # above does not re-raise. timesteps[0] is used as the file-name suffix.

    # Save trial times
    elapse_name = os.path.join('saved_graphs', ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = os.path.join('saved_graphs', REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = os.path.join('saved_graphs', COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 04:04:54,338] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:54,408] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:54,813] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:54,880] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:55,312] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:55,381] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:55,804] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:55,868] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:56,293] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:04:56,374] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt


[2017-07-26 04:04:58,493] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-80w/model-5pred-2cues-500trials-80weight-238297.ckpt


0
31
62
92
done


In [7]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame further down in this cell.
current_settings = {
    # earlier objects are eaten by later objects (pred eat prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # active means that the objects are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # "multiple": one DQN per prey/predator; "one": a single DQN shared by all
    # preys/predators. Only really matters if the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode: "new" to create a new sim with the values above,
# "load" to load a previously trained graph.
RUN = "load"

# File-name stems for this run's outputs. ELAPSE/REWARDS/COLLISIONS are used
# below as 'saved_graphs/<stem>-<timesteps>.pkl'; MODEL_NAME and REPLAY_NAME are
# not referenced again in this cell (simulate is called with save_path=None).
MODEL_NAME = "model-5preds90-load-100weight-c"
REPLAY_NAME = "replay-5preds90-load-100weight-c"
ELAPSE_NAME = "elapse-5preds90-load-100weight-c"
REWARDS_NAME = "rewards-5preds90-load-100weight-c"
# NOTE(review): "collsions" looks like a typo for "collisions"; spelling kept
# because it ends up in the saved file name -- confirm before renaming.
COLLISIONS_NAME = "collsions-5preds90-load-100weight-c"

# Model checkpoint / replay-buffer pickle to reload, relative to saved_graphs/.
MODEL_RE = "5preds-90w/model-5pred-2cues-500trials-90weight-180703.ckpt"
REPLAY_RE = "5preds-90w/replay-5pred-2cues-500trials-90weight-180703.pkl"

# Simulator instance; the graph builders below read its observation_size and
# num_actions, and it is later handed to simulate().
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# Per-network handles. The lists are appended to together, so entry k of each
# list belongs to the k-th network built in the loop below.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; any other value -> one network per active pred.
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)

# Values forwarded to simulate() as fps= and action_every=.
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (wait, visualize_every) pair forwarded to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Mutable containers passed to simulate() and pickled / read back afterwards.
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving the logs.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # all_update holds one target-sync callable per network built above, so
    # call each exactly once. The bare `update_target` name only refers to the
    # network built last; calling it in a loop would leave every other target
    # network unsynchronised.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables instead, build the saver from
        # a scoped collection, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a buffer afterwards:
        #     all_replay.append(replay_buffer)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)
        # reload replay buffers (replaces the freshly created all_replay list)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point REPLAY_RE at files you trust.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    # timesteps, elapsed and rewards are passed as mutable lists and read back
    # below -- presumably simulate() fills them in place; confirm in tf_rl.
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # The saves below also run after a KeyboardInterrupt, because the handler
    # above does not re-raise. timesteps[0] is used as the file-name suffix.

    # Save trial times
    elapse_name = os.path.join('saved_graphs', ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = os.path.join('saved_graphs', REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = os.path.join('saved_graphs', COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 04:08:30,329] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:30,395] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:30,810] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:30,875] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:31,290] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:31,359] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:31,784] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:31,853] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:32,287] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:08:32,364] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-90w/model-5pred-2cues-500trials-90weight-180703.ckpt


[2017-07-26 04:08:34,112] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-90w/model-5pred-2cues-500trials-90weight-180703.ckpt


0
33
58
97
done


In [8]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()


# Simulation configuration; passed to KarpathyGame further down in this cell.
current_settings = {
    # earlier objects are eaten by later objects (pred eat prey)
    "objects": ["prey", "pred", "cue"],
    "colors": {
        "prey": [212, 211, 208],
        "pred": [100, 37, 0],
        "cue": [0, 0, 0],
    },
    "object_reward": {
        "prey": {"prey": 0.1, "pred": -0.1, "cue": 0.0},
        "pred": {"prey": 1.0, "pred": -1.0, "cue": 0.0},
    },
    "hero_bounces_off_walls": False,
    "world_size": (500, 300),
    "maximum_velocity": {"prey": 0, "pred": 50},
    "object_radius": 10.0,
    "cue_types": 2,
    "num_objects": OrderedDict([("prey", 5), ("pred", 5), ("cue", 1)]),
    # active means that the objects are learning
    "num_objects_active": OrderedDict([("prey", 0), ("pred", 5)]),
    # "multiple": one DQN per prey/predator; "one": a single DQN shared by all
    # preys/predators. Only really matters if the preys/predators are active.
    "network_prey": "one",
    "network_pred": "multiple",
    "num_observation_lines": 32,
    "observation_line_length": 75.0,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty": -1.0,
    "delta_v": 50,
}

# Run mode: "new" to create a new sim with the values above,
# "load" to load a previously trained graph.
RUN = "load"

# File-name stems for this run's outputs. ELAPSE/REWARDS/COLLISIONS are used
# below as 'saved_graphs/<stem>-<timesteps>.pkl'; MODEL_NAME and REPLAY_NAME are
# not referenced again in this cell (simulate is called with save_path=None).
MODEL_NAME = "model-5preds100-load-100weight-c"
REPLAY_NAME = "replay-5preds100-load-100weight-c"
ELAPSE_NAME = "elapse-5preds100-load-100weight-c"
REWARDS_NAME = "rewards-5preds100-load-100weight-c"
# NOTE(review): "collsions" looks like a typo for "collisions"; spelling kept
# because it ends up in the saved file name -- confirm before renaming.
COLLISIONS_NAME = "collsions-5preds100-load-100weight-c"

# Model checkpoint / replay-buffer pickle to reload, relative to saved_graphs/.
MODEL_RE = "5preds-free/model-5pred-2cues-500trials-189205.ckpt"
REPLAY_RE = "5preds-free/replay-5pred-2cues-500trials-189205.pkl"

# Simulator instance; the graph builders below read its observation_size and
# num_actions, and it is later handed to simulate().
g = KarpathyGame(current_settings)

tf.reset_default_graph()

# Per-network handles. The lists are appended to together, so entry k of each
# list belongs to the k-th network built in the loop below.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
if current_settings['num_objects_active']['pred'] != 0:
    # 'one' -> a single network; any other value -> one network per active pred.
    network_pred = (
        1 if current_settings['network_pred'] == 'one'
        else current_settings['num_objects_active']['pred']
    )

    for i in range(network_pred):
        name = 'pred' + str(i)
        # Each network is built inside its own variable scope ('pred0', 'pred1', ...).
        with tf.variable_scope(name):
            brain_pred = deepq.models.mlp([200, 200])
            act, train, update_target, debug = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        replay_buffer = ReplayBuffer(50000)
        all_act.append(act)
        all_train.append(train)
        all_update.append(update_target)
        all_debug.append(debug)
        all_replay.append(replay_buffer)

# Values forwarded to simulate() as fps= and action_every=.
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (wait, visualize_every) pair forwarded to simulate().
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Mutable containers passed to simulate() and pickled / read back afterwards.
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables, running the simulation, saving the logs.
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target networks.
    U.initialize()
    # all_update holds one target-sync callable per network built above, so
    # call each exactly once. The bare `update_target` name only refers to the
    # network built last; calling it in a loop would leave every other target
    # network unsynchronised.
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        # To restore only a subset of variables instead, build the saver from
        # a scoped collection, e.g.
        #     restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = 'pred0')
        #     saver = tf.train.Saver(restore)
        # and remember to append a buffer afterwards:
        #     all_replay.append(replay_buffer)

        # reload models
        saver = tf.train.Saver()
        current_dir = os.getcwd()
        model_name = os.path.join(current_dir, 'saved_graphs', MODEL_RE)
        saver.restore(sess, model_name)
        # reload replay buffers (replaces the freshly created all_replay list)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point REPLAY_RE at files you trust.
        replay_name = os.path.join(current_dir, 'saved_graphs', REPLAY_RE)
        with open(replay_name, 'rb') as f:
            all_replay = pickle.load(f)

    # Run simulation
    # timesteps, elapsed and rewards are passed as mutable lists and read back
    # below -- presumably simulate() fills them in place; confirm in tf_rl.
    try:
        simulate(simulation=g,
                 replay = all_replay,
                 act = all_act,
                 train = all_train,
                 update = all_update,
                 debug = all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps = timesteps,
                 elapsed = elapsed,
                 all_rewards = rewards,
                 percent = 1)

    except KeyboardInterrupt:
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # The saves below also run after a KeyboardInterrupt, because the handler
    # above does not re-raise. timesteps[0] is used as the file-name suffix.

    # Save trial times
    elapse_name = os.path.join('saved_graphs', ELAPSE_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(elapse_name, "wb") as f:
        pickle.dump(elapsed, f)

    # Save rewards
    rewards_name = os.path.join('saved_graphs', REWARDS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(rewards_name, "wb") as f:
        pickle.dump(rewards, f)

    # Save collisions
    collisions_name = os.path.join('saved_graphs', COLLISIONS_NAME + '-' + str(timesteps[0]) + '.pkl')
    with open(collisions_name, "wb") as f:
        pickle.dump(g.collisions, f)



[2017-07-26 04:11:53,502] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:53,569] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:53,974] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:54,041] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:54,462] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:54,530] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:54,951] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:55,019] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:55,446] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:11:55,529] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-free/model-5pred-2cues-500trials-189205.ckpt


[2017-07-26 04:11:57,275] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/5preds-free/model-5pred-2cues-500trials-189205.ckpt


0
36
59
94
done


In [9]:
# reset
%reset -f
from __future__ import print_function

import pickle
import os
import numpy as np
import tempfile
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2

from baselines import deepq
import baselines.common.tf_util as U
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule
tf.reset_default_graph()

# Simulation configuration; this dict is what KarpathyGame is constructed from.
current_settings = dict(
    # Ordering matters: an object type is eaten by the types listed after it
    # (pred eats prey).
    objects=['prey', 'pred', 'cue'],
    colors=dict(
        prey=[212, 211, 208],
        pred=[100, 37, 0],
        cue=[0, 0, 0],
    ),
    object_reward=dict(
        prey=dict(prey=0.1, pred=-0.1, cue=0.0),
        pred=dict(prey=1.0, pred=-1.0, cue=0.0),
    ),
    hero_bounces_off_walls=False,
    world_size=(500, 300),
    maximum_velocity=dict(prey=0, pred=50),
    object_radius=10.0,
    cue_types=2,
    num_objects=OrderedDict([('prey', 5), ('pred', 5), ('cue', 1)]),
    # "Active" objects are the ones that are learning.
    num_objects_active=OrderedDict([('prey', 0), ('pred', 5)]),
    # 'multiple' -> a separate DQN for each prey/predator
    # 'one'      -> a single DQN shared by all preys/predators
    # (only really matters when the preys/predators are active)
    network_prey='one',
    network_pred='multiple',
    num_observation_lines=32,
    observation_line_length=75.,
    tolerable_distance_to_wall=50,
    wall_distance_penalty=-1.0,
    delta_v=50,
)

# Run mode:
#   'new'  -> start a fresh simulation from the settings above
#   'load' -> restore previously trained graphs and replay buffers
RUN = 'load'

# Base names for the artifacts this run writes (model, replay buffer,
# trial times, rewards, collisions).
# NOTE(review): COLLISIONS_NAME is spelled 'collsions' and carries an extra
# '-load-' compared with its siblings; left untouched because it determines
# the output file name -- confirm nothing downstream expects this exact name
# before renaming.
MODEL_NAME      = 'model-5indeps-100weight-c'
REPLAY_NAME     = 'replay-5indeps-100weight-c'
ELAPSE_NAME     = 'elapse-5indeps-100weight-c'
REWARDS_NAME    = 'rewards-5indeps-100weight-c'
COLLISIONS_NAME = 'collsions-5indeps-load-100weight-c'

# Checkpoint / replay-buffer pairs to reload, one pair per predator network
# (paths are relative to the saved_graphs directory).
MODEL_RE_0, REPLAY_RE_0 = (
    'independent-0/model-1pred-2cues-500trials-340708.ckpt',
    'independent-0/replay-1pred-2cues-500trials-340708.pkl',
)
MODEL_RE_1, REPLAY_RE_1 = (
    'independent-1/model-1pred-2cues-500trials-1-361393.ckpt',
    'independent-1/replay-1pred-2cues-500trials-1-361393.pkl',
)
MODEL_RE_2, REPLAY_RE_2 = (
    'independent-2/model-1pred-2cues-500trials-2-327760.ckpt',
    'independent-2/replay-1pred-2cues-500trials-2-327760.pkl',
)
MODEL_RE_3, REPLAY_RE_3 = (
    'independent-3/model-1pred-2cues-500trials-3-209602.ckpt',
    'independent-3/replay-1pred-2cues-500trials-3-209602.pkl',
)
MODEL_RE_4, REPLAY_RE_4 = (
    'independent-4/model-1pred-2cues-500trials-4-361147.ckpt',
    'independent-4/replay-1pred-2cues-500trials-4-361147.pkl',
)

# Instantiate the game simulator from the settings dict.
g = KarpathyGame(current_settings)

# Clear the default TensorFlow graph before any network is built.
tf.reset_default_graph()

# Per-network handles; entry i of each list belongs to variable scope 'pred<i>'.
# all_replay is only declared here and is filled later.
all_act, all_train, all_update, all_debug, all_replay = [], [], [], [], []

# Build graphs
num_active_pred = current_settings['num_objects_active']['pred']
if num_active_pred != 0:
    # 'one' -> a single shared network; otherwise one network per active predator.
    network_pred = 1 if current_settings['network_pred'] == 'one' else num_active_pred

    for i in range(network_pred):
        name = 'pred' + str(i)
        with tf.variable_scope(name):
            # Q-function model and its training graph are created inside this
            # network's own variable scope.
            brain_pred = deepq.models.mlp([200, 200])
            built = deepq.build_train(
                make_obs_ph=lambda name: U.BatchInput((g.observation_size,), name=name),
                q_func=brain_pred,
                num_actions=g.num_actions,
                optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
            )
        # Keep the individual names bound as well; code further down still
        # refers to them.
        act, train, update_target, debug = built
        for handles, fn in zip((all_act, all_train, all_update, all_debug), built):
            handles.append(fn)
        
# Pacing values handed to simulate() as fps / action_every.
FPS, ACTION_EVERY = 30, 3

# fast_mode selects the (wait, visualize_every) pair handed to simulate():
# (False, 100) when enabled, (True, 1) otherwise.
fast_mode = False
WAIT, VISUALIZE_EVERY = (False, 100) if fast_mode else (True, 1)

# Mutable containers handed to simulate(); after the run, elapsed and rewards
# are pickled and timesteps[0] is used in the output file names.
elapsed, rewards = [], []
timesteps = [0]
    
# Initializing or reloading variables
# Start TensorFlow session with 2 CPUs
with U.make_session(2) as sess:

    # Initialize the parameters and copy them to the target network of EVERY
    # network that was built. The bare name `update_target` only refers to the
    # last network created by the build loop, so calling it repeatedly would
    # leave all the other target networks unsynchronized; iterate over
    # all_update instead.
    U.initialize()
    for sync_target in all_update:
        sync_target()

    if RUN == 'load':
        saved_dir = os.path.join(os.getcwd(), 'saved_graphs')

        # (checkpoint, replay buffer) pair for pred0 .. pred4, in scope order.
        checkpoints = [
            (MODEL_RE_0, REPLAY_RE_0),
            (MODEL_RE_1, REPLAY_RE_1),
            (MODEL_RE_2, REPLAY_RE_2),
            (MODEL_RE_3, REPLAY_RE_3),
            (MODEL_RE_4, REPLAY_RE_4),
        ]

        all_replay = []
        for idx, (model_re, replay_re) in enumerate(checkpoints):
            # The scope filter is a prefix match on variable names; the
            # trailing '/' keeps 'pred1' from also selecting 'pred10', ...
            scope = 'pred' + str(idx) + '/'
            restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)

            # Restore only this network's variables from its own checkpoint.
            saver = tf.train.Saver(restore)
            saver.restore(sess, os.path.join(saved_dir, model_re))

            # NOTE: pickle.load can execute arbitrary code contained in the
            # file -- only load replay buffers produced by trusted runs.
            with open(os.path.join(saved_dir, replay_re), 'rb') as f:
                replay_buffer = pickle.load(f)
            # The pickled object is indexable; only its first element is used
            # as this network's replay buffer.
            all_replay.append(replay_buffer[0])

    # Run simulation
    try:
        simulate(simulation=g,
                 replay=all_replay,
                 act=all_act,
                 train=all_train,
                 update=all_update,
                 debug=all_debug,
                 fps=FPS,
                 visualize_every=VISUALIZE_EVERY,
                 action_every=ACTION_EVERY,
                 wait=WAIT,
                 disable_training=False,
                 simulation_resolution=.001,
                 save_path=None,
                 timesteps=timesteps,
                 elapsed=elapsed,
                 all_rewards=rewards,
                 percent=1)

    except KeyboardInterrupt:
        # A manual interrupt is the expected way to stop a run early: close the
        # graphics and fall through so the results gathered so far are saved.
        print("Interrupted")
        g.shut_down_graphics()
        print('graphics shut down')

    # Save trial times, rewards and collisions. Every file name ends with the
    # value of timesteps[0] at the time of saving.
    step_suffix = '-' + str(timesteps[0]) + '.pkl'
    results = (
        (ELAPSE_NAME, elapsed),
        (REWARDS_NAME, rewards),
        (COLLISIONS_NAME, g.collisions),
    )
    for base_name, payload in results:
        with open('saved_graphs/' + base_name + step_suffix, "wb") as f:
            pickle.dump(payload, f)



[2017-07-26 04:15:16,229] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:16,294] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:16,713] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:16,782] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:17,193] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:17,259] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:17,679] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:17,750] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:18,185] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.




[2017-07-26 04:15:18,257] VARIABLES collection name is deprecated, please use GLOBAL_VARIABLES instead; VARIABLES will be removed after 2017-03-02.


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-0/model-1pred-2cues-500trials-340708.ckpt


[2017-07-26 04:15:19,630] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-0/model-1pred-2cues-500trials-340708.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-1/model-1pred-2cues-500trials-1-361393.ckpt


[2017-07-26 04:15:20,476] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-1/model-1pred-2cues-500trials-1-361393.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-2/model-1pred-2cues-500trials-2-327760.ckpt


[2017-07-26 04:15:21,334] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-2/model-1pred-2cues-500trials-2-327760.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-3/model-1pred-2cues-500trials-3-209602.ckpt


[2017-07-26 04:15:22,243] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-3/model-1pred-2cues-500trials-3-209602.ckpt


INFO:tensorflow:Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-4/model-1pred-2cues-500trials-4-361147.ckpt


[2017-07-26 04:15:23,190] Restoring parameters from /Users/Linhchi/dqlearn_multiagent/summer17-python3/saved_graphs/independent-4/model-1pred-2cues-500trials-4-361147.ckpt


0
48
95
done
