#### run global setup

In [None]:
try:
    with open("../global_setup.py") as setupfile:
        exec(setupfile.read())
except FileNotFoundError:
    print('Setup already completed')

#### run local setup

In [None]:
from tqdm import tqdm
from src.rl.NatureDQN import NatureDQN
from src.rl.AtariAgent import AtariAgent
from src.rl.util import run_episode
import gym

config = {'conv_layers': 3,
          'conv_units': [32, 64, 64],
          'filter_sizes': [8, 4, 3],
          'strides': [4, 2, 1],
          'state_frames': 4,
          'fc_layers': 1,
          'fc_units': [512],
          'in_width': 84,
          'in_height': 84,
          'discount': 0.99,
          'device': '/gpu:0',
          'lr': 0.00025,
          'opt_decay': 0.95,
          'momentum': 0.0,
          'opt_eps': 0.01,
          'clip_delta': 1.0,
          'tensorboard': False,
          'tensorboard_freq': 50,
          'ckpt': 0,
          'random_seed': 42,
          'hist_size': 1e6,
          'batch_size': 32,
          'eps': 1.0,
          
          }

### Breakout

In [None]:
env = gym.make('Breakout-v4')
config['num_actions'] = env.action_space.n
net = NatureDQN(config)
#net.load('src/rl/trained/breakout')
agent = AtariAgent(env, net, config)

In [None]:
s = env.reset()
s.shape

In [None]:
run_episode(env, agent, render=True, render_delay=0.03)

In [None]:
def run_experiment(env, agent, epsilon_decay, n_episodes) -> list:
    rewards = []
    for i in tqdm(range(n_episodes)):
        agent.reset()
        sum_r = run_episode(env, agent, learn=True)
        rewards.append(sum_r)
        agent.eps -= epsilon_decay
        if i % 1e2 == 0:
            print("Episode", i, "reward:", sum_r, "(eps", agent.eps, ")")
            agent.sync_target()
    agent.epsilon = 0
    sum_r = run_episode(env, agent)
    print('Trained for ', n_episodes, ' episodes. Last episode achieved a reward of ', sum_r)     
    return rewards


run_experiment(env, agent, 9e-5, int(1e4))

In [1]:
from baselines.acktr.acktr_disc import learn
from baselines.common.cmd_util import make_atari_env, wrap_deepmind
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.ppo2.policies import CnnPolicy
from functools import partial

env_id = 'BreakoutNoFrameskip-v4'
num_timesteps = int(5e7)
num_cpu = 24
seed = 420

train_env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
train_env.render()
policy = partial(CnnPolicy, one_dim_bias=True)
model = learn(policy, train_env, seed, total_timesteps=int(num_timesteps), nprocs=num_cpu, log_interval=1000)
print("Done training.")

  from ._conv import register_converters as _register_converters


Render not defined for <baselines.common.vec_env.subproc_vec_env.SubprocVecEnv object at 0x7f0e17c59da0>


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



Instructions for updating:
keep_dims is deprecated, use keepdims instead


Instructions for updating:
keep_dims is deprecated, use keepdims instead


Use async eigen decomp
updating 34 eigenvalue/vectors


projecting 12 gradient matrices


Process Process-20:


Process Process-3:


Process Process-22:


Process Process-1:


Process Process-9:


Process Process-6:


Process Process-10:


Process Process-2:


Process Process-24:


Process Process-8:


Process Process-23:


Process Process-11:


Process Process-21:


Process Process-5:


Process Process-7:


Traceback (most recent call last):


Process Process-19:


Process Process-4:


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


Process Process-14:


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


Process Process-16:


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


KeyboardInterrupt


Process Process-17:


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


Traceback (most recent call last):


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


Process Process-13:


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


Process Process-15:


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


Process Process-18:


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


Process Process-12:


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


Traceback (most recent call last):


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


Traceback (most recent call last):


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


KeyboardInterrupt


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/home/ralars/.local/lib/python3.6/site-packages/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()


KeyboardInterrupt


KeyboardInterrupt


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


KeyboardInterrupt


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


KeyboardInterrupt


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


KeyboardInterrupt


KeyboardInterrupt


KeyboardInterrupt


In [4]:
import gym
import numpy as np
model.load("/home/ralars/asd")
env = wrap_deepmind(gym.make(env_id), frame_stack=True)
s = np.zeros((24, 84, 84, 4))
s[:] = env.reset()
while True:
    env.render()
    a, _, _, _ = model.step(s)
    a = a[0]
    s[:], _, t, _ = env.step(a)
    if t:
        s[:] = env.reset()

KeyboardInterrupt: 