In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Make the parent of the current working directory importable so that the
# project package `src` imported below can be resolved.
import sys, os
sys.path.append(os.getcwd() + '/..')
import src

# Experiment setup
* Here we basically just load the game and check that it works

In [3]:
from __future__ import print_function 
import matplotlib.pyplot as plt
import numpy as np
import gym
from tqdm import tqdm


%matplotlib inline
%env THEANO_FLAGS="floatX=float32"

env: THEANO_FLAGS="floatX=float32"


In [4]:
# Gym environment id; passed to gym.make and to the session pool.
GAME = "FrontoPolarStocks-v0"

# Third positional argument of EnvPool -- presumably the number of parallel
# game sessions; TODO confirm against agentnet's EnvPool signature.
N_AGENTS = 1
# Number of ticks per recorded fragment (used by pool.update and as
# session_length in agent.get_sessions).
SEQ_LENGTH = 10

In [5]:
# Create the environment and reset it before the first interaction.
env = gym.make(GAME)
env.reset()

# Shapes handed to the agent builder and the BNN further down.
state_shape = env.observation_space.shape
action_shape = (env.action_space.num_discrete_space,)
action_emb_shape = action_shape + (3,)

# Index -> label lookup used when printing logged actions.
# NOTE(review): the sell/pass/buy ordering is the original author's guess and
# has not been verified against the environment.
action_names = np.array(["sell", "pass", "buy"])

# Take one step with action index 0 for every sub-action to get a sample observation.
state, _, _, _ = env.step([0] * action_shape[0])

print(state)

[2017-05-21 21:30:23,663] Making new env: FrontoPolarStocks-v0


[ 452.500023   54.699997   37.830002   42.919998   23.190001   34.119999
   24.23       51.93       24.200001   22.799999   26.200001   34.82
    8.94       14.55    ]


# Basic agent setup
Here we define a simple agent that maps game observations to a policy and state values using a shallow neural network.


In [6]:
import theano
from theano import tensor as T
import lasagne

# Debugging-friendly Theano settings: 'high' exception verbosity adds a
# debugprint of the failing apply node to error messages, and 'fast_compile'
# is the optimizer mode Theano's own hints suggest for getting a back-trace.
theano.config.exception_verbosity = 'high'
theano.config.optimizer = 'fast_compile'

In [7]:
from agent.agent import build_agent

In [8]:
# Build the agent from the action-embedding and observation shapes; besides the
# agent itself, build_agent returns the action layer and the V layer that are
# used below to collect the trainable weights.
agent, action_layer, V_layer = build_agent(action_emb_shape, state_shape)

In [9]:
# Sanity check: show the output shape of the agent's first action layer.
agent.action_layers[0].output_shape

(None, 14)

In [10]:
#Since it's a single lasagne network, one can get its weights, output, etc.
#Here: every trainable parameter reachable from the action layer and the V layer.
weights = lasagne.layers.get_all_params((action_layer,V_layer),trainable=True)

# Create and manage a pool of game sessions to play with

* To make training more stable, we shall have an entire batch of game sessions, each running independently of the others
* Why several parallel agents help training: http://arxiv.org/pdf/1602.01783v1.pdf
* Alternative approach: store more sessions: https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

In [11]:
from agentnet.experiments.openai_gym.pool import EnvPool

# Pool of N_AGENTS sessions of GAME played by `agent`.
# NOTE(review): max_size=10000 presumably caps the number of sessions kept in
# the experience replay -- confirm against agentnet's EnvPool documentation.
pool = EnvPool(agent, GAME, N_AGENTS, max_size=10000)

[2017-05-21 21:30:25,826] Making new env: FrontoPolarStocks-v0


In [12]:
%%time
#interact for 7 ticks; of the six values returned by pool.interact only the
#action log and the reward log are kept
_,action_log,reward_log,_,_,_  = pool.interact(7)


#translate the logged action indices into labels via action_names
print(action_names[action_log])
print(reward_log)

[[['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy' 'buy'
   'buy' 'buy']
  ['pass' 'buy' 'pass' 'pass' 'pass' 'pass' 'pass' 'buy' 'buy' 'pass' 'buy'
   'pass' 'pass' 'buy']]]
[[  3.36947898 -18.36905801 -28.06317896 -14.633176    61.47429002
  -22.61297396   0.        ]]
CPU times: user 4 ms, sys: 4 ms, total: 8 ms
Wall time: 18.3 ms


In [13]:
#load first sessions: per the original note, update() calls interact and
#remembers the resulting sessions, here a fragment of SEQ_LENGTH ticks
pool.update(SEQ_LENGTH)

# a2c loss

Here we define the objective function for actor-critic (one-step) RL.

* We regularize policy with expected inverse action probabilities (discouraging very small probas) to make objective numerically stable


In [14]:
#sample a batch of 100 stored sessions (with replacement) from the experience replay
replay = pool.experience_replay.sample_session_batch(100, replace=True)

#replay those sessions through the agent; only the last returned element is
#kept, unpacked into the policy sequence and the V sequence
_,_,_,_,(policy_seq,V_seq) = agent.get_sessions(
    replay,
    session_length=SEQ_LENGTH,
    experience_replay=True,
)

In [15]:
#a2c provides the actor-critic objective used below
from agentnet.learning import a2c


#Train via actor-critic (see here - https://www.youtube.com/watch?v=KHZVXao4qXs)

### NOTE (from the original author): the `assert action.dim==2` line inside
### this agentnet method was commented out locally for this call to work.
# NOTE(review): despite the "mse" in its name, this variable holds whatever
# element-wise objective a2c returns. V_seq[:,:,0] takes index 0 of the last
# axis of the value output; replay.actions[0] passes the first entry of
# replay.actions -- TODO confirm both match what the method expects.
elwise_mse_loss = a2c.get_elementwise_objective(policy_seq,V_seq[:,:,0],
                                                       replay.actions[0],
                                                       replay.rewards,
                                                       replay.is_alive,
                                                       gamma_or_gammas=0.99)

#average over "alive" fragments: total objective divided by the sum of is_alive
loss = elwise_mse_loss.sum() / replay.is_alive.sum()

In [16]:
# Policy regulariser: inverse action probabilities summed over the last axis,
# then averaged over all remaining elements. It penalises very small
# probabilities, which keeps the objective numerically stable.
reg = T.mean((1./policy_seq).sum(axis=-1))

# Build the total from the un-regularised objective rather than `loss += ...`,
# so re-running this cell does not add the penalty on top of an already
# regularised loss. The first term repeats the reduction from the cell that
# defines elwise_mse_loss; keep the two in sync.
loss = elwise_mse_loss.sum() / replay.is_alive.sum() + 0.001*reg

In [17]:
# Compute weight updates: rmsprop steps on the trainable weights that minimise
# the regularised loss, with learning rate 0.001.
updates = lasagne.updates.rmsprop(loss, weights, learning_rate=0.001)

In [18]:
#compile train function: takes no inputs, returns the loss and applies the
#weight updates defined above on every call

train_step = theano.function([],loss,updates=updates)

# Demo run

In [19]:
#play one evaluation session with the untrained agent and record a video of it
#under ./records (the run captured below lasted 400 timesteps)
untrained_reward = pool.evaluate(save_path="./records",record_video=True)

[2017-05-21 21:30:29,364] Making new env: FrontoPolarStocks-v0
[2017-05-21 21:30:29,595] Clearing 4 monitor files from previous run (because force=True was provided)
[2017-05-21 21:30:29,599] Starting new video recorder writing to /home/manatee/Desktop/inn.prac/Algorithms/src/records/openaigym.video.0.786.video000000.mp4
[2017-05-21 21:30:36,741] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/manatee/Desktop/inn.prac/Algorithms/src/records')


Episode finished after 400 timesteps with reward=104771.62900974997


In [20]:
import glob
import os

from IPython.display import HTML

# The recorder puts a number in the file name that differs between runs (the
# evaluation above wrote openaigym.video.0.786.video000000.mp4), so a
# hard-coded file name goes stale on re-run. Show the most recently written
# recording in the records directory instead.
recorded_videos = sorted(glob.glob("records/*.mp4"), key=os.path.getmtime)
if not recorded_videos:
    raise FileNotFoundError(
        "no .mp4 recordings found in 'records'; run the evaluation cell first")
video_path = recorded_videos[-1]

# Embed the recording; the HTML object is the cell's displayed output.
HTML("""
<video width="640" height="480" controls>
  <source src="{}" type="video/mp4">
</video>
""".format(video_path))

# Vime

In [21]:
from agent.bnn import BNN
# Build the Bayesian network used for the VIME reward from the observation,
# action and action-embedding shapes plus the sampled replay batch.
# Per the traceback further down, BNN.__init__ also compiles the VIME reward
# function (compile_vime_reward with n_samples=10).
bnn = BNN(state_shape, action_shape, action_emb_shape, replay)

(None, 14, 3)
(None, 56)


# Training loop

In [22]:
#starting epoch
epoch_counter = 1

#full game rewards keyed by epoch number, seeded with the untrained agent's
#evaluation reward
rewards = {epoch_counter: untrained_reward}

In [23]:
#pre-fill pool
# Each iteration records a SEQ_LENGTH-tick fragment, appends it to the replay
# (append=True) and passes it through bnn.add_vime_reward as preprocessing.
# NOTE(review): in the run captured below this fails on the first iteration
# with "Shape mismatch: x has 17 cols (and 10 rows) but y has 56 rows". The
# traceback shows the scan receiving a 1-D action sequence (one integer per
# tick), so a single 3-wide embedding row is joined with the 14-wide state
# (17 columns), while the BNN's first weight matrix has 56 rows. Presumably
# all 14 sub-actions per tick have to reach the BNN -- TODO confirm and fix in
# agent/bnn.py / agent/curiosity.py.
for i in tqdm(range(1000)):
    pool.update(SEQ_LENGTH, append=True, preprocess=bnn.add_vime_reward)

  0%|          | 0/1000 [00:00<?, ?it/s]


ValueError: Shape mismatch: x has 17 cols (and 10 rows) but y has 56 rows (and 50 cols)
Apply node that caused the error: Dot22(Join.0, Elemwise{add,no_inplace}.0)
Toposort index: 160
Inputs types: [TensorType(float32, matrix), TensorType(float32, matrix)]
Inputs shapes: [(10, 17), (56, 50)]
Inputs strides: [(68, 4), (200, 4)]
Inputs values: ['not shown', 'not shown']
Inputs type_num: [11, 11]
Outputs clients: [[Elemwise{add,no_inplace}(Dot22.0, InplaceDimShuffle{x,0}.0)]]

Debugprint of the apply node: 
Dot22 [id A] <TensorType(float32, matrix)> ''   
 |Join [id B] <TensorType(float32, matrix)> ''   
 | |TensorConstant{1} [id C] <TensorType(int8, scalar)>
 | |Reshape{2} [id D] <TensorType(float32, matrix)> ''   
 | | |AdvancedSubtensor1 [id E] <TensorType(float32, matrix)> ''   
 | | | |Elemwise{add,no_inplace} [id F] <TensorType(float32, matrix)> ''   
 | | | | |bnn.mu_copy [id G] <TensorType(float32, matrix)>
 | | | | |Elemwise{mul,no_inplace} [id H] <TensorType(float32, matrix)> ''   
 | | | |   |Elemwise{log1p,no_inplace} [id I] <TensorType(float32, matrix)> ''   
 | | | |   | |Elemwise{exp,no_inplace} [id J] <TensorType(float32, matrix)> ''   
 | | | |   |   |bnn.rho_copy [id K] <TensorType(float32, matrix)>
 | | | |   |Elemwise{add,no_inplace} [id L] <TensorType(float32, matrix)> ''   
 | | | |     |TensorConstant{(1, 1) of 0.0} [id M] <TensorType(float32, (True, True))>
 | | | |     |Elemwise{mul,no_inplace} [id N] <TensorType(float32, matrix)> ''   
 | | | |       |TensorConstant{(1, 1) of 1.0} [id O] <TensorType(float32, (True, True))>
 | | | |       |Reshape{2} [id P] <TensorType(float32, matrix)> ''   
 | | | |         |Join [id Q] <TensorType(float32, vector)> ''   
 | | | |         | |TensorConstant{0} [id R] <TensorType(int8, scalar)>
 | | | |         | |Elemwise{mul,no_inplace} [id S] <TensorType(float32, vector)> ''   
 | | | |         | | |Elemwise{sqrt,no_inplace} [id T] <TensorType(float32, vector)> ''   
 | | | |         | | | |Elemwise{mul,no_inplace} [id U] <TensorType(float32, vector)> ''   
 | | | |         | | |   |TensorConstant{(1,) of -2.0} [id V] <TensorType(float32, (True,))>
 | | | |         | | |   |Elemwise{log,no_inplace} [id W] <TensorType(float32, vector)> ''   
 | | | |         | | |     |Subtensor{:int64:} [id X] <TensorType(float32, vector)> ''   
 | | | |         | | |       |Elemwise{add,no_inplace} [id Y] <TensorType(float32, vector)> ''   
 | | | |         | | |       | |Elemwise{mul,no_inplace} [id Z] <TensorType(float32, vector)> ''   
 | | | |         | | |       | | |mrg_uniform{TensorType(float32, vector),no_inplace}.1 [id BA] <TensorType(float32, vector)> ''   
 | | | |         | | |       | | | |<TensorType(int32, matrix)> [id BB] <TensorType(int32, matrix)>
 | | | |         | | |       | | | |TensorConstant{(1,) of 42} [id BC] <TensorType(int64, (True,))>
 | | | |         | | |       | | |TensorConstant{(1,) of 1.0} [id BD] <TensorType(float32, (True,))>
 | | | |         | | |       | |TensorConstant{(1,) of 0.0} [id BE] <TensorType(float32, (True,))>
 | | | |         | | |       |Constant{21} [id BF] <int64>
 | | | |         | | |Elemwise{cos,no_inplace} [id BG] <TensorType(float32, vector)> ''   
 | | | |         | |   |Elemwise{mul,no_inplace} [id BH] <TensorType(float32, vector)> ''   
 | | | |         | |     |TensorConstant{(1,) of 6.28319} [id BI] <TensorType(float32, (True,))>
 | | | |         | |     |Subtensor{int64::} [id BJ] <TensorType(float32, vector)> ''   
 | | | |         | |       |Elemwise{add,no_inplace} [id Y] <TensorType(float32, vector)> ''   
 | | | |         | |       |Constant{21} [id BF] <int64>
 | | | |         | |Elemwise{mul,no_inplace} [id BK] <TensorType(float32, vector)> ''   
 | | | |         |   |Elemwise{sqrt,no_inplace} [id T] <TensorType(float32, vector)> ''   
 | | | |         |   |Elemwise{sin,no_inplace} [id BL] <TensorType(float32, vector)> ''   
 | | | |         |     |Elemwise{mul,no_inplace} [id BH] <TensorType(float32, vector)> ''   
 | | | |         |TensorConstant{[14  3]} [id BM] <TensorType(int64, vector)>
 | | | |Reshape{1} [id BN] <TensorType(int32, vector)> ''   
 | | |   |Alloc [id BO] <TensorType(int32, row)> ''   
 | | |   | |InplaceDimShuffle{0,x} [id BP] <TensorType(int32, (True, True))> ''   
 | | |   | | |InplaceDimShuffle{x} [id BQ] <TensorType(int32, (True,))> ''   
 | | |   | |   |actions[t] [id BR] <TensorType(int32, scalar)>
 | | |   | |Subtensor{int64} [id BS] <TensorType(int64, scalar)> ''   
 | | |   | | |Shape [id BT] <TensorType(int64, vector)> ''   
 | | |   | | | |InplaceDimShuffle{x} [id BQ] <TensorType(int32, (True,))> ''   
 | | |   | | |Constant{0} [id BU] <int64>
 | | |   | |TensorConstant{10} [id BV] <TensorType(int8, scalar)>
 | | |   |MakeVector{dtype='int64'} [id BW] <TensorType(int64, vector)> ''   
 | | |     |Elemwise{mul,no_inplace} [id BX] <TensorType(int64, scalar)> ''   
 | | |       |Subtensor{int64} [id BS] <TensorType(int64, scalar)> ''   
 | | |       |TensorConstant{10} [id BV] <TensorType(int8, scalar)>
 | | |MakeVector{dtype='int64'} [id BY] <TensorType(int64, vector)> ''   
 | |   |Subtensor{int64} [id BZ] <TensorType(int64, scalar)> ''   
 | |   | |Subtensor{:int64:} [id CA] <TensorType(int64, vector)> ''   
 | |   | | |Shape [id CB] <TensorType(int64, vector)> ''   
 | |   | | | |AdvancedSubtensor1 [id E] <TensorType(float32, matrix)> ''   
 | |   | | |Constant{1} [id CC] <int64>
 | |   | |Constant{0} [id BU] <int64>
 | |   |TensorConstant{-1} [id CD] <TensorType(int64, scalar)>
 | |Reshape{2} [id CE] <TensorType(float32, matrix)> ''   
 |   |Alloc [id CF] <TensorType(float32, (True, False, False))> ''   
 |   | |InplaceDimShuffle{0,x,1} [id CG] <TensorType(float32, (True, True, False))> ''   
 |   | | |InplaceDimShuffle{x,0} [id CH] <TensorType(float32, row)> ''   
 |   | |   |previous states[t] [id CI] <TensorType(float32, vector)>
 |   | |Subtensor{int64} [id CJ] <TensorType(int64, scalar)> ''   
 |   | | |Shape [id CK] <TensorType(int64, vector)> ''   
 |   | | | |InplaceDimShuffle{x,0} [id CH] <TensorType(float32, row)> ''   
 |   | | |Constant{0} [id BU] <int64>
 |   | |TensorConstant{10} [id BV] <TensorType(int8, scalar)>
 |   | |Subtensor{int64} [id CL] <TensorType(int64, scalar)> ''   
 |   |   |Shape [id CK] <TensorType(int64, vector)> ''   
 |   |   |Constant{1} [id CC] <int64>
 |   |MakeVector{dtype='int64'} [id CM] <TensorType(int64, vector)> ''   
 |     |Elemwise{mul,no_inplace} [id CN] <TensorType(int64, scalar)> ''   
 |     | |Subtensor{int64} [id CJ] <TensorType(int64, scalar)> ''   
 |     | |TensorConstant{10} [id BV] <TensorType(int8, scalar)>
 |     |Subtensor{int64} [id CL] <TensorType(int64, scalar)> ''   
 |Elemwise{add,no_inplace} [id CO] <TensorType(float32, matrix)> ''   
   |bnn.mu_copy [id CP] <TensorType(float32, matrix)>
   |Elemwise{mul,no_inplace} [id CQ] <TensorType(float32, matrix)> ''   
     |Elemwise{log1p,no_inplace} [id CR] <TensorType(float32, matrix)> ''   
     | |Elemwise{exp,no_inplace} [id CS] <TensorType(float32, matrix)> ''   
     |   |bnn.rho_copy [id CT] <TensorType(float32, matrix)>
     |Elemwise{add,no_inplace} [id CU] <TensorType(float32, matrix)> ''   
       |TensorConstant{(1, 1) of 0.0} [id M] <TensorType(float32, (True, True))>
       |Elemwise{mul,no_inplace} [id CV] <TensorType(float32, matrix)> ''   
         |TensorConstant{(1, 1) of 1.0} [id O] <TensorType(float32, (True, True))>
         |Reshape{2} [id CW] <TensorType(float32, matrix)> ''   
           |Join [id CX] <TensorType(float32, vector)> ''   
           | |TensorConstant{0} [id R] <TensorType(int8, scalar)>
           | |Elemwise{mul,no_inplace} [id CY] <TensorType(float32, vector)> ''   
           | | |Elemwise{sqrt,no_inplace} [id CZ] <TensorType(float32, vector)> ''   
           | | | |Elemwise{mul,no_inplace} [id DA] <TensorType(float32, vector)> ''   
           | | |   |TensorConstant{(1,) of -2.0} [id V] <TensorType(float32, (True,))>
           | | |   |Elemwise{log,no_inplace} [id DB] <TensorType(float32, vector)> ''   
           | | |     |Subtensor{:int64:} [id DC] <TensorType(float32, vector)> ''   
           | | |       |Elemwise{add,no_inplace} [id DD] <TensorType(float32, vector)> ''   
           | | |       | |Elemwise{mul,no_inplace} [id DE] <TensorType(float32, vector)> ''   
           | | |       | | |mrg_uniform{TensorType(float32, vector),no_inplace}.1 [id DF] <TensorType(float32, vector)> ''   
           | | |       | | | |<TensorType(int32, matrix)> [id DG] <TensorType(int32, matrix)>
           | | |       | | | |TensorConstant{(1,) of 2800} [id DH] <TensorType(int64, (True,))>
           | | |       | | |TensorConstant{(1,) of 1.0} [id BD] <TensorType(float32, (True,))>
           | | |       | |TensorConstant{(1,) of 0.0} [id BE] <TensorType(float32, (True,))>
           | | |       |Constant{1400} [id DI] <int64>
           | | |Elemwise{cos,no_inplace} [id DJ] <TensorType(float32, vector)> ''   
           | |   |Elemwise{mul,no_inplace} [id DK] <TensorType(float32, vector)> ''   
           | |     |TensorConstant{(1,) of 6.28319} [id BI] <TensorType(float32, (True,))>
           | |     |Subtensor{int64::} [id DL] <TensorType(float32, vector)> ''   
           | |       |Elemwise{add,no_inplace} [id DD] <TensorType(float32, vector)> ''   
           | |       |Constant{1400} [id DI] <int64>
           | |Elemwise{mul,no_inplace} [id DM] <TensorType(float32, vector)> ''   
           |   |Elemwise{sqrt,no_inplace} [id CZ] <TensorType(float32, vector)> ''   
           |   |Elemwise{sin,no_inplace} [id DN] <TensorType(float32, vector)> ''   
           |     |Elemwise{mul,no_inplace} [id DK] <TensorType(float32, vector)> ''   
           |TensorConstant{[56 50]} [id DO] <TensorType(int64, vector)>

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Apply node that caused the error: for{cpu,scan_fn}(Elemwise{minimum,no_inplace}.0, Subtensor{:int64:}.0, Subtensor{:int64:}.0, Subtensor{:int64:}.0, IncSubtensor{Set;:int64:}.0, IncSubtensor{Set;:int64:}.0, IncSubtensor{Set;:int64:}.0, IncSubtensor{Set;:int64:}.0, IncSubtensor{Set;:int64:}.0, Elemwise{minimum,no_inplace}.0, bnn.rho, bnn.mu, bnn.mu, bnn.rho, bnn.mu, bnn.rho, bnn.mu, bnn.rho, bnn.mu, bnn.rho)
Toposort index: 65
Inputs types: [TensorType(int64, scalar), TensorType(float32, matrix), TensorType(int32, vector), TensorType(float32, matrix), TensorType(int32, 3D), TensorType(int32, 3D), TensorType(int32, 3D), TensorType(int32, 3D), TensorType(int32, 3D), TensorType(int64, scalar), TensorType(float32, matrix), TensorType(float32, matrix), TensorType(float32, matrix), TensorType(float32, matrix), TensorType(float32, vector), TensorType(float32, vector), TensorType(float32, matrix), TensorType(float32, matrix), TensorType(float32, vector), TensorType(float32, vector)]
Inputs shapes: [(), (9, 14), (9,), (9, 14), (10, 15360, 6), (10, 15360, 6), (10, 15360, 6), (10, 15360, 6), (10, 15360, 6), (), (14, 3), (14, 3), (56, 50), (56, 50), (50,), (50,), (50, 14), (50, 14), (14,), (14,)]
Inputs strides: [(), (56, 4), (4,), (56, 4), (368640, 24, 4), (368640, 24, 4), (368640, 24, 4), (368640, 24, 4), (368640, 24, 4), (), (12, 4), (12, 4), (200, 4), (200, 4), (4,), (4,), (56, 4), (56, 4), (4,), (4,)]
Inputs values: [array(9), 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', array(9), 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown', 'not shown']
Inputs type_num: [7, 11, 5, 11, 5, 5, 5, 5, 5, 7, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]
Outputs clients: [[Subtensor{int64::}(for{cpu,scan_fn}.0, Constant{1})], [Subtensor{int64::}(for{cpu,scan_fn}.1, Constant{1})], [Subtensor{int64::}(for{cpu,scan_fn}.2, Constant{1})], [Subtensor{int64::}(for{cpu,scan_fn}.3, Constant{1})], [Subtensor{int64::}(for{cpu,scan_fn}.4, Constant{1})], ['output']]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/manatee/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-21-3939ba4f6cbd>", line 2, in <module>
    bnn = BNN(state_shape, action_shape, action_emb_shape, replay)
  File "/home/manatee/Desktop/inn.prac/Algorithms/src/agent/bnn.py", line 99, in __init__
    n_samples=10)
  File "/home/manatee/Desktop/inn.prac/Algorithms/src/agent/curiosity.py", line 92, in compile_vime_reward
    sequences=[prev_states, actions, next_states])

Debugprint of the apply node: 
for{cpu,scan_fn}.0 [id A] <TensorType(int32, 3D)> ''   
 |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | |Elemwise{minimum,no_inplace} [id C] <TensorType(int64, scalar)> ''   
 | | |Subtensor{int64} [id D] <TensorType(int64, scalar)> ''   
 | | | |Shape [id E] <TensorType(int64, vector)> ''   
 | | | | |Subtensor{int64::} [id F] <TensorType(float32, matrix)> 'previous states[0:]'   
 | | | |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id H] <TensorType(int64, scalar)> ''   
 | |   |Shape [id I] <TensorType(int64, vector)> ''   
 | |   | |Subtensor{int64::} [id J] <TensorType(int32, vector)> 'actions[0:]'   
 | |   |Constant{0} [id G] <int64>
 | |Subtensor{int64} [id K] <TensorType(int64, scalar)> ''   
 |   |Shape [id L] <TensorType(int64, vector)> ''   
 |   | |Subtensor{int64::} [id M] <TensorType(float32, matrix)> 'next states[0:]'   
 |   |Constant{0} [id G] <int64>
 |Subtensor{:int64:} [id N] <TensorType(float32, matrix)> ''   
 | |Subtensor{int64::} [id F] <TensorType(float32, matrix)> 'previous states[0:]'   
 | |ScalarFromTensor [id O] <int64> ''   
 |   |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 |Subtensor{:int64:} [id P] <TensorType(int32, vector)> ''   
 | |Subtensor{int64::} [id J] <TensorType(int32, vector)> 'actions[0:]'   
 | |ScalarFromTensor [id O] <int64> ''   
 |Subtensor{:int64:} [id Q] <TensorType(float32, matrix)> ''   
 | |Subtensor{int64::} [id M] <TensorType(float32, matrix)> 'next states[0:]'   
 | |ScalarFromTensor [id O] <int64> ''   
 |IncSubtensor{Set;:int64:} [id R] <TensorType(int32, 3D)> ''   
 | |AllocEmpty{dtype='int32'} [id S] <TensorType(int32, 3D)> ''   
 | | |Elemwise{add,no_inplace} [id T] <TensorType(int64, scalar)> ''   
 | | | |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | | | |Subtensor{int64} [id U] <TensorType(int64, scalar)> ''   
 | | |   |Shape [id V] <TensorType(int64, vector)> ''   
 | | |   | |Rebroadcast{0} [id W] <TensorType(int32, 3D)> ''   
 | | |   |   |InplaceDimShuffle{x,0,1} [id X] <TensorType(int32, (True, False, False))> ''   
 | | |   |     |<TensorType(int32, matrix)> [id Y] <TensorType(int32, matrix)>
 | | |   |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id Z] <TensorType(int64, scalar)> ''   
 | | | |Shape [id V] <TensorType(int64, vector)> ''   
 | | | |Constant{1} [id BA] <int64>
 | | |Subtensor{int64} [id BB] <TensorType(int64, scalar)> ''   
 | |   |Shape [id V] <TensorType(int64, vector)> ''   
 | |   |Constant{2} [id BC] <int64>
 | |Rebroadcast{0} [id W] <TensorType(int32, 3D)> ''   
 | |ScalarFromTensor [id BD] <int64> ''   
 |   |Subtensor{int64} [id U] <TensorType(int64, scalar)> ''   
 |IncSubtensor{Set;:int64:} [id BE] <TensorType(int32, 3D)> ''   
 | |AllocEmpty{dtype='int32'} [id BF] <TensorType(int32, 3D)> ''   
 | | |Elemwise{add,no_inplace} [id BG] <TensorType(int64, scalar)> ''   
 | | | |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | | | |Subtensor{int64} [id BH] <TensorType(int64, scalar)> ''   
 | | |   |Shape [id BI] <TensorType(int64, vector)> ''   
 | | |   | |Rebroadcast{0} [id BJ] <TensorType(int32, 3D)> ''   
 | | |   |   |InplaceDimShuffle{x,0,1} [id BK] <TensorType(int32, (True, False, False))> ''   
 | | |   |     |<TensorType(int32, matrix)> [id BL] <TensorType(int32, matrix)>
 | | |   |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id BM] <TensorType(int64, scalar)> ''   
 | | | |Shape [id BI] <TensorType(int64, vector)> ''   
 | | | |Constant{1} [id BA] <int64>
 | | |Subtensor{int64} [id BN] <TensorType(int64, scalar)> ''   
 | |   |Shape [id BI] <TensorType(int64, vector)> ''   
 | |   |Constant{2} [id BC] <int64>
 | |Rebroadcast{0} [id BJ] <TensorType(int32, 3D)> ''   
 | |ScalarFromTensor [id BO] <int64> ''   
 |   |Subtensor{int64} [id BH] <TensorType(int64, scalar)> ''   
 |IncSubtensor{Set;:int64:} [id BP] <TensorType(int32, 3D)> ''   
 | |AllocEmpty{dtype='int32'} [id BQ] <TensorType(int32, 3D)> ''   
 | | |Elemwise{add,no_inplace} [id BR] <TensorType(int64, scalar)> ''   
 | | | |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | | | |Subtensor{int64} [id BS] <TensorType(int64, scalar)> ''   
 | | |   |Shape [id BT] <TensorType(int64, vector)> ''   
 | | |   | |Rebroadcast{0} [id BU] <TensorType(int32, 3D)> ''   
 | | |   |   |InplaceDimShuffle{x,0,1} [id BV] <TensorType(int32, (True, False, False))> ''   
 | | |   |     |<TensorType(int32, matrix)> [id BW] <TensorType(int32, matrix)>
 | | |   |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id BX] <TensorType(int64, scalar)> ''   
 | | | |Shape [id BT] <TensorType(int64, vector)> ''   
 | | | |Constant{1} [id BA] <int64>
 | | |Subtensor{int64} [id BY] <TensorType(int64, scalar)> ''   
 | |   |Shape [id BT] <TensorType(int64, vector)> ''   
 | |   |Constant{2} [id BC] <int64>
 | |Rebroadcast{0} [id BU] <TensorType(int32, 3D)> ''   
 | |ScalarFromTensor [id BZ] <int64> ''   
 |   |Subtensor{int64} [id BS] <TensorType(int64, scalar)> ''   
 |IncSubtensor{Set;:int64:} [id CA] <TensorType(int32, 3D)> ''   
 | |AllocEmpty{dtype='int32'} [id CB] <TensorType(int32, 3D)> ''   
 | | |Elemwise{add,no_inplace} [id CC] <TensorType(int64, scalar)> ''   
 | | | |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | | | |Subtensor{int64} [id CD] <TensorType(int64, scalar)> ''   
 | | |   |Shape [id CE] <TensorType(int64, vector)> ''   
 | | |   | |Rebroadcast{0} [id CF] <TensorType(int32, 3D)> ''   
 | | |   |   |InplaceDimShuffle{x,0,1} [id CG] <TensorType(int32, (True, False, False))> ''   
 | | |   |     |<TensorType(int32, matrix)> [id CH] <TensorType(int32, matrix)>
 | | |   |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id CI] <TensorType(int64, scalar)> ''   
 | | | |Shape [id CE] <TensorType(int64, vector)> ''   
 | | | |Constant{1} [id BA] <int64>
 | | |Subtensor{int64} [id CJ] <TensorType(int64, scalar)> ''   
 | |   |Shape [id CE] <TensorType(int64, vector)> ''   
 | |   |Constant{2} [id BC] <int64>
 | |Rebroadcast{0} [id CF] <TensorType(int32, 3D)> ''   
 | |ScalarFromTensor [id CK] <int64> ''   
 |   |Subtensor{int64} [id CD] <TensorType(int64, scalar)> ''   
 |IncSubtensor{Set;:int64:} [id CL] <TensorType(int32, 3D)> ''   
 | |AllocEmpty{dtype='int32'} [id CM] <TensorType(int32, 3D)> ''   
 | | |Elemwise{add,no_inplace} [id CN] <TensorType(int64, scalar)> ''   
 | | | |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 | | | |Subtensor{int64} [id CO] <TensorType(int64, scalar)> ''   
 | | |   |Shape [id CP] <TensorType(int64, vector)> ''   
 | | |   | |Rebroadcast{0} [id CQ] <TensorType(int32, 3D)> ''   
 | | |   |   |InplaceDimShuffle{x,0,1} [id CR] <TensorType(int32, (True, False, False))> ''   
 | | |   |     |<TensorType(int32, matrix)> [id CS] <TensorType(int32, matrix)>
 | | |   |Constant{0} [id G] <int64>
 | | |Subtensor{int64} [id CT] <TensorType(int64, scalar)> ''   
 | | | |Shape [id CP] <TensorType(int64, vector)> ''   
 | | | |Constant{1} [id BA] <int64>
 | | |Subtensor{int64} [id CU] <TensorType(int64, scalar)> ''   
 | |   |Shape [id CP] <TensorType(int64, vector)> ''   
 | |   |Constant{2} [id BC] <int64>
 | |Rebroadcast{0} [id CQ] <TensorType(int32, 3D)> ''   
 | |ScalarFromTensor [id CV] <int64> ''   
 |   |Subtensor{int64} [id CO] <TensorType(int64, scalar)> ''   
 |Elemwise{minimum,no_inplace} [id B] <TensorType(int64, scalar)> ''   
 |bnn.rho [id CW] <TensorType(float32, matrix)>
 |bnn.mu [id CX] <TensorType(float32, matrix)>
 |bnn.mu [id CY] <TensorType(float32, matrix)>
 |bnn.rho [id CZ] <TensorType(float32, matrix)>
 |bnn.mu [id DA] <TensorType(float32, vector)>
 |bnn.rho [id DB] <TensorType(float32, vector)>
 |bnn.mu [id DC] <TensorType(float32, matrix)>
 |bnn.rho [id DD] <TensorType(float32, matrix)>
 |bnn.mu [id DE] <TensorType(float32, vector)>
 |bnn.rho [id DF] <TensorType(float32, vector)>
for{cpu,scan_fn}.1 [id A] <TensorType(int32, 3D)> ''   
for{cpu,scan_fn}.2 [id A] <TensorType(int32, 3D)> ''   
for{cpu,scan_fn}.3 [id A] <TensorType(int32, 3D)> ''   
for{cpu,scan_fn}.4 [id A] <TensorType(int32, 3D)> ''   
for{cpu,scan_fn}.5 [id A] <TensorType(float64, vector)> ''   

Inner graphs of the scan ops:

for{cpu,scan_fn}.0 [id A] <TensorType(int32, 3D)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 > |<TensorType(int32, matrix)> [id DH] <TensorType(int32, matrix)> -> [id R]
 > |TensorConstant{(1,) of 42} [id DI] <TensorType(int64, (True,))>
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 > |<TensorType(int32, matrix)> [id DK] <TensorType(int32, matrix)> -> [id BE]
 > |TensorConstant{(1,) of 2800} [id DL] <TensorType(int64, (True,))>
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 > |<TensorType(int32, matrix)> [id DN] <TensorType(int32, matrix)> -> [id BP]
 > |TensorConstant{(1,) of 50} [id DO] <TensorType(int64, (True,))>
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 > |<TensorType(int32, matrix)> [id DQ] <TensorType(int32, matrix)> -> [id CA]
 > |TensorConstant{(1,) of 700} [id DR] <TensorType(int64, (True,))>
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 > |<TensorType(int32, matrix)> [id DT] <TensorType(int32, matrix)> -> [id CL]
 > |TensorConstant{(1,) of 14} [id DU] <TensorType(int64, (True,))>
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   
 > |Elemwise{mul,no_inplace} [id DW] <TensorType(float32, scalar)> ''   
 > | |Elemwise{add,no_inplace} [id DX] <TensorType(float32, scalar)> ''   
 > | | |Elemwise{sub,no_inplace} [id DY] <TensorType(float32, scalar)> ''   
 > | | | |Elemwise{add,no_inplace} [id DZ] <TensorType(float32, scalar)> ''   
 > | | | | |Sum{acc_dtype=float64} [id EA] <TensorType(float32, scalar)> ''   
 > | | | | | |Elemwise{pow,no_inplace} [id EB] <TensorType(float32, vector)> ''   
 > | | | | |   |Elemwise{true_div,no_inplace} [id EC] <TensorType(float32, vector)> ''   
 > | | | | |   | |Elemwise{log1p,no_inplace} [id ED] <TensorType(float32, vector)> ''   
 > | | | | |   | | |Elemwise{exp,no_inplace} [id EE] <TensorType(float32, vector)> ''   
 > | | | | |   | |   |Join [id EF] <TensorType(float32, vector)> ''   
 > | | | | |   | |     |TensorConstant{0} [id EG] <TensorType(int8, scalar)>
 > | | | | |   | |     |Reshape{1} [id EH] <TensorType(float32, vector)> ''   
 > | | | | |   | |     | |Elemwise{sub,no_inplace} [id EI] <TensorType(float32, matrix)> 'new_rho'   
 > | | | | |   | |     | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   | |     |Reshape{1} [id EK] <TensorType(float32, vector)> ''   
 > | | | | |   | |     | |Elemwise{sub,no_inplace} [id EL] <TensorType(float32, matrix)> 'new_rho'   
 > | | | | |   | |     | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   | |     |Reshape{1} [id EM] <TensorType(float32, vector)> ''   
 > | | | | |   | |     | |Elemwise{sub,no_inplace} [id EN] <TensorType(float32, vector)> 'new_rho'   
 > | | | | |   | |     | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   | |     |Reshape{1} [id EO] <TensorType(float32, vector)> ''   
 > | | | | |   | |     | |Elemwise{sub,no_inplace} [id EP] <TensorType(float32, matrix)> 'new_rho'   
 > | | | | |   | |     | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   | |     |Reshape{1} [id EQ] <TensorType(float32, vector)> ''   
 > | | | | |   | |       |Elemwise{sub,no_inplace} [id ER] <TensorType(float32, vector)> 'new_rho'   
 > | | | | |   | |       |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   | |Elemwise{log1p,no_inplace} [id ES] <TensorType(float32, vector)> ''   
 > | | | | |   |   |Elemwise{exp,no_inplace} [id ET] <TensorType(float32, vector)> ''   
 > | | | | |   |     |Join [id EU] <TensorType(float32, vector)> ''   
 > | | | | |   |       |TensorConstant{0} [id EG] <TensorType(int8, scalar)>
 > | | | | |   |       |Reshape{1} [id EV] <TensorType(float32, vector)> ''   
 > | | | | |   |       | |bnn.rho_copy [id EW] <TensorType(float32, matrix)> -> [id CW]
 > | | | | |   |       | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   |       |Reshape{1} [id EX] <TensorType(float32, vector)> ''   
 > | | | | |   |       | |bnn.rho_copy [id EY] <TensorType(float32, matrix)> -> [id CZ]
 > | | | | |   |       | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   |       |Reshape{1} [id EZ] <TensorType(float32, vector)> ''   
 > | | | | |   |       | |bnn.rho_copy [id FA] <TensorType(float32, vector)> -> [id DB]
 > | | | | |   |       | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   |       |Reshape{1} [id FB] <TensorType(float32, vector)> ''   
 > | | | | |   |       | |bnn.rho_copy [id FC] <TensorType(float32, matrix)> -> [id DD]
 > | | | | |   |       | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   |       |Reshape{1} [id FD] <TensorType(float32, vector)> ''   
 > | | | | |   |         |bnn.rho_copy [id FE] <TensorType(float32, vector)> -> [id DF]
 > | | | | |   |         |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | | | | |   |TensorConstant{(1,) of 2} [id FF] <TensorType(int8, (True,))>
 > | | | | |Elemwise{mul,no_inplace} [id FG] <TensorType(float32, scalar)> ''   
 > | | | |   |TensorConstant{2} [id FH] <TensorType(int8, scalar)>
 > | | | |   |Sum{acc_dtype=float64} [id FI] <TensorType(float32, scalar)> ''   
 > | | | |     |Elemwise{log,no_inplace} [id FJ] <TensorType(float32, vector)> ''   
 > | | | |       |Elemwise{log1p,no_inplace} [id ES] <TensorType(float32, vector)> ''   
 > | | | |Elemwise{mul,no_inplace} [id FK] <TensorType(float32, scalar)> ''   
 > | | |   |TensorConstant{2} [id FH] <TensorType(int8, scalar)>
 > | | |   |Sum{acc_dtype=float64} [id FL] <TensorType(float32, scalar)> ''   
 > | | |     |Elemwise{log,no_inplace} [id FM] <TensorType(float32, vector)> ''   
 > | | |       |Elemwise{log1p,no_inplace} [id ED] <TensorType(float32, vector)> ''   
 > | | |Sum{acc_dtype=float64} [id FN] <TensorType(float32, scalar)> ''   
 > | |   |Elemwise{pow,no_inplace} [id FO] <TensorType(float32, vector)> ''   
 > | |     |Elemwise{true_div,no_inplace} [id FP] <TensorType(float32, vector)> ''   
 > | |     | |Elemwise{sub,no_inplace} [id FQ] <TensorType(float32, vector)> ''   
 > | |     | | |Join [id FR] <TensorType(float32, vector)> ''   
 > | |     | | | |TensorConstant{0} [id EG] <TensorType(int8, scalar)>
 > | |     | | | |Reshape{1} [id FS] <TensorType(float32, vector)> ''   
 > | |     | | | | |Elemwise{sub,no_inplace} [id FT] <TensorType(float32, matrix)> 'new_mu'   
 > | |     | | | | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | | | |Reshape{1} [id FU] <TensorType(float32, vector)> ''   
 > | |     | | | | |Elemwise{sub,no_inplace} [id FV] <TensorType(float32, matrix)> 'new_mu'   
 > | |     | | | | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | | | |Reshape{1} [id FW] <TensorType(float32, vector)> ''   
 > | |     | | | | |Elemwise{sub,no_inplace} [id FX] <TensorType(float32, vector)> 'new_mu'   
 > | |     | | | | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | | | |Reshape{1} [id FY] <TensorType(float32, vector)> ''   
 > | |     | | | | |Elemwise{sub,no_inplace} [id FZ] <TensorType(float32, matrix)> 'new_mu'   
 > | |     | | | | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | | | |Reshape{1} [id GA] <TensorType(float32, vector)> ''   
 > | |     | | |   |Elemwise{sub,no_inplace} [id GB] <TensorType(float32, vector)> 'new_mu'   
 > | |     | | |   |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | | |Join [id GC] <TensorType(float32, vector)> ''   
 > | |     | |   |TensorConstant{0} [id EG] <TensorType(int8, scalar)>
 > | |     | |   |Reshape{1} [id GD] <TensorType(float32, vector)> ''   
 > | |     | |   | |bnn.mu_copy [id GE] <TensorType(float32, matrix)> -> [id CX]
 > | |     | |   | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | |   |Reshape{1} [id GF] <TensorType(float32, vector)> ''   
 > | |     | |   | |bnn.mu_copy [id GG] <TensorType(float32, matrix)> -> [id CY]
 > | |     | |   | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | |   |Reshape{1} [id GH] <TensorType(float32, vector)> ''   
 > | |     | |   | |bnn.mu_copy [id GI] <TensorType(float32, vector)> -> [id DA]
 > | |     | |   | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | |   |Reshape{1} [id GJ] <TensorType(float32, vector)> ''   
 > | |     | |   | |bnn.mu_copy [id GK] <TensorType(float32, matrix)> -> [id DC]
 > | |     | |   | |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | |   |Reshape{1} [id GL] <TensorType(float32, vector)> ''   
 > | |     | |     |bnn.mu_copy [id GM] <TensorType(float32, vector)> -> [id DE]
 > | |     | |     |TensorConstant{(1,) of -1} [id EJ] <TensorType(int64, (True,))>
 > | |     | |Elemwise{log1p,no_inplace} [id ES] <TensorType(float32, vector)> ''   
 > | |     |TensorConstant{(1,) of 2} [id FF] <TensorType(int8, (True,))>
 > | |TensorConstant{0.5} [id GN] <TensorType(float32, scalar)>
 > |Elemwise{mul,no_inplace} [id GO] <TensorType(float64, scalar)> ''   
 >   |TensorConstant{0.5} [id GN] <TensorType(float32, scalar)>
 >   |Subtensor{int64} [id GP] <TensorType(int64, scalar)> ''   
 >     |Shape [id GQ] <TensorType(int64, vector)> ''   
 >     | |Elemwise{log1p,no_inplace} [id ED] <TensorType(float32, vector)> ''   
 >     |Constant{0} [id GR] <int64>

for{cpu,scan_fn}.1 [id A] <TensorType(int32, 3D)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   

for{cpu,scan_fn}.2 [id A] <TensorType(int32, 3D)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   

for{cpu,scan_fn}.3 [id A] <TensorType(int32, 3D)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   

for{cpu,scan_fn}.4 [id A] <TensorType(int32, 3D)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   

for{cpu,scan_fn}.5 [id A] <TensorType(float64, vector)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DG] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DJ] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DM] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DP] <TensorType(int32, matrix)> ''   
 >mrg_uniform{TensorType(float32, vector),no_inplace}.0 [id DS] <TensorType(int32, matrix)> ''   
 >Elemwise{sub,no_inplace} [id DV] <TensorType(float64, scalar)> ''   

Storage map footprint:
 - IncSubtensor{Set;:int64:}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - IncSubtensor{Set;:int64:}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - IncSubtensor{Set;:int64:}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - IncSubtensor{Set;:int64:}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - IncSubtensor{Set;:int64:}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - for{cpu,scan_fn}.0, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - for{cpu,scan_fn}.1, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - for{cpu,scan_fn}.2, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - for{cpu,scan_fn}.3, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - for{cpu,scan_fn}.4, Shape: (10, 15360, 6), ElemSize: 4 Byte(s), TotalSize: 3686400 Byte(s)
 - <TensorType(int32, matrix)>, Shared Input, Shape: (15360, 6), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - <TensorType(int32, matrix)>, Shared Input, Shape: (15360, 6), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - <TensorType(int32, matrix)>, Shared Input, Shape: (15360, 6), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - <TensorType(int32, matrix)>, Shared Input, Shape: (15360, 6), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - <TensorType(int32, matrix)>, Shared Input, Shape: (15360, 6), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - bnn.mu, Shared Input, Shape: (56, 50), ElemSize: 4 Byte(s), TotalSize: 11200 Byte(s)
 - bnn.rho, Shared Input, Shape: (56, 50), ElemSize: 4 Byte(s), TotalSize: 11200 Byte(s)
 - bnn.mu, Shared Input, Shape: (50, 14), ElemSize: 4 Byte(s), TotalSize: 2800 Byte(s)
 - bnn.rho, Shared Input, Shape: (50, 14), ElemSize: 4 Byte(s), TotalSize: 2800 Byte(s)
 - previous states, Input, Shape: (9, 14), ElemSize: 4 Byte(s), TotalSize: 504 Byte(s)
 - actions, Input, Shape: (126,), ElemSize: 4 Byte(s), TotalSize: 504 Byte(s)
 - next states, Input, Shape: (9, 14), ElemSize: 4 Byte(s), TotalSize: 504 Byte(s)
 - Subtensor{:int64:}.0, Shape: (9, 14), ElemSize: 4 Byte(s), TotalSize: 504 Byte(s)
 - Subtensor{:int64:}.0, Shape: (9, 14), ElemSize: 4 Byte(s), TotalSize: 504 Byte(s)
 - bnn.mu, Shared Input, Shape: (50,), ElemSize: 4 Byte(s), TotalSize: 200 Byte(s)
 - bnn.rho, Shared Input, Shape: (50,), ElemSize: 4 Byte(s), TotalSize: 200 Byte(s)
 - bnn.rho, Shared Input, Shape: (14, 3), ElemSize: 4 Byte(s), TotalSize: 168 Byte(s)
 - bnn.mu, Shared Input, Shape: (14, 3), ElemSize: 4 Byte(s), TotalSize: 168 Byte(s)
 - bnn.mu, Shared Input, Shape: (14,), ElemSize: 4 Byte(s), TotalSize: 56 Byte(s)
 - bnn.rho, Shared Input, Shape: (14,), ElemSize: 4 Byte(s), TotalSize: 56 Byte(s)
 - Subtensor{:int64:}.0, Shape: (9,), ElemSize: 4 Byte(s), TotalSize: 36 Byte(s)
 - Constant{0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{2}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Elemwise{minimum,no_inplace}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Constant{-1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 TotalSize: 38738644.0 Byte(s) 0.036 GB
 TotalSize inputs: 1873592.0 Byte(s) 0.002 GB



In [None]:
# Warm-up phase for the BNN: run a batch of training steps up front so that the
# first iterations of the main loop are not slowed down by a poorly-fitted BNN.
pretrain_steps = 1000
for _pretrain_step in tqdm(range(pretrain_steps)):
    pretrain_batch = bnn.sample_from_pool()
    bnn.train_step(*pretrain_batch)

In [None]:
# Main training loop. It may take a very long time to finish; consider
# interrupting it early.
# `loss` is an exponential moving average (decay 0.99) of train_step() results.
loss = 0
for outer_iter in tqdm(range(1000)):

    # Each inner loop uses its own index name: previously all of them reused
    # `i`, silently clobbering the outer loop variable.

    # Refresh the experience pool; every update is passed through
    # bnn.add_vime_reward as its preprocessing hook.
    # NOTE(review): presumably each call plays SEQ_LENGTH more ticks and appends
    # them to the replay pool - confirm against the pool implementation.
    for play_iter in range(10):
        pool.update(SEQ_LENGTH, append=True, preprocess=bnn.add_vime_reward)

    # Train the agent and fold the result into the moving-average loss.
    for agent_iter in range(10):
        loss = loss * 0.99 + train_step() * 0.01

    # Train the BNN on batches drawn via bnn.sample_from_pool().
    for bnn_iter in range(10):
        bnn.train_step(*bnn.sample_from_pool())

    if epoch_counter % 100 == 0:
        # Average reward per game tick in the current experience replay pool.
        # The reward matrix is fetched once and reused for both statistics.
        replay = pool.experience_replay
        pool_rewards = replay.rewards.get_value()
        pool_mean_reward = np.average(pool_rewards[:, :-1],
                                      weights=1 + replay.is_alive.get_value()[:, :-1])
        pool_size = pool_rewards.shape[0]
        print("iter=%i\treward/step=%.5f\tpool_size=%i\tvime ma=%.5f"%(epoch_counter,
                                                         pool_mean_reward,
                                                         pool_size,
                                                         bnn.vime_reward_ma))

    # Record current learning progress; `rewards` is keyed by epoch_counter and
    # is what the learning-curve cell below plots.
    if epoch_counter % 500 == 0:
        n_games = 10
        rewards[epoch_counter] = pool.evaluate(record_video=False, n_games=n_games, verbose=False)
        print("Current score(mean over %i) = %.3f"%(n_games,np.mean(rewards[epoch_counter])))

    epoch_counter += 1


# Time to drink some coffee!

In [None]:
# Order the recorded evaluations by the epoch they were taken at, then split
# the (epoch, rewards) pairs into two parallel tuples.
rewards_by_epoch = sorted(rewards.items(), key=lambda entry: entry[0])
iters, session_rewards = zip(*rewards_by_epoch)

In [None]:
# Learning curve: mean reward of each recorded evaluation against the epoch
# counter at which it was recorded.
mean_session_rewards = [np.mean(per_game) for per_game in session_rewards]
plt.plot(iters, mean_session_rewards)
plt.title("Training progress")
plt.xlabel("Epoch counter")
plt.ylabel("Mean Income")
plt.show()

In [None]:
# Re-run the agent over the sessions stored in the experience replay pool and
# keep only the last element of the result: the (policy, V) pair.
_,_,_,_,(pool_policy,pool_V) = agent.get_sessions(
    pool.experience_replay,
    session_length=SEQ_LENGTH,
    experience_replay=True,)

# Flatten the stored observations to one row per recorded tick, then keep the
# last two observation components as the axes for the plots below.
# The row width is taken from the array itself rather than hard-coded: a fixed
# width of 5 does not match this environment's 14-component observations and
# leaves `states` with a different number of points than `values`.
# NOTE(review): assumes the last axis of the stored observations is the
# feature axis - confirm against the pool layout.
pool_observations = pool.experience_replay.observations[0].get_value()
states = pool_observations.reshape((-1, pool_observations.shape[-1])).T[-2:]
values = pool_V.ravel().eval()
optimal_actid = pool_policy.argmax(-1).ravel().eval()

In [None]:
# Scatter the two selected observation components against each other and
# colour every point by the corresponding entry of `values`.
plt.scatter(states[0], states[1], c=values, alpha=0.1)
plt.title("predicted state values")
plt.xlabel("previous")
plt.ylabel("current")
plt.show()

In [None]:
obs_x, obs_y = states

# Draw one scatter layer per action id, so that each action gets its own
# colour and its own legend entry.
action_colors = ['red', 'blue', 'green']
for act_id, act_color in enumerate(action_colors):
    chosen = optimal_actid == act_id
    plt.scatter(obs_x[chosen], obs_y[chosen],
                c=act_color,
                alpha=0.1, label=action_names[act_id])

plt.title("most likely action id")
plt.xlabel("previous")
plt.ylabel("current")
plt.legend(loc='best')
plt.show()

In [None]:
# Final evaluation run with video recording enabled, using "./records" as the
# save path.
# NOTE(review): the previous note here said MountainCar-v0 evaluation sessions
# are cropped to 200 ticks; this notebook sets GAME to another environment, so
# confirm whether any such cap applies.
# NOTE(review): the result is stored as `untrained_reward` although this cell
# sits after the training cells - the name looks stale.
untrained_reward = pool.evaluate(
    record_video=True,
    save_path="./records",
)