In [1]:
%load_ext autoreload
%autoreload 2

import math
import random
import tempfile
import tensorflow as tf
import time

import numpy as np

from collections import defaultdict

from tf_rl.controller import KerasDDPG
from tf_rl.models     import KERASMLP
from tf_rl.simulation import DoublePendulum
from tf_rl            import simulate
from tf_rl.simulation import DoublePendulum2

from keras import backend as K

# Parameters handed to the double-pendulum simulation constructor.
DOUBLE_PENDULUM_PARAMS = dict(
    g_ms2=9.8,               # acceleration due to gravity, in m/s^2
    l1_m=1.0,                # length of pendulum 1, in m
    l2_m=2.0,                # length of pendulum 2, in m
    m1_kg=1.0,               # mass of pendulum 1, in kg
    m2_kg=1.0,               # mass of pendulum 2, in kg
    damping=0.2,
    max_control_input=20.0
)

Using TensorFlow backend.


In [2]:
# Actor network: input sized to the observation, tanh on the final layer.
# The final layer width follows DoublePendulum.action_size rather than a
# hard-coded 1, so it stays in step with the critic input width and the
# action size passed to the controller.
actor = KERASMLP(DoublePendulum.observation_size,
                 [512, 256, DoublePendulum.action_size],
                 ['relu', 'relu', 'tanh'])

# Critic network: input width is observation size plus action size, with a
# single linear output.
critic = KERASMLP(DoublePendulum.observation_size + DoublePendulum.action_size,
                  [512, 256, 1],
                  ['relu', 'relu', 'linear'])

In [3]:
# Build the DDPG controller from the actor and critic networks defined earlier.
current_controller = KerasDDPG(
    DoublePendulum.observation_size,
    DoublePendulum.action_size,
    actor,
    critic,
    discount_rate=0.99,
    exploration_period=10000
)

In [4]:
# Training run: keep launching simulations until the kernel is interrupted.
fast_mode = False

# (FPS, SPEED, RES) for the selected mode. SPEED is assigned but not used
# anywhere in this cell.
FPS, SPEED, RES = (5, 20.0, 0.03) if fast_mode else (30, 1., 0.001)

try:
    while True:
        # A fresh pendulum is constructed for every simulate() call.
        d = DoublePendulum2(DOUBLE_PENDULUM_PARAMS)
        simulate(
            d,
            current_controller,
            fps=FPS,
            simulation_resolution=RES,
            action_every=10,
            disable_training=False
        )
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop the loop.
    print("Interrupted")

Post-50, mean: 3.924496
Starting training step 6602 at 2016-02-26 19:42:20
Critic model fitting took 0.008951 seconds
Policy gradient and update calcs took 0.002947 seconds
Target network updates took 0.002453 seconds
--------------------------------------
Total time spent in training iterations was 0.259273 seconds
Interrupted


In [None]:
# Symbolic gradient of the summed critic output with respect to the critic
# input, kept in a one-element list.
critic_output = critic.model.get_output(train=False)
critic_input = critic.model.get_input(train=False)
c_grad = [K.gradients(K.sum(critic_output), critic_input)[0]]

In [None]:
# Split the critic-input gradient into five pieces; the last piece is bound to
# act_grad and the first four to s1..s4.
# NOTE(review): the (split_dim, num_split, value) argument order looks like the
# pre-1.0 TensorFlow signature — confirm against the installed version.
s1,s2,s3,s4,act_grad = tf.split(1,5,c_grad[0])

In [None]:
# Minibatch size used by the gradient experiments in the following cells.
minibatch_size = 32

In [None]:
# Wrap the minibatch size in a Keras backend variable so it can take part in
# symbolic expressions (it is used as a divisor in a later cell).
minibatch = K.variable(minibatch_size)

In [None]:
# Inspect the actor model's trainable weights (shown as the cell output).
actor.model.trainable_weights

In [None]:
# Gradient of the per-sample summed actor output with respect to each
# trainable weight, one tensor per weight.
# This must be a list comprehension: wrapping a parenthesised generator in
# [...] yields a one-element list holding a generator object, which cannot be
# used as backend function outputs.
actor_output_sum = K.sum(actor.model.get_output(train=False), axis=1)
a_grad_1 = [K.gradients(actor_output_sum, z)[0] for z in actor.model.trainable_weights]

In [None]:
# Divide each gradient tensor by the minibatch size. The division has to be
# applied element by element because a Python list cannot be divided by a
# backend variable.
a_grad_1_2 = [grad / minibatch for grad in a_grad_1]

In [None]:
# Symbolic sum of the actor output along axis 1, wrapped in a list so it can be
# passed as the outputs of a backend function.
simple_test = [K.sum(actor.model.get_output(train=False),axis=1)]

In [None]:
# Compile a backend function mapping the actor input to the simple_test output.
simp_test_func = K.function(inputs=[actor.model.get_input(train=False)], outputs=simple_test)

In [None]:
# test_input_a is otherwise only created in a later cell, so running the
# notebook top to bottom on a fresh kernel would raise NameError here.
# Create the random actor input batch (32 rows, 4 columns) before using it.
test_input_a = np.random.rand(32, 4)
simp_test_func([test_input_a])

In [None]:
# Gradient of the fully summed actor output with respect to the actor's
# trainable weights, kept in a one-element list.
# NOTE(review): the trailing [0] keeps only the first entry of what
# K.gradients returns — presumably the gradient for the first weight only;
# confirm that this is intended.
a_grad_2 = [K.gradients(K.sum(actor.model.get_output(train=False)), actor.model.trainable_weights)[0]]

In [None]:
# Compile a backend function that takes the critic and actor inputs and
# returns a_grad_1.
test_update_1 = K.function(inputs=[critic.model.get_input(train=False),actor.model.get_input(train=False)], outputs=a_grad_1)

In [None]:
# Compile a backend function that takes the critic and actor inputs and
# returns a_grad_2.
test_update_2 = K.function(inputs=[critic.model.get_input(train=False),actor.model.get_input(train=False)], outputs=a_grad_2)

In [None]:
# Random critic test batch: 32 rows by 5 columns, uniform on [0, 1).
test_input_c = np.random.random_sample((32, 5))

In [None]:
# Random actor test batch: 32 rows by 4 columns, uniform on [0, 1).
test_input_a = np.random.random_sample((32, 4))

In [None]:
# Evaluate test_update_1 on the random critic and actor test batches.
p_grad_1 = test_update_1([test_input_c, test_input_a])

In [None]:
# Evaluate test_update_2 on the same random critic and actor test batches.
p_grad_2 = test_update_2([test_input_c, test_input_a])

In [None]:
# Display the values returned by test_update_1.
p_grad_1

In [None]:
# Display the values returned by test_update_2, for comparison with p_grad_1.
p_grad_2

In [None]:
# Plot the critic's value function using the controller's plotting helper.
current_controller.plot_critic_value_function()

In [None]:
# Plot the actor's policy using the controller's plotting helper.
current_controller.plot_actor_policy()

In [None]:
# Same critic plot as an earlier cell, repeated.
# NOTE(review): presumably kept to compare snapshots at different points in
# training — confirm, or drop the duplicate cell.
current_controller.plot_critic_value_function()

In [None]:
# Same actor-policy plot as an earlier cell, repeated.
# NOTE(review): presumably kept to compare snapshots at different points in
# training — confirm, or drop the duplicate cell.
current_controller.plot_actor_policy()

In [None]:
# Show the last 10 entries of the controller's bellman_error sequence.
current_controller.bellman_error[-10:]

In [None]:
# Number of items currently held in the controller's experience store.
# Written as a print() call to match the other print(...) calls in this
# notebook; the Python 2 print statement is a SyntaxError on Python 3.
print(len(current_controller.experience))

In [None]:
# Load controller state from a saved checkpoint.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running elsewhere.
checkpoint_path = '/home/mderry/tensorflow-deepq/notebooks/logs/pendulum_checkpoint_6001'
current_controller.restore_checkpoint(checkpoint_path)

In [5]:
# Evaluation run: replay the current policy with training and exploration
# switched off, until the kernel is interrupted.
fast_mode = False

# Display settings are set again here so this cell also works when the
# training cell was skipped (for example after restoring a checkpoint).
# SPEED is assigned but not used in this cell.
if fast_mode:
    FPS, SPEED, RES = 5, 20.0, 0.03
else:
    FPS, SPEED, RES = 30, 1., 0.001

# DOUBLE_PENDULUM_PARAMS comes from the configuration cell at the top of the
# notebook; keeping a single definition prevents the training and evaluation
# runs from drifting apart.
d = DoublePendulum2(DOUBLE_PENDULUM_PARAMS)
try:
    while True:
        # The same pendulum instance is reused across simulate() calls.
        simulate(d, current_controller,
                 fps=FPS,
                 simulation_resolution=RES,
                 wait=False,
                 action_every=3,
                 disable_training=True,
                 ignore_exploration=True)
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop the loop.
    print("Interrupted")

Interrupted


In [None]:
from pympler import tracker

In [None]:
# Create a pympler SummaryTracker for memory diagnostics.
# NOTE(review): presumably this records a baseline that later print_diff()
# calls compare against — confirm against the pympler documentation.
mem_tracker = tracker.SummaryTracker()

In [None]:
# Print the tracker's memory diff report.
mem_tracker.print_diff()