In [1]:
%load_ext autoreload
%autoreload 2

import math
import random
import tempfile
import tensorflow as tf
import time

from collections import defaultdict

from tf_rl.controller import KerasDDPG
from tf_rl.models     import KERASMLP
from tf_rl.simulation import DoublePendulum
from tf_rl            import simulate

# Physical configuration handed to DoublePendulum(...) further down.
# Key names carry their units as a suffix where the original stated them.
DOUBLE_PENDULUM_PARAMS = dict(
    g_ms2=9.8,               # gravitational acceleration [m/s^2]
    l1_m=1.0,                # length of the first pendulum [m]
    l2_m=2.0,                # length of the second pendulum [m]
    m1_kg=1.0,               # mass of the first pendulum [kg]
    m2_kg=1.0,               # mass of the second pendulum [kg]
    damping=0.4,             # damping setting (units not stated in this notebook)
    max_control_input=20.0,  # presumably a bound on the control signal -- confirm in DoublePendulum
)

Using Theano backend.


In [2]:
# Actor network: fed an observation, ends in a tanh layer.
# The output width is tied to DoublePendulum.action_size instead of a
# hard-coded 1 so it always agrees with the action size that the critic
# input and the controller below are built from (identical when
# action_size == 1).
# NOTE(review): assumes KERASMLP's arguments are
# (input size, layer sizes, activations) -- confirm against tf_rl.models.
actor = KERASMLP(DoublePendulum.observation_size,
                 [300, 300, DoublePendulum.action_size],
                 ['relu', 'relu', 'tanh'])

# Critic network: its input size is observation_size + action_size and it
# ends in a single linear unit.
critic = KERASMLP(DoublePendulum.observation_size + DoublePendulum.action_size,
                  [300, 300, 1],
                  ['relu', 'relu', 'linear'])

# Controller wired to the two networks above; hyper-parameters are unchanged.
current_controller = KerasDDPG(DoublePendulum.observation_size,
                               DoublePendulum.action_size, actor, critic,
                               discount_rate=0.99, exploration_period=10000, max_experience=1000000,
                               store_every_nth=4, train_every_nth=4, exploration_sigma=0.001)

In [None]:
# Flip to True to pick the low-FPS preset below.
fast_mode = False

# Preset triple: (frames per second, speed factor, simulation resolution).
FPS, SPEED, RES = (5, 20.0, 0.03) if fast_mode else (30, 1., 0.03)
# NOTE(review): SPEED is assigned here but never handed to simulate() in this
# cell, so only FPS differs between the two presets -- confirm whether
# simulate() should receive it.

try:
    # Keep running simulations on a freshly constructed pendulum until the
    # kernel is interrupted; training stays enabled the whole time.
    while True:
        d = DoublePendulum(DOUBLE_PENDULUM_PARAMS)
        simulate(d, current_controller,
                 fps=FPS, simulation_resolution=RES,
                 action_every=4, disable_training=False)
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop this cell.
    print("Interrupted")

In [None]:
# Ask the controller to report its loss history (the output format is
# whatever KerasDDPG.print_loss_history produces).
current_controller.print_loss_history()

In [None]:
# Display the controller's `iteration` attribute
# (presumably a training-step counter -- confirm in KerasDDPG).
current_controller.iteration

In [None]:
# Build a fresh pendulum for manual inspection. This rebinds `d`, the same
# name the training loop uses for its simulation instance.
d = DoublePendulum(DOUBLE_PENDULUM_PARAMS)
# Display what joint_positions() returns for the freshly built simulation.
d.joint_positions()

In [None]:
# Display the value returned by collect_reward() for the current state of `d`.
# NOTE(review): unclear from this notebook whether the call has side effects.
d.collect_reward()

In [None]:
# Display the observation the simulation currently reports.
d.observe()

In [None]:
# Query the controller for an action on the current observation, with
# exploration left enabled (disable_exploration=False).
# NOTE(review): this may advance the controller's internal counters -- confirm.
current_controller.action(d.observe(), disable_exploration=False)

In [None]:
# Display the controller's `actions_executed_so_far` attribute.
current_controller.actions_executed_so_far

In [None]:
# Evaluate the annealing helper for the controller's current action count,
# going from 1.0 towards the controller's exploration_sigma over
# exploration_period.
# The original called ContinuousDeepQ.linear_annealing, but ContinuousDeepQ is
# never imported in this notebook, so the cell raised NameError on a fresh
# kernel. KerasDDPG is the class the controller was built from and is already
# imported.
# NOTE(review): assumes KerasDDPG exposes the same 4-argument
# linear_annealing(n, total, initial, final) helper -- confirm in tf_rl.controller.
noise_sigma = KerasDDPG.linear_annealing(current_controller.actions_executed_so_far,
                                         current_controller.exploration_period,
                                         1.0,
                                         current_controller.exploration_sigma)

In [None]:
# Display the annealed value computed in the preceding cell.
noise_sigma

In [None]:
# Rebind the live controller to the KerasDDPG class object currently in the
# namespace (e.g. after the module was edited and reloaded), keeping the
# instance's state.
# The original assigned ContinuousDeepQ, which is never imported in this
# notebook (NameError) and is not the class this controller was built from.
current_controller.__class__ = KerasDDPG