# Swing-Up and Balancing of a Double Inverted Pendulum

In [1]:
%load_ext autoreload
%autoreload 2

import math
import os
import random
import tempfile
import time

from collections import defaultdict

import numpy as np
import tensorflow as tf

from tf_rl.controller import KerasDDPG
from tf_rl.models     import DDPGPolicyMLP, DDPGValueMLP
from tf_rl            import simulate
from tf_rl.simulation import DoublePendulum2

from keras import backend as K

# Physical configuration handed to DoublePendulum2 when a simulation is built.
DOUBLE_PENDULUM_PARAMS = dict(
    g_ms2=9.8,               # acceleration due to gravity, in m/s^2
    l1_m=1.0,                # length of pendulum 1, in m
    l2_m=1.0,                # length of pendulum 2, in m
    m1_kg=1.0,               # mass of pendulum 1, in kg
    m2_kg=1.0,               # mass of pendulum 2, in kg
    damping=0.2,             # NOTE(review): units not stated here -- confirm in DoublePendulum2
    max_control_input=10.0,  # presumably the bound on the control signal -- confirm in DoublePendulum2
)

Using TensorFlow backend.


## Load Fresh Networks and Controller

In [2]:
# Both networks are built from the simulator's observation size and the same
# [200, 200, 1] size list; they differ only in the last activation name
# ('tanh' for the policy, 'linear' for the value network) and in the value
# network also receiving the action size.
observation_size = DoublePendulum2.observation_size

# Fresh list objects are passed to each constructor on purpose, in case the
# models keep or modify them.
actor = DDPGPolicyMLP(
    observation_size,
    [200, 200, 1],
    ['relu', 'relu', 'tanh'],
)

critic = DDPGValueMLP(
    observation_size,
    DoublePendulum2.action_size,
    [200, 200, 1],
    ['relu', 'relu', 'linear'],
)

In [3]:
# Tunable DDPG settings, kept apart from the positional wiring below.
ddpg_settings = dict(discount_rate=0.99, exploration_period=1000000)

# Controller that ties the actor and critic built above to the simulator's
# observation and action sizes.
current_controller = KerasDDPG(
    DoublePendulum2.observation_size,
    DoublePendulum2.action_size,
    actor,
    critic,
    **ddpg_settings
)

## Train Controller on Simulator

In [None]:
# Set to True to run with the lower frame rate and the larger
# simulation_resolution value.
fast_mode = False

# SPEED is assigned for both modes but is not passed to simulate() in this cell.
FPS, SPEED, RES = (5, 20.0, 0.03) if fast_mode else (60, 1., 0.0001)

# Keyword arguments forwarded unchanged to every simulate() call below.
training_options = dict(
    fps=FPS,
    simulation_resolution=RES,
    action_every=3,
    reset_every=600,
    disable_training=False,
)

# Each pass builds a new pendulum and hands it to simulate(); the loop only
# stops when the kernel is interrupted.
try:
    while True:
        d = DoublePendulum2(DOUBLE_PENDULUM_PARAMS)
        simulate(d, current_controller, **training_options)
except KeyboardInterrupt:
    print("Interrupted")

## Load Checkpoint and Evaluate Controller 

In [4]:
# Checkpoint to evaluate. Set the PENDULUM_CHECKPOINT environment variable to
# point at a different snapshot (e.g. on another machine); when it is unset the
# original author's path is used, so existing behaviour is unchanged.
CHECKPOINT_PATH = os.environ.get(
    'PENDULUM_CHECKPOINT',
    '/home/mderry/local_data/rl_logs/pendulum_checkpoint_1950001')

current_controller.restore_checkpoint(CHECKPOINT_PATH)

In [5]:
# Set to True to run with the lower frame rate and the larger
# simulation_resolution value.
fast_mode = False

# SPEED is assigned for both modes but is not passed to simulate() in this cell.
FPS, SPEED, RES = (5, 20.0, 0.03) if fast_mode else (60, 1., 0.0001)

# Pendulum configuration used for evaluation (re-declared here so this cell
# sets it explicitly before building the simulation).
DOUBLE_PENDULUM_PARAMS = dict(
    g_ms2=9.8,               # acceleration due to gravity, in m/s^2
    l1_m=1.0,                # length of pendulum 1, in m
    l2_m=1.0,                # length of pendulum 2, in m
    m1_kg=1.0,               # mass of pendulum 1, in kg
    m2_kg=1.0,               # mass of pendulum 2, in kg
    damping=0.2,
    max_control_input=10.0,
)

# Keyword arguments forwarded unchanged to every simulate() call below;
# training is disabled and exploration is ignored for evaluation.
evaluation_options = dict(
    fps=FPS,
    simulation_resolution=RES,
    wait=False,
    action_every=3,
    disable_training=True,
    ignore_exploration=True,
)

# A single pendulum instance is built once and reused for every simulate()
# call; the loop only stops when the kernel is interrupted.
d = DoublePendulum2(DOUBLE_PENDULUM_PARAMS)
try:
    while True:
        simulate(d, current_controller, **evaluation_options)
except KeyboardInterrupt:
    print("Interrupted")

Interrupted
