## Notebook Setup 
The following cell will install Drake, check out the underactuated repository, and set up the path (only if necessary).
- On Google's Colaboratory, this **will take approximately two minutes** the first time it runs (to provision the machine), but should only need to reinstall once every 12 hours.  Colab will ask you to "Reset all runtimes"; say no to save yourself the reinstall.
- On Binder, the machines should already be provisioned by the time you can run this; it should return (almost) instantly.

More details are available [here](http://underactuated.mit.edu/underactuated.html?chapter=drake).

In [None]:
# Try to import the two packages this notebook depends on; only when one of
# them is missing, download the setup script from the underactuated
# repository and run it.
try:
    import pydrake
    import underactuated
except ImportError:
    # The leading "!" is an IPython shell escape, so this cell only works when
    # executed inside a notebook. The script is saved next to the notebook so
    # that it can be imported on the following line.
    !curl -s https://raw.githubusercontent.com/RussTedrake/underactuated/master/scripts/setup/jupyter_setup.py > jupyter_setup.py
    from jupyter_setup import setup_underactuated
    setup_underactuated()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pydrake.all import MathematicalProgram, Variables, Polynomial, Solve
from pydrake.examples.pendulum import PendulumParams

In [None]:
# sizes of the original state, the augmented state, and the input
nx, nz, nu = 2, 3, 1

# lift the original state to the augmented state used by the S-procedure:
# the first entry of x enters through its sine and cosine, the second entry
# is copied unchanged
from sympy import sin, cos
def x2z(x):
    return np.array([sin(x[0]), cos(x[0]), x[1]])

# pendulum parameters and the two constants that appear in the dynamics
# written in the augmented state
params = PendulumParams()
inertia = params.mass() * params.length() ** 2
tau_g = params.mass() * params.gravity() * params.length()
def dynamics(z, u):
    '''
    Time derivative of the augmented state.

    Arguments:
        z : augmented state, indexable with at least three entries
        u : input, indexable with at least one entry

    Returns a list with the three components of the derivative of z.
    '''
    z_sin, z_cos, z_vel = z[0], z[1], z[2]

    # numerator of the last component, divided by the inertia below
    torque = tau_g * z_sin + u[0] - params.damping() * z_vel

    return [z_cos * z_vel, - z_sin * z_vel, torque / inertia]

# the initial state is distributed uniformly over the box [x_min, x_max],
# which is symmetric about the origin
x_max = np.array([np.pi, 2*np.pi])
x_min = - x_max
volume = (x_max - x_min).prod()

# equilibrium point, in the original and in the augmented state
x0 = np.array([0, 0])
z0 = x2z(x0)

# weights of the quadratic running cost
Q = np.diag([1, 1, 1])
R = np.diag([5])
def running_cost(z, u):
    '''
    Quadratic running cost in the augmented state.

    Penalizes the deviation of z from the equilibrium z0 with weight Q and
    the input u with weight R.
    '''
    deviation = z - z0
    state_cost = deviation.dot(Q).dot(deviation)
    input_cost = u.dot(R).dot(u)
    return state_cost + input_cost

In [None]:
def polyint(p, x_min, x_max, x2z=None):
    '''
    Integrates p(z(x)) over the box [x_min, x_max].

    The integral is computed symbolically with sympy, one monomial of p at a
    time, and the monomial integrals are combined using the coefficients of p.

    Arguments:
        p : pydrake Polynomial
        x_min, x_max : numpy arrays with the lower and upper integration
            limits, one entry per integration variable
        x2z : callable python function, if None assumed to be the identity

    Returns:
        The sum over the monomials of p of coefficient times monomial
        integral (the integer 0 if p has no monomials).

    Raises:
        ValueError : if x_min and x_max have different lengths
    '''

    # Symbol and integrate are not imported anywhere else in this file (only
    # sin and cos are), so bring them into scope here
    from sympy import Symbol, integrate

    # integration variables
    nx = len(x_min)
    if len(x_max) != nx:
        raise ValueError('x_min and x_max must have the same length')
    x = [Symbol(f'x({i})') for i in range(nx)]

    # evaluate transformation if provided
    z = x if x2z is None else x2z(x)

    # the indeterminates do not change across monomials
    # NOTE(review): z[i] is paired with the i-th indeterminate in the
    # iteration order of p.indeterminates(); confirm this matches the order
    # in which the indeterminates were created
    indeterminates = list(p.indeterminates())

    # compute integral one monomial at a time
    integral = 0
    for m, c in p.monomial_to_coefficient_map().items():

        # integrand for the current monomial
        m_integrand = 1
        for i, zi in enumerate(indeterminates):
            m_integrand *= z[i] ** m.degree(zi)

        # integrate over one variable after the other
        m_integral = m_integrand
        for x_i, lower, upper in zip(x, x_min, x_max):
            m_integral = integrate(m_integral, (x_i, lower, upper))

        # add monomial integral to the overall polynomial integral
        integral += c * m_integral

    return integral

In [None]:
def get_value_function(deg):
    '''
    Solves the SOS program for a polynomial value function of degree 2 * deg.

    Arguments:
        deg : half of the degree of the value function and of the multiplier

    Returns:
        z : indeterminates of the augmented state
        Jstar : pydrake Polynomial with the optimal value function
        objective : negated optimal cost of the program
    '''

    # program, indeterminates, and polynomial degree shared by J and lam
    prog = MathematicalProgram()
    z = prog.NewIndeterminates(nz, 'z')
    u = prog.NewIndeterminates(nu, 'u')
    z_vars = Variables(z)
    poly_deg = deg * 2

    # value function as a free polynomial in z
    J = prog.NewFreePolynomial(z_vars, poly_deg)
    Jexpr = J.ToExpression()
    # disabled alternatives, kept for reference:
    # J = prog.NewSosPolynomial(Variables(z), deg * 2)[0]
    # lam0 = prog.NewFreePolynomial(Variables(z), deg * 2).ToExpression()
    # prog.AddSosConstraint(Jexpr + lam0 * (z[0]**2 + z[1]**2 - 1))

    # maximize the average of J over the box of initial states
    obj = polyint(J, x_min, x_max, x2z)
    prog.AddLinearCost(- obj / volume)

    # multiplier of the S-procedure for s^2 + c^2 = 1
    lam = prog.NewFreePolynomial(z_vars, poly_deg).ToExpression()
    unit_circle = z[0]**2 + z[1]**2 - 1

    # J cannot decrease along trajectories faster than the running cost
    Jdot = Jexpr.Jacobian(z).dot(dynamics(z, u))
    prog.AddSosConstraint(Jdot + running_cost(z, u) + lam * unit_circle)

    # J must be nonpositive at the equilibrium z0
    J0 = Jexpr.EvaluatePartial(dict(zip(z, z0)))
    prog.AddLinearConstraint(J0 <= 0)

    # solve, report the solver used, and stop if the solve failed
    result = Solve(prog)
    print(result.get_solver_id().name())
    assert result.is_success()

    # extract the solution; the sign flip undoes the one in the cost above
    Jstar = Polynomial(result.GetSolution(Jexpr))
    objective = - result.get_optimal_cost()

    return z, Jstar, objective

In [None]:
def solve(deg):
    '''
    Computes the value function and the feedback law derived from it.

    Arguments:
        deg : passed on to get_value_function

    Returns:
        z : indeterminates of the augmented state
        ustar : feedback law as symbolic expressions in z
        Jstar : pydrake Polynomial with the value function
    '''

    # value function from the SOS program
    z, Jstar, objective = get_value_function(deg)
    print('Objective:', objective)

    # column through which the input enters the augmented dynamics
    input_column = np.array([[0], [0], [1 / inertia]])

    # feedback in augmented coordinates: - 1/2 R^-1 f2' dJ/dz'
    gain = np.linalg.inv(R).dot(input_column.T)
    value_gradient = Jstar.ToExpression().Jacobian(z)
    ustar = -0.5 * gain.dot(value_gradient.T)

    return z, ustar, Jstar

In [None]:
from pydrake.all import DiagramBuilder, Simulator, VectorSystem, WrapToSystem, LogOutput
from pydrake.examples.pendulum import PendulumPlant
from underactuated.pendulum import PendulumVisualizer
from underactuated.jupyter import AdvanceToAndVisualize

# Shift between the plant state and the controller coordinates: the
# simulation starts from x0 + offset and the controller is evaluated at
# x - offset.
# NOTE(review): presumably needed because PendulumPlant measures the angle
# from the downward position - confirm.
offset = np.array([np.pi, 0])
def simulate(controller, x0):
    '''
    Simulates the pendulum in closed loop with the given controller.

    Arguments:
        controller : Drake system whose input port 0 receives the pendulum
            state (first entry wrapped to [0, 2*pi)) and whose output port 0
            is connected to the pendulum input
        x0 : initial state, shifted by offset before it is set on the plant

    Returns:
        The logger that records output port 0 of the pendulum for the five
        seconds of simulation.
    '''

    builder = DiagramBuilder()
    pendulum = builder.AddSystem(PendulumPlant())

    # wrap the first state entry before it reaches the controller
    wrap = builder.AddSystem(WrapToSystem(2))
    wrap.set_interval(0, 0, 2*np.pi)
    builder.Connect(pendulum.get_output_port(0), wrap.get_input_port(0))

    # close the loop: wrapped state -> controller -> pendulum input
    controller_system = builder.AddSystem(controller)
    builder.Connect(wrap.get_output_port(0), controller_system.get_input_port(0))
    builder.Connect(controller_system.get_output_port(0), pendulum.get_input_port(0))

    # the logger records the unwrapped pendulum output
    logger = LogOutput(pendulum.get_output_port(0), builder)

    # NOTE: the visualizer is wired in but recording is currently disabled,
    # so no animation is produced or returned
    visualizer = builder.AddSystem(PendulumVisualizer(show=False))
    builder.Connect(pendulum.get_output_port(0), visualizer.get_input_port(0))

    diagram = builder.Build()
    simulator = Simulator(diagram)
    simulator.get_mutable_context().SetContinuousState(x0 + offset)

    duration = 5
    simulator.Initialize()
    simulator.AdvanceTo(duration)

    return logger

def simulate_and_plot(deg, x0):
    '''
    Solves for the controller, simulates it, and saves two plots.

    The state trajectory is saved to x_pendulum_deg_{deg}.pdf and the value
    function along the trajectory to Jt_pendulum_deg_{deg}.pdf.

    Arguments:
        deg : passed on to solve
        x0 : initial state, passed on to simulate
    '''

    # solve SOS
    z, ustar, Jstar = solve(deg)

    # values of the augmented state for a state x in original coordinates
    def augmented_env(x):
        return {z[0]: np.sin(x[0]), z[1]: np.cos(x[0]), z[2]: x[1]}

    # get controller in original coordinates
    def state_feedback(x):
        env = augmented_env(x)
        return np.array([ui.Evaluate(env) for ui in ustar])

    # drake controller
    class Controller(VectorSystem):
        def __init__(self, state_feedback):
            VectorSystem.__init__(self, 2, 1)
            self.state_feedback = state_feedback
        def DoCalcVectorOutput(self, context, x, unused, u):
            # the feedback law is expressed in coordinates shifted by offset
            u[:] = self.state_feedback(x - offset)

    # run sim
    logger = simulate(Controller(state_feedback), x0)

    # logged states, one per row, shifted back to the controller coordinates
    states = logger.data().T - offset

    # plot state trajectory
    plt.figure()
    plt.plot(states[:, 0], states[:, 1])
    plt.scatter(0, 0, c='r')
    plt.scatter(2*np.pi, 0, c='r')
    plt.xlabel(r'$x_1$')
    plt.ylabel(r'$x_2$')
    plt.grid(1)
    plt.xlim(-np.pi, 3*np.pi)
    plt.ylim(-2*np.pi, 2*np.pi)
    plt.savefig(f'x_pendulum_deg_{deg}.pdf', bbox_inches='tight')

    # evaluate value function along trajectory
    J_values = [Jstar.Evaluate(augmented_env(xt)) for xt in states]

    # plot value function
    plt.figure()
    plt.plot(logger.sample_times(), J_values)
    plt.xlabel(r'Time $t$')
    plt.ylabel(r'$J^*(t)$')
    plt.grid(1)
    plt.xlim(0, 5)
    plt.savefig(f'Jt_pendulum_deg_{deg}.pdf', bbox_inches='tight')

In [None]:
# Run the experiment with deg = 3 from the given initial state; simulate
# adds offset to x0 before setting it on the plant.
x0 = np.array([.9 * np.pi, 0])
simulate_and_plot(3, x0)

# NOTE(review): simulate is defined in this file as simulate(controller, x0)
# and returns only the logger, so both calls below raise TypeError as
# written. This looks like a leftover from a version of simulate that took no
# arguments and returned an animation - confirm before running.
from matplotlib import animation
ani = simulate()
writer = animation.writers['ffmpeg'](fps=30, bitrate=1800)
ani.save('swing_up.mp4', writer=writer)

from IPython.display import HTML
ani, logger = simulate()
HTML(ani.to_jshtml())