Welcome!  If you are new to Google Colab/Jupyter notebooks, you might take a look at [this notebook](https://colab.research.google.com/notebooks/basic_features_overview.ipynb) first.

**I recommend you run the first code cell of this notebook immediately to start provisioning Drake on the cloud machine; you can then leave this window open while you [read the textbook](http://underactuated.csail.mit.edu/policy_search.html).**

# Notebook Setup

The following cell will:
- on Colab (only), install Drake to `/opt/drake`, install Drake's prerequisites via `apt`, and add pydrake to `sys.path`.  This will take approximately two minutes the first time it runs (to provision the machine), but should only need to reinstall once every 12 hours.  If you navigate between notebooks using Colab's "File->Open" menu, then you can avoid provisioning a separate machine for each notebook.
- import packages used throughout the notebook.

You will need to rerun this cell if you restart the kernel, but it should be fast (even on Colab) because the machine will already have Drake installed.

In [None]:
import importlib
import sys
from urllib.request import urlretrieve

# Install drake (and underactuated).
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule has been loaded, so import it explicitly before using find_spec.
import importlib.util

if 'google.colab' in sys.modules and importlib.util.find_spec('underactuated') is None:
    urlretrieve("http://underactuated.csail.mit.edu/scripts/setup/setup_underactuated_colab.py",
                "setup_underactuated_colab.py")
    from setup_underactuated_colab import setup_underactuated
    setup_underactuated(underactuated_sha='560c2adace05eb20ebd78377582015d5b2d3859a', drake_version='0.27.0', drake_build='release')

# On Colab, pass the `--ngrok_http_tunnel` flag to the meshcat server; elsewhere
# no extra server arguments are used.
server_args = ['--ngrok_http_tunnel'] if 'google.colab' in sys.modules else []
# Start a single meshcat server instance to use for the remainder of this notebook.
from meshcat.servers.zmqserver import start_zmq_server_as_subprocess
proc, zmq_url, web_url = start_zmq_server_as_subprocess(server_args=server_args)

from underactuated.jupyter import running_as_notebook


# Policy Optimization for LQR

# Trajectory-based (stochastic) gradient descent

## Pendulum swing-up

TODO(russt): make a version of this that takes the symbolic dynamics from pydrake, but implements everything as tensors on the GPU.


In [None]:
import numpy as np
import torch
import meshcat
import pydot
from IPython.display import display, SVG

from pydrake.all import (
    TemplateSystem, LeafSystem_, BasicVector_, 
    DiagramBuilder, Simulator_, Simulator, 
    SceneGraph,
    AutoDiffXd, initializeAutoDiff, autoDiffToGradientMatrix, autoDiffToValueMatrix,
    Integrator,# wrap_to,
    ConnectMeshcatVisualizer, LogOutput
)
from pydrake.examples.pendulum import PendulumPlant, PendulumGeometry

from underactuated.meshcat_utils import set_planar_viewpoint

@TemplateSystem.define("ParameterizedController_")
def ParameterizedController_(T):
    """Return the scalar-type-`T` instantiation of the parameterized controller.

    The system reads a two-element "state" input and writes a one-element
    "command" output. The command is the dot product of nine numeric
    parameters with a fixed set of nine features of the state.
    """

    class Impl(LeafSystem_[T]):

        def _construct(self, converter=None):
            """Declare the state input, the parameter vector and the command output."""
            LeafSystem_[T].__init__(self, converter)
            self.DeclareVectorInputPort("state", BasicVector_[T](2))
            # Nine feature weights, all zero until a caller overwrites them.
            initial_weights = BasicVector_[T](np.zeros((9,1)))
            self.DeclareNumericParameter(initial_weights)
            self.DeclareVectorOutputPort(
                "command", BasicVector_[T](1), self.CommandOutput)

        def _construct_copy(self, other, converter=None):
            """Scalar-conversion constructor; rebuilds the same declarations."""
            Impl._construct(self, converter=converter)

        def CommandOutput(self, context, output):
            """Write weights . features(state) into the single output element."""
            state = self.get_input_port().Eval(context)
            theta, thetadot = state[0], state[1]
            # Evaluate the trigonometric terms once and reuse them below.
            s = np.sin(theta)
            c = np.cos(theta)
            features = np.array([
                1, s, c, thetadot,
                s**2, s*c,
                thetadot*s, thetadot*c,
                thetadot**2,
            ])
            weights = context.get_numeric_parameter(0).CopyToVector()
            output[0] = weights.dot(features)

    return Impl

ParameterizedController = ParameterizedController_[None]  # Default instantiation

def wrap_to(value, low, high):
    """Wrap `value` into the half-open interval [low, high).

    Args:
        value: Number (or numpy array) to wrap.
        low: Lower bound of the interval; returned when `value` lands on a bound.
        high: Upper bound of the interval; expected to be greater than `low`.

    Returns:
        `value` shifted by a whole number of interval widths so that it lies
        in [low, high).
    """
    # Named `span` rather than `range` so the builtin is not shadowed.
    span = high - low
    return value - span * np.floor((value - low) / span)


@TemplateSystem.define("RunningCost_")
def RunningCost_(T):
    """Return the scalar-type-`T` instantiation of the running-cost system.

    The system has a two-element "state" input, a one-element "command"
    input, and a one-element "cost" output holding a quadratic cost of the
    state error and the command.
    """

    class Impl(LeafSystem_[T]):

        def _construct(self, converter=None):
            """Declare the two inputs (state, command) and the cost output."""
            LeafSystem_[T].__init__(self, converter)
            self.DeclareVectorInputPort("state", BasicVector_[T](2))
            self.DeclareVectorInputPort("command", BasicVector_[T](1))
            self.DeclareVectorOutputPort(
                "cost", BasicVector_[T](1), self.CostOutput)

        def _construct_copy(self, other, converter=None):
            """Scalar-conversion constructor; rebuilds the same declarations."""
            Impl._construct(self, converter=converter)

        def CostOutput(self, context, output):
            """Write x'Qx + R*u^2 with the angle measured from pi and wrapped."""
            error = self.get_input_port(0).Eval(context)
            # Replace the angle by its offset from pi, wrapped into [-pi, pi).
            error[0] = wrap_to(error[0] - np.pi, -np.pi, np.pi)
            command = self.get_input_port(1).Eval(context)[0]
            state_weight = np.diag([10, 1])
            command_weight = 1
            state_cost = error.dot(state_weight.dot(error))
            command_cost = command_weight*command**2
            output[0] = state_cost + command_cost

    return Impl

RunningCost = RunningCost_[None]  # Default instantiation


def simulate(alpha, N=10, set_planar=True):
    """Animate closed-loop pendulum rollouts in meshcat.

    Args:
        alpha: Controller parameters written into the controller's numeric
            parameter group 0.
        N: Number of rollouts to run, each from a fresh random initial state.
        set_planar: When true, ask the visualizer for its planar viewpoint
            before simulating.
    """
    builder = DiagramBuilder()

    # Pendulum and controller wired in feedback.
    plant = builder.AddSystem(PendulumPlant())
    controller = builder.AddSystem(ParameterizedController())
    builder.Connect(plant.get_state_output_port(), controller.get_input_port())
    builder.Connect(controller.get_output_port(), plant.get_input_port())

    # Geometry and meshcat visualization, driven by the plant state.
    scene_graph = builder.AddSystem(SceneGraph())
    PendulumGeometry.AddToBuilder(builder, plant.get_state_output_port(), scene_graph)
    visualizer = ConnectMeshcatVisualizer(builder, scene_graph=scene_graph, zmq_url=zmq_url)
    if set_planar:
        visualizer.set_planar_viewpoint()
    visualizer.vis.delete()

    diagram = builder.Build()
    rng = np.random.default_rng()

    simulator = Simulator(diagram)
    simulator.set_target_realtime_rate(1.0)
    root_context = simulator.get_mutable_context()
    controller_context = controller.GetMyContextFromRoot(root_context)
    controller_context.get_mutable_numeric_parameter(0).SetFromVector(alpha)

    # Every rollout restarts at t = 0 from a standard-normal state and runs for 2 s.
    for _ in range(N):
        root_context.SetTime(0.)
        root_context.SetContinuousState(rng.standard_normal((2,1)))
        simulator.Initialize()
        simulator.AdvanceTo(2.0)


# Module-level meshcat visualizer attached to the server at `zmq_url`.
# plot_rollouts() draws into it, and the training loop clears it.
vis = meshcat.Visualizer(zmq_url=zmq_url, server_args=server_args)
set_planar_viewpoint(vis)
def plot_rollouts(alpha, N=10):
    """Draw the first logged state component against time for random rollouts.

    Clears the module-level meshcat visualizer `vis`, draws two green
    reference lines at +pi and -pi, then adds one blue line per rollout whose
    vertices are (time, 0, first state component).

    Args:
        alpha: Controller parameters written into the controller's numeric
            parameter group 0.
        N: Number of rollouts to simulate and draw.
    """
    builder = DiagramBuilder()
    plant = builder.AddSystem(PendulumPlant())
    controller = builder.AddSystem(ParameterizedController())
    builder.Connect(plant.get_state_output_port(), controller.get_input_port())
    builder.Connect(controller.get_output_port(), plant.get_input_port())
    log = LogOutput(plant.get_state_output_port(), builder)

    diagram = builder.Build()
    rng = np.random.default_rng()

    simulator = Simulator(diagram)
    root_context = simulator.get_mutable_context()
    controller_context = controller.GetMyContextFromRoot(root_context)
    controller_context.get_mutable_numeric_parameter(0).SetFromVector(alpha)

    def draw_polyline(path, points, color):
        # `points` holds one vertex per column.
        geometry = meshcat.geometry.PointsGeometry(points)
        material = meshcat.geometry.LineBasicMaterial(color=color)
        vis[path].set_object(meshcat.geometry.Line(geometry, material))

    vis.delete()
    tf = 2.5
    # Reference lines spanning the whole time axis at +pi and -pi.
    for path, level in (("goal", np.pi), ("goal_wrapped", -np.pi)):
        draw_polyline(path, np.vstack([[0, tf], [0, 0], [level, level]]), 0x00dd00)

    for n in range(N):
        log.reset()
        root_context.SetTime(0.)
        root_context.SetContinuousState(rng.standard_normal((2,1)))
        simulator.Initialize()
        simulator.AdvanceTo(tf)
        times = log.sample_times()
        first_state = log.data()[0,:]
        draw_polyline(f"traj{n}", np.vstack([times, 0*times, first_state]), 0x0000dd)


def trajectory_gradient_descent(alpha0, learning_rate=1e-3, num_rollouts=50,
                                horizon=2.5, max_epochs=2000):
    """Tune the controller parameters by stochastic gradient descent on rollout cost.

    Builds the closed loop of pendulum and parameterized controller, feeds the
    state and command into a running-cost system, and integrates that cost.
    The diagram is converted to AutoDiffXd so that each simulation yields the
    accumulated cost together with its gradient with respect to the controller
    parameters. Those values are exposed to torch through a custom
    `autograd.Function`, and Adam performs the parameter updates.

    Every 10th epoch the current rollouts are plotted; every 100th epoch a few
    rollouts are animated and the parameters are printed.

    Args:
        alpha0: Initial controller parameters (the notebook passes a (9, 1)
            array).
        learning_rate: Step size handed to `torch.optim.Adam`.
        num_rollouts: Number of random-initial-condition rollouts averaged per
            cost/gradient evaluation. Must be at least 1.
        horizon: Simulated duration of each rollout.
        max_epochs: Number of optimizer steps when `running_as_notebook` is
            true; otherwise a single step is taken.

    Returns:
        The parameters after the last optimizer step, as a numpy array.

    Raises:
        ValueError: If `num_rollouts` is smaller than 1.
    """
    if num_rollouts < 1:
        raise ValueError("num_rollouts must be at least 1")

    builder = DiagramBuilder()
    plant = builder.AddSystem(PendulumPlant())
    plant.set_name("plant")
    pi = builder.AddSystem(ParameterizedController())
    pi.set_name("pi")
    builder.Connect(plant.get_state_output_port(), pi.get_input_port())
    builder.Connect(pi.get_output_port(), plant.get_input_port())

    running_cost = builder.AddSystem(RunningCost())
    running_cost.set_name("running_cost")
    builder.Connect(plant.get_state_output_port(), running_cost.get_input_port(0))
    builder.Connect(pi.get_output_port(), running_cost.get_input_port(1))
    cost = builder.AddSystem(Integrator(1))
    cost.set_name("integrator")
    builder.Connect(running_cost.get_output_port(), cost.get_input_port())

    diagram = builder.Build()
    diagram.set_name("trajectory gradient descent")
    # Useful if you want to poke around:
    #display(SVG(pydot.graph_from_dot_data(diagram.GetGraphvizString())[0].create_svg()))

    # Subsystems are looked up by name because the AutoDiff diagram holds
    # converted copies of the systems added above.
    diagram_ad = diagram.ToAutoDiffXd()
    plant_ad = diagram_ad.GetSubsystemByName("plant")
    pi_ad = diagram_ad.GetSubsystemByName("pi")
    cost_ad = diagram_ad.GetSubsystemByName("integrator")

    simulator = Simulator_[AutoDiffXd](diagram_ad)
    integrator = simulator.get_mutable_integrator()
    integrator.set_fixed_step_mode(True)
    integrator.set_maximum_step_size(0.05)

    rng = np.random.default_rng()

    context = simulator.get_mutable_context()
    plant_context = plant_ad.GetMyContextFromRoot(context)
    pi_context = pi_ad.GetMyContextFromRoot(context)
    cost_context = cost_ad.GetMyContextFromRoot(context)

    class TorchCost(torch.autograd.Function):
        """Average rollout cost as a torch function of the controller parameters."""

        @staticmethod
        def forward(ctx, alpha_tensor):
            alpha = alpha_tensor.detach().numpy()
            J = 0
            dJdalpha = 0*alpha.T
            # Seed the derivatives so gradients are taken with respect to alpha.
            pi_context.get_mutable_numeric_parameter(0).SetFromVector(initializeAutoDiff(alpha))
            for _ in range(num_rollouts):
                context.SetTime(0.)
                plant_context.SetContinuousState(rng.standard_normal((2,1)))
                # Restart the cost integral for this rollout.
                cost_context.SetContinuousState([0])
                simulator.Initialize()
                simulator.AdvanceTo(horizon)
                accumulated_cost = cost_context.get_continuous_state_vector().CopyToVector()
                J += autoDiffToValueMatrix(accumulated_cost)
                dJdalpha += autoDiffToGradientMatrix(accumulated_cost)
            ctx.save_for_backward(torch.from_numpy(dJdalpha/num_rollouts))
            return torch.from_numpy(J/num_rollouts)

        @staticmethod
        def backward(ctx, grad_output):
            dJdalpha, = ctx.saved_tensors
            # forward() has a single input, so exactly one gradient is returned.
            return dJdalpha.T @ grad_output

    alpha = torch.tensor(alpha0, dtype=torch.float64, requires_grad=True)
    # Debugging aid: evaluate the cost and gradient once.
#   J = TorchCost.apply(alpha)
#   J.backward()
#   print(J)
#   print(alpha.grad)

    optimizer = torch.optim.Adam([alpha], lr=learning_rate)
    for epoch in range(max_epochs if running_as_notebook else 1):
        J = TorchCost.apply(alpha)
        optimizer.zero_grad()
        J.backward()
        optimizer.step()

        print(epoch, J.item())
        if epoch % 100 == 99:
            vis.delete()
            simulate(alpha.detach().numpy(), N=5, set_planar=False)
            print(alpha.detach().numpy().T)
        if epoch % 10 == 9:
            plot_rollouts(alpha.detach().numpy(), N=5)

    return alpha.detach().numpy()

# Cold-start alternative (small uniform parameters):
#alpha=0.01*np.ones((9,1))
# Warm start from the parameters found by a previous run:
alpha = np.array([0.71027108, 2.61142863, 0.6379523, -0.23501137, 0.64446781, -1.45158088, 0.37477416, 1.05945338, -0.04337835]).reshape((9,1))
# Run the optimization and print the resulting parameters as a row vector.
alpha = trajectory_gradient_descent(alpha)
print(alpha.T)

In [None]:
# Plot 50 rollouts using the current `alpha`.
plot_rollouts(alpha, N=50)

In [None]:
# Animate 10 rollouts using the current `alpha`.
simulate(alpha, N=10)

# Static Output Feedback Counter-example

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# System matrices for the counter-example; the loop below closes it with the
# scalar output-feedback gain k, giving the closed-loop matrix A - k*B*C.
A = np.array([[0, 0, 2], [1, 0, 0], [0, 1, 0]])
B = np.array([[1],[0],[0]])
C = np.array([[1, 1, 3]])

# Largest real part of the closed-loop eigenvalues for each candidate gain.
k = np.linspace(0, 3, 101)
lambda_max_real = np.zeros_like(k)
for i, gain in enumerate(k):
    A_cl = A - B @ (gain*C)
    lambda_max_real[i] = np.amax(np.real(np.linalg.eigvals(A_cl)))

fig, ax = plt.subplots(figsize=(6,2))
ax.plot(k, lambda_max_real)
# Zero line for reference.
ax.plot(k, 0*k, 'k')
ax.set_xlabel('k')
# Raw string: `\R` and `\l` are not valid escape sequences in a normal literal.
ax.set_ylabel(r'$\Re(\lambda_{max})$');
