In [None]:
import math

import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint

from gym_brt.control import (
    zero_policy,
    constant_policy,
    square_wave_policy,
    pd_control_policy,
    flip_and_hold_policy,
    square_wave_flip_and_hold_policy,
)
from gym_brt.quanser import QubeHardware, QubeSimulator
from gym_brt.quanser.qube_interfaces import forward_model_ode


# Helper functions

In [None]:
# Run on the hardware
def run_qube(begin_up, policy, nsteps, frequency, integration_steps):
    """Roll out `policy` on the Qube hardware and record the trajectory.

    Args:
        begin_up: Truthy to start from `qube.reset_up()`, falsy to start from
            `qube.reset_down()`.
        policy: Callable invoked as `policy(state, step=k)`; its return value
            is the action passed to `qube.step`.
        nsteps: Number of `qube.step` calls to make.
        frequency: Forwarded to `QubeHardware`.
        integration_steps: Forwarded to `QubeHardware`.

    Returns:
        A tuple `(hist, init_state)`. `hist` is a 2d array with `nsteps + 1`
        rows; `hist[n, d]` gives the nth timestep and the dth dimension, with
        dims ordered as ['Theta', 'Alpha', 'Theta dot', 'Alpha dot', 'Action'].
        `init_state` is the state returned by the reset call.
    """
    # NOTE(review): this used to pass the undefined name `forward_model`; the
    # imported `forward_model_ode` is used instead. Confirm that QubeHardware
    # accepts the `forward_model` / `integration_steps` keywords at all.
    with QubeHardware(
        forward_model=forward_model_ode,
        frequency=frequency,
        integration_steps=integration_steps,
        max_voltage=3.0,
    ) as qube:

        # Truthiness (rather than `is True` / `is False`) so that values such
        # as np.bool_ or 0/1 no longer leave `init_state` unbound.
        if begin_up:
            init_state = qube.reset_up()
        else:
            init_state = qube.reset_down()

        s = init_state
        a = policy(s, step=0)
        s_hist = [s]
        a_hist = [a]

        for i in range(nsteps):
            s = qube.step(a)
            a = policy(s, step=i + 1)

            s_hist.append(s)  # States
            a_hist.append(a)  # Actions

        # Return a 2d array, hist[n,d] gives the nth timestep and the dth dimension
        # Dims are ordered as: ['Theta', 'Alpha', 'Theta dot', 'Alpha dot', 'Action']
        return np.concatenate((np.array(s_hist), np.array(a_hist)), axis=1), init_state

def run_sim(init_state, policy, nsteps, frequency, integration_steps):
    """Roll out `policy` in the Qube simulator from a given initial state.

    Args:
        init_state: Initial simulator state; converted to a float64 array.
        policy: Callable invoked as `policy(state, step=k)`; its return value
            is the action passed to `qube.step`.
        nsteps: Number of `qube.step` calls to make.
        frequency: Forwarded to `QubeSimulator`.
        integration_steps: Forwarded to `QubeSimulator`.

    Returns:
        A 2d array with `nsteps + 1` rows; `hist[n, d]` gives the nth timestep
        and the dth dimension, with dims ordered as
        ['Theta', 'Alpha', 'Theta dot', 'Alpha dot', 'Action'].
    """
    # `forward_model` was an undefined name; the model imported at the top of
    # the notebook is `forward_model_ode`.
    with QubeSimulator(
        forward_model=forward_model_ode,
        frequency=frequency,
        integration_steps=integration_steps,
        max_voltage=3.0,
    ) as qube:
        init_state = np.asarray(init_state, dtype=np.float64)
        # Give the simulator its own copy: np.asarray may return the caller's
        # array unchanged, and the recorded first row must not alias the
        # simulator state in case the simulator updates it in place.
        qube.state = init_state.copy()  # Set the initial state of the simulator

        s = init_state
        a = policy(s, step=0)
        s_hist = [s]
        a_hist = [a]

        for i in range(nsteps):
            s = qube.step(a)
            a = policy(s, step=i + 1)

            s_hist.append(s)  # States
            a_hist.append(a)  # Actions

        # Return a 2d array, hist[n,d] gives the nth timestep and the dth dimension
        # Dims are ordered as: ['Theta', 'Alpha', 'Theta dot', 'Alpha dot', 'Action']
        return np.concatenate((np.array(s_hist), np.array(a_hist)), axis=1)

In [None]:
def plot_results(hists, labels, colors=None):
    """Plot each recorded dimension of several runs on stacked subplots.

    Args:
        hists: Sequence of 2d arrays; column d of each array is drawn on the
            dth subplot.
        labels: Legend label for each entry of `hists`.
        colors: Optional line color for each entry of `hists`; when None the
            color is left to matplotlib.
    """
    dim_names = ['Theta', 'Alpha', 'Theta dot', 'Alpha dot', 'Action']

    # Work out the per-run plot keywords once instead of branching per subplot.
    if colors is None:
        series = [(run, {"label": name}) for run, name in zip(hists, labels)]
    else:
        series = [
            (run, {"label": name, "color": shade})
            for run, name, shade in zip(hists, labels, colors)
        ]

    fig, axes = plt.subplots(5, 1, sharex=True)
    for dim, (axis, dim_name) in enumerate(zip(axes, dim_names)):
        for run, plot_kwargs in series:
            axis.plot(run[:, dim], **plot_kwargs)
        axis.set_ylabel(dim_name)
        axis.legend()
    plt.show()

# Run experiments

In [None]:
# Settings shared by every experiment below
i_steps = 1  # passed as `integration_steps` to the run helpers
run_time = 5  # length of each run, in seconds
frequency = 250  # in Hz
nsteps = int(frequency * run_time)  # steps per run

In [None]:
# Make graphs BIG so the stacked subplots stay readable
plt.rcParams.update({"figure.figsize": (20, 20)})

# Natural responses

In [None]:
# Natural response when starting at α = 0 + noise (upright/inverted)
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(True, zero_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, zero_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)

In [None]:
# Natural response when starting at α = π + noise (downwards/at rest)
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(False, zero_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, zero_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)

# Forced responses

In [None]:
# Forced response when starting at α = π + noise (downwards/at rest), and using a constant policy
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(False, constant_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, constant_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)

In [None]:
# Forced response when starting at α = π + noise (downwards/at rest), and using a policy that
# switches direction every 85 steps
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(False, square_wave_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, square_wave_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)

# PD controller (starting inverted)

In [None]:
# PD controller to hold when starting at α = 0 + noise (inverted)
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(True, pd_control_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, pd_control_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)


# Energy + PD controller to flip and hold (starting down)

In [None]:
# Energy + PD controller to flip up and hold when starting at α = π + noise (downwards/at rest)
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(False, flip_and_hold_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, flip_and_hold_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)

# Square wave + PD controller to flip and hold (starting down)
This policy often works on the physical Qube, so it should work in the simulator too.

In [None]:
# Square wave + PD controller to flip up and hold when starting at α = π + noise (downwards/at rest)
# The hardware run comes from run_qube, which also returns the state it started
# from; that state seeds the simulator so both runs begin identically.
hist_qube, init_state = run_qube(False, square_wave_flip_and_hold_policy, nsteps, frequency, i_steps)
hist_ode = run_sim(init_state, square_wave_flip_and_hold_policy, nsteps, frequency, i_steps)

plot_results(hists=[hist_qube, hist_ode], labels=['Hardware', 'ODE'], colors=None)