# Prerequisites

In [None]:
# Definitions
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('animation', html='jshtml')

from matplotlib.animation import FuncAnimation

def _as_xy_path(trajectory):
    """Coerce ``trajectory`` into an array of (x, y) positions, one row per frame.

    * A scalar or 1-D input is read as a sequence of x positions; y is set to 0.
    * An ``(n, 1)`` input is treated the same way.
    * For wider inputs only the first two columns are kept.

    Raises:
        ValueError: if the trajectory contains no points.
    """
    path = np.array(trajectory)
    # Promote scalar sequences to a column *before* selecting columns. Slicing
    # first would cut a 1-D trajectory down to its first two samples.
    if path.ndim <= 1:
        path = path.reshape(-1, 1)
    path = path[..., :2]
    if path.shape[0] == 0:
        raise ValueError("trajectory must contain at least one point")
    if path.ndim == 2 and path.shape[1] == 1:
        path = np.column_stack((path[:, 0], np.zeros(path.shape[0])))
    return path


def animate_satellite(trajectory, resolution_Hz=15, duration=None, fig_size=8):
    """Build an animation of a point moving along ``trajectory``.

    Args:
        trajectory: sequence of states, one per frame. Only the first two
            components of each state are drawn (as x and y). A 1-D sequence
            is interpreted as x positions with y fixed to 0.
        resolution_Hz: playback rate in frames per second.
        duration: playback length in seconds. If ``None``, every trajectory
            point is shown exactly once per loop; otherwise
            ``int(duration * resolution_Hz)`` frames are produced and the
            trajectory wraps around if it is shorter than that.
        fig_size: side length of the square figure, in inches.

    Returns:
        The ``FuncAnimation`` object. With the ``jshtml`` animation setting it
        renders as an interactive player when it is the last expression of a
        notebook cell.

    Raises:
        ValueError: if ``trajectory`` is empty.
    """
    path = _as_xy_path(trajectory)
    n_points = path.shape[0]
    n_frames = n_points if duration is None else int(duration * resolution_Hz)

    # Interactive mode is off while the figure is built (presumably so the
    # notebook does not also display the static figure) and is always switched
    # back on afterwards, even if figure construction fails.
    plt.ioff()
    try:
        fig, ax = plt.subplots(figsize=(fig_size, fig_size))
        # Fixed square viewport around the origin.
        ax.axis([-2, 2, -2, 2])
        ax.set_aspect("equal")
        ax.grid()
        # Large static marker at the origin (presumably the planet).
        ax.plot(0, 0, markersize=215, marker="o")
        # The moving marker; its coordinates are overwritten on every frame.
        point, = ax.plot(0, 1, marker="o")

        def update(t):
            """Move the marker to the trajectory point for frame ``t``."""
            # Wrap around so that a long ``duration`` replays the trajectory.
            x, y = path[int(t) % n_points]
            point.set_data([x], [y])
            return point,

        ani = FuncAnimation(fig, update, interval=1000 / resolution_Hz,
                            blit=True, repeat=True, frames=range(n_frames))
    finally:
        plt.ion()
    return ani

In [None]:
from scipy.linalg import norm

class ParametrizedDiscreteTimeSystem:
    """A discrete-time control system with a discounted running reward.

    The system is fully described by four ingredients supplied at
    construction: a transition map ``f(state, action) -> next_state``, the
    initial state, a per-step reward ``r(state, action)`` and a discount
    factor applied multiplicatively once per step.
    """

    def __init__(self,
                 state_transition_function,  # f(., .)
                 initial_state,              # x_0
                 running_reward,
                 discount
                 ):
        self.__state_transition_function = state_transition_function
        self.__initial_state = initial_state
        self.__running_reward = running_reward
        self.__discount = discount

    def run_with_feedback_controller(self,
                                     feedback_controller,
                                     steps=100):
        """Roll the system forward under a state-feedback policy.

        Args:
            feedback_controller: callable mapping the current state to an action.
            steps: maximum number of transitions to simulate.

        Returns:
            A tuple ``(states, actions, total_reward)`` where ``states`` and
            ``actions`` are numpy arrays of the visited states (starting with
            the initial state) and the applied actions, and ``total_reward``
            is the discounted sum of running rewards.

        If the transition or the reward raises ``OverflowError`` the episode
        stops early; the action that triggered the failure is still recorded,
        but no state or reward is added for it.
        """
        states = [self.__initial_state]
        controls = []
        discounted_return = 0
        discount_factor = 1
        for _ in range(steps):
            state = states[-1]
            control = feedback_controller(state)
            controls.append(control)
            try:
                successor = self.__state_transition_function(state, control)
                discounted_return += self.__running_reward(state, control) * discount_factor
                discount_factor *= self.__discount
            except OverflowError:
                print("The trajectory blew up. Ending the episode prematurely.")
                break
            states.append(successor)
        return np.array(states), np.array(controls), discounted_return

# Sending a satellite into orbit

The following system describes the dynamics of an artificial satellite that is being launched into space. The onboard controller determines the magnitude and direction of the propulsion produced by the rocket's engines. However, the controller is only able to update its action once a minute.
$$
\begin{aligned}
&\begin{cases}
 \frac{\partial}{\partial t}x(t) = \begin{cases}v(t), \text{ if } \lVert x(t)\rVert_2 \geq 1 \\ \left(0 \atop 0\right), \text{ otherwise}\end{cases}\\
 \frac{\partial}{\partial t}v(t) = -0.03\frac{x(t)}{\lVert x(t)\rVert_2^3} + u(t)\end{cases}\\
& x_0 = \left(0 \atop 1 \right) \\
& v_0 = \left(0 \atop 0.1 \right) \\
& u(t) = \rho_\theta\big(x(\lfloor t \rfloor)\big), \\
& r(x(t), u(t)) := -(\lVert x(t) \rVert_2 - 1.5)^2 - \lVert u(t)\rVert_2^2, \ \gamma = 0.99
\end{aligned}
$$


## Problem 1: Simulating the satellite

Implement the above system by converting it to discrete time, i.e.:

$$
\begin{aligned}
& \left(x_{t + 1} \atop v_{t + 1}\right) = f(x_t, v_t, u_t)\\
& x_0 = \left(0 \atop 1 \right) \\
& v_0 = \left(0 \atop 0.1 \right) \\
& u_t = \rho_\theta(x_t), \\
& r_t := -\int_t^{t + 1}\gamma^{\tau - t}\left((\lVert x(\tau) \rVert_2 - 1.5)^2 + \lVert u_t\rVert_2^2 \right)\text{d} \tau, \ \gamma = 0.99
\end{aligned}
$$

Here $f(\cdot, \cdot, \cdot) := x(t), v(t), u(t) \longmapsto x(t + 1), v(t + 1)$.


**Useful resources**:
* [scipy.integrate.solve_ivp](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.solve_ivp.html)
* [scipy.integrate.quad](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quad.html#scipy.integrate.quad)
* [Inner functions in Python](https://realpython.com/inner-functions-what-are-they-good-for/#:~:text=Inner%20functions%2C%20also%20known%20as,closure%20factories%20and%20decorator%20functions.)

In [None]:
from scipy.integrate import quad, solve_ivp

# Initial state handed to the discrete-time system. Presumably laid out as
# (x_1, x_2, v_1, v_2), i.e. x_0 = (0, 1) and v_0 = (0, 0.1) from the problem
# statement -- confirm against the transition function once it is implemented.
satellite_initial_state = np.array([0.0, 1.0, 0.0, 0.1])
# Discount factor gamma from the problem statement.
satellite_discount = 0.99

def satellite_dynamics_function(state_continuous, control_input):
    """Exercise stub: continuous-time dynamics of the satellite.

    The body is left for the reader to fill in. As written it does not assign
    ``drift``, so it cannot be called successfully until the exercise is
    completed.

    Intended contract (from the problem statement, not enforced here): return
    the time derivative of ``state_continuous`` under the action
    ``control_input``.
    """
    ## YOUR CODE
    return drift

def satellite_transition_function(state, control_input):
    """Exercise stub: one discrete-time step of the satellite.

    The body is left for the reader to fill in. As written it does not assign
    ``next_state``, so it cannot be called successfully until the exercise is
    completed.

    Intended contract (from the problem statement, not enforced here): map the
    state at time ``t`` and the action held over ``[t, t + 1]`` to the state
    at time ``t + 1``.
    """
    ## YOUR CODE
    return next_state    

def satellite_running_reward(state, control_input):
    """Exercise stub: reward collected over one discrete-time step.

    The body is left for the reader to fill in. As written it does not assign
    ``reward``, so it cannot be called successfully until the exercise is
    completed.

    Intended contract (from the problem statement, not enforced here): return
    ``r_t``, the discounted integral of the continuous-time reward over
    ``[t, t + 1]`` starting from ``state`` with ``control_input`` held fixed.
    """
    ## YOUR CODE
    return reward

# Bundle the transition map, initial state, running reward and discount into a
# single simulatable system.
satellite_system = ParametrizedDiscreteTimeSystem(satellite_transition_function,
                                                  satellite_initial_state,
                                                  satellite_running_reward,
                                                  satellite_discount)
    

### Observe the results

In [None]:
def nominal_feedback_controller(state):
    """Baseline controller: ignores ``state`` and always returns (0.008, 0.008)."""
    return np.full(2, 0.008)

# Simulate up to 100 steps under the nominal controller and report the outcome.
trajectory, actions, total_reward = satellite_system.run_with_feedback_controller(nominal_feedback_controller, steps=100)
print("Total reward: %f" % total_reward)
print("First action:", actions[0])
# Keep this as the last expression of the cell so the notebook displays the animation.
animate_satellite(trajectory)

### Compare to the correct result

In [None]:
trajectory =  np.array([[0.0, 1.0, 0.0, 0.1], [0.003991498651545051, 1.0898863118512063, 0.007967235553123602, 0.08055937649142351], [0.015880979301819503, 1.1623801300866805, 0.015776475383492344, 0.06493605198974471], [0.03546145063266189, 1.2205969000564774, 0.02333758915047965, 0.051844363545727995], [0.06245325573096447, 1.2666530390735276, 0.030590283895059498, 0.04051413195587745], [0.09652247742617152, 1.3020456307055197, 0.03748440641403428, 0.030450718017754133], [0.13728666115601015, 1.3278647123639218, 0.04397225934067033, 0.02132159507884647], [0.1843155060444254, 1.3449230435628432, 0.050005577466995986, 0.012897637223185059], [0.23712976844277908, 1.3538417389390451, 0.05553470777789056, 0.005020764912771565], [0.29520001254559014, 1.355111204673162, 0.06050910106092892, -0.0024147589649259338], [0.35794638633510006, 1.3491374051781635, 0.06487879645682225, -0.009473527241686517], [0.42474031375526905, 1.336279218750057, 0.06859675980487967, -0.016186069691471544], [0.49490886690286073, 1.316879925431149, 0.07162195874201623, -0.022553815481013553], [0.5677426541399118, 1.2912941615935016, 0.07392300440168219, -0.028553223977905634], [0.6425072949179982, 1.2599108700496993, 0.07548201326457413, -0.034140005810641944], [0.7184588725177067, 1.2231716552360539, 0.07629826601112089, -0.03925396551034829], [0.7948624845011676, 1.1815839610877341, 0.0763910711976972, -0.04382471684864608], [0.8710132998618428, 1.1357280078655323, 0.07580128550038986, -0.047778188619292734], [0.9462580656551555, 1.0862570761479073, 0.07459100599362381, -0.05104347745464857], [1.0200153710456852, 1.0338910017576461, 0.07284125712240025, -0.053559391288812014], [1.09179262621639, 0.9794036134798115, 0.07064782930134295, -0.055279926288021586], [1.1611981588731084, 0.923605496809443, 0.0681157780004747, -0.056178035968263026], [1.2279474504677188, 0.8673240094639657, 0.06535333532480488, -0.05624732591058293], [1.2918633310959193, 0.8113826586665226, 0.06246604827360492, 
-0.05550166615127365], [1.3528707238545512, 0.7565817826993292, 0.0595518368684815, -0.053973047471286464], [1.4109871042498687, 0.7036820317567857, 0.05669741570026913, -0.05170822817296247], [1.4663101374959235, 0.6533915537152043, 0.053976231692326695, -0.048764788936287544], [1.5190039800845159, 0.6063571703489654, 0.05144781409485985, -0.04520715060465042], [1.569285527363536, 0.5631593880927949, 0.04915827208489991, -0.041102965740508535], [1.6174115917928624, 0.5243107215789194, 0.04714159693291478, -0.03652012441799545], [1.663667644441578, 0.49025666140591995, 0.04542143318838884, -0.03152446098424089], [1.708358432614293, 0.461378582996371, 0.0440130360678215, -0.026178134510602212], [1.7518005937382286, 0.4379980117355988, 0.042925207385367314, -0.020538598512615046], [1.7943170789295086, 0.4203816198543861, 0.04216206716905132, -0.014658020293326903]])
# Hard-coded reference value for comparison; it is not recomputed here.
print("Total reward: -2045.429401")
# Animate the reference trajectory defined above (only x and y are drawn).
animate_satellite(trajectory)

Total reward: -2045.429401
