# Q-learning control for a Stochastic Double Integrator
## Method: Q-table

In [23]:
import numpy as np
import jax.numpy as jnp

from src.systems.linear import StochasticDoubleIntegrator
from src.RL.QLearning import QLearning
import matplotlib.pyplot as plt

plt.style.use('ggplot')

In [None]:
# --- Simulation setup -------------------------------------------------------
# Initial state handed to the system; its length fixes the logged state size.
x0 = np.array([5, 2])
SDI = StochasticDoubleIntegrator(x0)

n_steps = 1000
n_state = len(x0)

# Per-step logs. The loop below runs n_steps - 1 iterations, so the final
# entry of each log keeps its initial zero.
X = np.zeros((n_steps, n_state))
R = np.zeros(n_steps)
U = np.zeros(n_steps)

Q = QLearning()

# Epsilon-greedy exploration: with probability `epsilon` a random control is
# drawn from Q.cntr_space, otherwise the control returned by Q.get_control is
# used. `epsilon` is multiplied by `decay` after every step and never drops
# below `min_epsilon`.
epsilon = .9
decay = .95
min_epsilon = .01

y0 = SDI.observe()
for i in range(n_steps - 1):

    # Restart from a random state once the first state component reaches
    # |x[0]| >= 9, and raise epsilon again to re-stimulate exploration.
    if abs(SDI.x[0]) >= 9:
        x0 = np.random.normal(size=2) * 5
        SDI.reset(x0)
        epsilon = .5
        y0 = SDI.observe()

    X[i] = SDI.x

    # Explore with probability epsilon, otherwise query the Q-table.
    if np.random.random() < epsilon:
        u_star = np.random.choice(Q.cntr_space)
    else:
        _, u_star, _ = Q.get_control(y0)

    U[i] = u_star

    # r0 (cost evaluated on the observation) is what the learner receives;
    # R logs the negated cost evaluated on the system state for plotting.
    # NOTE(review): Q.update is given the un-negated cost -- confirm the sign
    # convention QLearning.update expects.
    r0 = SDI.cost(y0, u_star)
    R[i] = - SDI.cost(SDI.x, u_star)

    # take step
    SDI.update(u_star)
    y1 = SDI.observe()

    # update qtable
    Q.update(r0, y0, y1, u_star)
    y0 = y1

    # Anneal exploration. `decay` and `min_epsilon` were previously defined
    # but never applied, which left epsilon constant between resets.
    epsilon = max(min_epsilon, epsilon * decay)


# Stack the logged state, control and reward traces in one tall figure,
# one panel per quantity.
plt.figure(figsize=(10, 15))
panels = ((X, 'state'), (U, 'control'), (R, 'reward'))
for row, (series, label) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    # The simulation loop stops one step short of n_steps, so the last
    # entry of each log is never written; leave it out of the plot.
    plt.plot(series[:-1])
    plt.title(label)

