# Q-table control for the Stochastic Double Integrator
## Method: Value Iteration (VI)

In [1]:
import numpy as np
from src.systems.linear import StochasticDoubleIntegrator
from src.RL.ValueIteration import ValueIteration
import matplotlib.pyplot as plt

# Select matplotlib's 'ggplot' style sheet for the figures created below.
plt.style.use('ggplot')

ModuleNotFoundError: No module named 'src'

### Stochastic Double Integrator

In [None]:
# Build the stochastic double integrator at x0 and record a trajectory via
# SDI.run(200) (presumably 200 time steps -- confirm against SDI.run).
x0 = np.array([2, 0])
SDI = StochasticDoubleIntegrator(x0)

_, s_data = SDI.run(200)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(s_data[:, 0], label='position')
ax.plot(s_data[:, 1], label='velocity')
# Both curves are drawn against the sample index, so the x-axis is time and
# the y-axis carries the state values (the legend tells the two apart).
ax.set_xlabel('time')
ax.set_ylabel('pos/vel')
ax.legend()

### Method 1: Value Iteration
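Here the Q-table is trained with value iteration (VI). The first figure shows the greedy control selected in every (position, velocity) bin; the second shows the absolute difference between the first two action values in each bin, plotted as a measure of certainty.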

In [None]:
# Train a Q-table with 500 value-iteration passes and show the greedy control
# for every (position, velocity) bin.
x0 = np.array([2, 0])
SDI = StochasticDoubleIntegrator(x0)

Q = ValueIteration()
Q.run_vi(SDI.get_state_update, SDI.cost, iterations=500)

# Grid description, also read by the following cell.
n_bins = Q.nbins
amp = Q.state_amp

# Greedy action index k per bin, mapped through 2*k - 1 (0 -> -1, 1 -> +1).
img = 2 * np.argmax(Q.Qtable, axis=-1) - 1

# Tick labels in state units and their matching positions in bin coordinates.
pos_labels = np.arange(-amp[0], amp[0] + .1, 1)
vel_labels = np.arange(-amp[1], amp[1] + .1, 1)
pos_ticks = (pos_labels + amp[0]) * (n_bins[0] - 1) / (2 * amp[0])
vel_ticks = (vel_labels + amp[1]) * (n_bins[1] - 1) / (2 * amp[1])

fig, ax = plt.subplots()
cb = ax.imshow(img, cmap='gist_yarg')
cbar = plt.colorbar(cb)
cbar.set_label('control')
ax.set_yticks(pos_ticks)
ax.set_yticklabels(pos_labels)
ax.set_xticks(vel_ticks)
ax.set_xticklabels(vel_labels)
ax.set_ylabel('position')
ax.set_xlabel('velocity')
plt.show()

In [None]:
# Magnitude of the gap between the first two action values in every bin,
# displayed as a 'certainty' map over the (position, velocity) grid.
diff = np.abs(np.diff(Q.Qtable, axis=2))[:, :, 0]

# Tick labels in state units and their matching positions in bin coordinates
# (amp and n_bins come from the value-iteration cell).
row_labels = np.arange(-amp[0], amp[0] + .1, 1)
col_labels = np.arange(-amp[1], amp[1] + .1, 1)
row_ticks = (row_labels + amp[0]) * (n_bins[0] - 1) / (2 * amp[0])
col_ticks = (col_labels + amp[1]) * (n_bins[1] - 1) / (2 * amp[1])

fig, ax = plt.subplots(figsize=(12, 8))
cb = ax.imshow(diff, interpolation='bicubic', cmap='viridis')
cbar = plt.colorbar(cb)
cbar.set_label('certainty')
ax.set_yticks(row_ticks)
ax.set_yticklabels(row_labels)
ax.set_xticks(col_ticks)
ax.set_xticklabels(col_labels)
ax.set_ylabel('position')
ax.set_xlabel('velocity')
plt.show()

**Q-table controller performance**

In [None]:
def run_sdi(i_time, sdi, Qi):
    """Roll out the Q-table controller on ``sdi`` for ``len(i_time)`` steps.

    At every step the current state ``sdi.x`` is recorded, the control is
    taken from the second element returned by ``Qi.get_control(sdi.x)``, and
    the system is advanced with ``sdi.update(u, info=True)``.

    Parameters
    ----------
    i_time : sequence
        Time grid; only its length is used (it fixes the number of steps).
    sdi : object
        System exposing the attribute ``x`` and a method
        ``update(u, info=True)`` that returns a ``(state, cost)`` pair.
        It is advanced in place by this function.
    Qi : object
        Controller exposing ``state_n`` (number of state components) and
        ``get_control(x)`` returning a 3-tuple whose second item is the control.

    Returns
    -------
    X : numpy.ndarray, shape ``(len(i_time), Qi.state_n)``
        State observed before each update.
    U : numpy.ndarray, shape ``(len(i_time),)``
        Control applied at each step.
    C : numpy.ndarray, shape ``(len(i_time),)``
        Cost reported by ``sdi.update`` at each step.
    """
    n_time = len(i_time)

    X = np.zeros((n_time, Qi.state_n))
    U = np.zeros(n_time)
    C = np.zeros(n_time)
    for step in range(n_time):
        # Log the state first so that row 0 holds the initial condition.
        X[step] = sdi.x
        _, u_star, _ = Qi.get_control(sdi.x)
        # The returned state is not needed here; it is read from sdi.x on
        # the next pass.
        _, cost = sdi.update(u_star, info=True)
        U[step] = u_star
        C[step] = cost
    return X, U, C

In [None]:
# Single closed-loop rollout of the trained Q-table controller from x0,
# plotted as state, control and cost over time.
x0 = np.array([2, 0])
T = 100
dt = 1

SDI = StochasticDoubleIntegrator(x0, dt=dt)
time = np.arange(0, T, SDI.dt)

X, U, C = run_sdi(time, SDI, Q)

fig, ax = plt.subplots(3, figsize=(12, 10))
state_ax, control_ax, cost_ax = ax

state_ax.plot(time, X[:, 0], label='position')
state_ax.plot(time, X[:, 1], label='velocity')
state_ax.legend()
state_ax.set_title('state')

control_ax.plot(time, U)
control_ax.set_title('control')

cost_ax.plot(time, C)
cost_ax.set_title('cost')

# All panels share the time axis; only the y-label differs per panel.
for panel, y_name in zip(ax, ('pos/vel', 'control', 'cost')):
    panel.set_xlabel('time')
    panel.set_ylabel(y_name)
plt.tight_layout()

In [None]:
# Compare Q-tables trained with different numbers of value-iteration passes.
# For every setting, one reference rollout from the nominal start state and
# N rollouts from random start states are simulated; the spread of position
# and velocity and the mean cost over all rollouts are plotted against time.
T = 30

# Private copy of the nominal start state. The random trial states below use
# their own variable, so x0 is no longer overwritten by this cell.
x_start = np.array(x0)

SDI = StochasticDoubleIntegrator(x0, dt=dt)
time = np.arange(0, T, SDI.dt)
n_steps = len(time)

N = 200
it_space = [100, 200, 500]
performance = {'n_it': [], 'mu': [], 'std': []}

fig, ax = plt.subplots(3, figsize=(12, 10))

for n_iter in it_space:
    Q = ValueIteration()
    Q.run_vi(SDI.get_state_update, SDI.cost, iterations=n_iter)

    # Row 0 holds the reference rollout, rows 1..N the random trials.
    # Preallocating avoids re-copying the whole stack on every trial.
    X0 = np.empty((N + 1, n_steps))
    X1 = np.empty((N + 1, n_steps))
    C = np.empty((N + 1, n_steps))
    AC = np.zeros(N)

    # Reset before the reference rollout: without it, every setting after
    # the first would start from wherever the previous trial left the system.
    SDI.reset(x_start.copy())
    X, U, C[0] = run_sdi(time, SDI, Q)
    X0[0], X1[0] = X.T

    for i_trial in range(N):
        x_trial = np.random.uniform(-3, 3, size=2)
        SDI.reset(x_trial)
        Xt, Ut, Ct = run_sdi(time, SDI, Q)
        X0[i_trial + 1], X1[i_trial + 1] = Xt.T
        C[i_trial + 1] = Ct
        # The average cost per trial excludes the reference rollout.
        AC[i_trial] = np.mean(Ct)

    performance['n_it'].append(n_iter)
    performance['mu'].append(np.mean(AC))
    performance['std'].append(np.std(AC))

    ax[0].plot(time, np.std(X0, axis=0), label='N = {}'.format(n_iter))
    ax[1].plot(time, np.std(X1, axis=0), label='N = {}'.format(n_iter))
    ax[2].plot(time, np.mean(C, axis=0), label='N = {}'.format(n_iter))

labels = {'y': ['st.dev.', 'st.dev.', 'avg cost'],
          'x': ['time' for i in range(3)],
          'title': ['position error', 'velocity error', 'average cost']}
for panel, x_name, y_name, title in zip(ax, labels['x'], labels['y'], labels['title']):
    panel.set_xlabel(x_name)
    panel.set_ylabel(y_name)
    panel.set_title(title)
    panel.legend()
plt.tight_layout()

print(performance) #TODO: Include performance graph