In [1]:
import numpy as np
from scipy.linalg import *
from drl.env import TwoLinkArm
import time

%matplotlib notebook

In [16]:
# Perturbation size used by calc_derivatives for its central finite differences.
epsilon = 1e-5
# Episode horizon: run_experiment executes int(Ts / env.dt) control steps.
Ts = 10
# Simulated arm, built with g=0. (presumably gravity switched off -- confirm in TwoLinkArm).
env = TwoLinkArm(g=0.)

# Cost weights handed to solve_continuous_are as its q (state) and r (input) arguments:
# scaled identities sized to the environment's state and action dimensions.
Q = 100. * np.eye(env.state_dim)
R = 0.1 * np.eye(env.action_dim)


## Finite difference derivatives

In [3]:
def _as_float_array(values):
    """Return `values` as an ndarray with an inexact dtype, so `+= eps` is well defined."""
    arr = np.asarray(values)
    if not np.issubdtype(arr.dtype, np.inexact):
        # Integer/bool inputs would reject (or truncate) an in-place float perturbation.
        arr = arr.astype(float)
    return arr


def _central_difference(func, point, n_rows, n_cols, eps):
    """Central-difference Jacobian of `func` at `point`, filled one column per perturbed entry."""
    jac = np.zeros((n_rows, n_cols))
    for i in range(n_cols):
        forward = point.copy()
        forward[i] += eps
        backward = point.copy()
        backward[i] -= eps
        # Evaluate the +eps point first, then the -eps point.
        jac[:, i] = (func(forward) - func(backward)) / (2 * eps)
    return jac


def calc_derivatives(x, u, system=None, eps=None):
    """Numerically differentiate `system.simulate_system` with respect to state and action.

    Each entry of `x` (then of `u`) is perturbed by +/- `eps` and the two
    `simulate_system` results are combined into a central difference.

    Parameters
    ----------
    x : array_like
        Point in state space at which to differentiate; needs at least
        `system.state_dim` entries. Never modified.
    u : array_like
        Action at which to differentiate; needs at least `system.action_dim`
        entries. Lists and tuples are accepted. Never modified.
    system : object, optional
        Provides `state_dim`, `action_dim` and `simulate_system(x, u)`.
        Defaults to the notebook-level `env`.
    eps : float, optional
        Perturbation size. Defaults to the notebook-level `epsilon`.

    Returns
    -------
    A : ndarray, shape (state_dim, state_dim)
        Column i is the derivative of `simulate_system` w.r.t. `x[i]`.
    B : ndarray, shape (state_dim, action_dim)
        Column i is the derivative of `simulate_system` w.r.t. `u[i]`.
    """
    # Fall back to the notebook globals only when no override is given.
    system = env if system is None else system
    eps = epsilon if eps is None else eps

    x = _as_float_array(x)
    u = _as_float_array(u)

    A = _central_difference(lambda x_pert: system.simulate_system(x_pert, u),
                            x, system.state_dim, system.state_dim, eps)
    B = _central_difference(lambda u_pert: system.simulate_system(x, u_pert),
                            u, system.state_dim, system.action_dim, eps)
    return A, B

In [4]:
def run_experiment():
    """Run one rendered episode under a re-linearised LQR feedback law.

    At every step the deviation from the (zero-padded) goal is computed, the
    system is differentiated numerically with `calc_derivatives`, the
    continuous-time algebraic Riccati equation is solved for `P`, and the
    action `u = -K @ error` with `K = pinv(R) @ B.T @ P` is applied via
    `env.step`.

    Returns
    -------
    ndarray
        Padded goal minus the final state. Note the sign is the opposite of
        the `error` used inside the control loop (state minus goal).
    """
    def goal_state():
        # env.goal is shorter than the state vector; append zeros so that it
        # lines up with x. The pad width is derived from the dimensions
        # instead of being hard-coded.
        return np.pad(env.goal, (0, env.state_dim - len(env.goal)), 'constant')

    x = env.reset()
    env.render()
    # Start from a zero action held as an ndarray, the same type u has on
    # every later iteration.
    u = np.zeros(env.action_dim)

    # R is constant, so its pseudo-inverse is computed once instead of per step.
    R_inv = np.linalg.pinv(R)
    n_steps = int(Ts / env.dt)

    for _step in range(n_steps):
        # Calculate optimal feedback gain K
        error = x - goal_state()
        # NOTE(review): the derivatives are evaluated at `error`, not at the
        # current state `x` -- confirm that this linearisation point is intended.
        A, B = calc_derivatives(error, u)

        # NOTE(review): a continuous-time ARE presumes simulate_system returns
        # a state derivative rather than a next state -- verify against the env.
        P = solve_continuous_are(A, B, Q, R)
        K = np.dot(R_inv, np.dot(B.T, P))

        u = -np.dot(K, error)

        x, _, _, _ = env.step(u)
        env.render()

    return goal_state() - x

In [None]:
# Run five episodes back to back and report what run_experiment returns
# (the padded goal minus the final state) for each one.
for i in range(5):
    error = run_experiment()
    # %s already applies str() to its argument, so the index is passed directly.
    print('Episode %s - Error: %s' % (i, error))

In [6]:
# Final render call with close=True -- presumably tears down the viewer window
# opened by the earlier env.render() calls; confirm against TwoLinkArm.render.
env.render(close=True)