In [16]:
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(suppress=True, precision=2)
import scipy.optimize as optimize

# Define goal

Our goal will be denoted as $g$, for now we simply assume its defined in the full state space

In [17]:
g = np.array([5, 0, 6, 0, 7, 0])

# Collect Initial Data

Let's pretend our data comes from pulling a 2-link object to the right for 6 time steps

In [106]:
training_data = [
        (np.array([[i],[0],[i+1],[0],[i+2],[0]]),
         np.array([[1],[0]]),
         np.array([[i+1],[0],[i+2],[0],[i+3],[0]]),
         np.array([[g[0] - i]])) for i in range(6)]

# Optimize Linear Models

We assume linear forms for all our functions:
$
\begin{align}
f(s_t) &= As_t \\
f(s_t) &= Bo + Cu \\
\hat{c}(s_t) &= Ds_t \\
\end{align}
$

In [129]:
def params_to_matrices(params):
    a1 = params[0]
    a2 = params[1]
    a3 = params[2]
    a4 = params[3]
    a5 = params[4]
    a6 = params[5]
    b = params[6]
    c1 = params[7]
    c2 = params[8]
    d = params[9]

    A = np.array([[a1, a2, a3, a4, a5, a6]])
    B = np.array([[b]])
    C = np.array([[c1, c2]])
    D = np.array([[d]])
    return A, B, C, D

def train(training_data):
    """
    param training_data: a list of tuples ([6x1], [2x1], [6x1], [1x1])
    """
        
    def objective(params):
        """ return: MSE over all training examples """
        A, B, C, D = params_to_matrices(params)
        
        err = np.zeros(len(training_data))
        for i, (s, u, s_, c) in enumerate(training_data):
            err[i] = np.linalg.norm(A@s + (B@A@s + C@u) - A@s_) + 10*abs(D@A@s - c)
        
        obj = (err**2).mean()
        return obj
    
    for i in range(10):
        result = optimize.minimize(objective, np.random.randn(10), method='Nelder-Mead')
        if result.success:
            break
    
    if not result.success:
        print("Status: {:d}, Message: {:s}".format(result.status, result.message))
        return None
    
    return result.x

In [130]:
params = train(training_data)
A, B, C, D = params_to_matrices(params)
print(A)
print(B)
print(C)
print(D)

[[ 2.11  0.14  0.13  0.02 -1.61  0.07]]
[[-0.32]]
[[-0.86 -1.39]]
[[-1.61]]


# Check if the matrices we solved for successfully minimize the error

What is the predicted cost of the plan $[[1,0], \dots]$

In [131]:
actions = [np.array([[1],[0]]), np.array([[1],[0]]),np.array([[1],[0]]),np.array([[1],[0]]),np.array([[1],[0]])]
s0 = np.array([[0], [0], [1], [0], [2], [0]])
o = A@s0
for i, u in enumerate(actions):
    o_ += B@o + C@u
    c_hat = D@o
    print(o, c_hat)
    o = o_

[[-3.1]] [[5.]]
[[-205.04]] [[330.8]]
[[-140.23]] [[226.24]]
[[-96.18]] [[155.17]]
[[-66.24]] [[106.86]]


In [132]:
for i, (s, u, s_, c) in enumerate(training_data):
    print("================")
    print(s.T)
    print(u.T)
    print(s_.T)
    print(c)
    print(A@s)
    print(A@s + B@o + C@u)
    print(A@s_)
    print(D@A@s)

[[0 0 1 0 2 0]]
[[1 0]]
[[1 0 2 0 3 0]]
[[5]]
[[-3.1]]
[[17.25]]
[[-2.48]]
[[5.]]
[[1 0 2 0 3 0]]
[[1 0]]
[[2 0 3 0 4 0]]
[[4]]
[[-2.48]]
[[17.87]]
[[-1.86]]
[[4.]]
[[2 0 3 0 4 0]]
[[1 0]]
[[3 0 4 0 5 0]]
[[3]]
[[-1.86]]
[[18.49]]
[[-1.24]]
[[3.]]
[[3 0 4 0 5 0]]
[[1 0]]
[[4 0 5 0 6 0]]
[[2]]
[[-1.24]]
[[19.11]]
[[-0.62]]
[[2.]]
[[4 0 5 0 6 0]]
[[1 0]]
[[5 0 6 0 7 0]]
[[1]]
[[-0.62]]
[[19.73]]
[[-0.]]
[[1.]]
[[5 0 6 0 7 0]]
[[1 0]]
[[6 0 7 0 8 0]]
[[0]]
[[-0.]]
[[20.35]]
[[0.62]]
[[0.]]
