In [1]:
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(suppress=True, precision=2)
import scipy.optimize as optimize

# Define goal

Our goal will be denoted as $g$, for now we simply assume its defined in the full state space

In [2]:
g = np.array([[5], [0], [6], [0], [7], [0]])

# Collect Initial Data

Let's pretend our data comes from pulling a 2-link object to the right for 6 time steps

In [3]:
training_data = []
for _ in range(100):
    y = 0 #np.random.randint(-10,10)
    i = np.random.randint(-5,5)
    training_data.append((np.array([[i],[y],[i+1],[y],[i+2],[y]]),
         np.array([[1],[0]]),
         np.array([[i+1],[y],[i+2],[y],[i+3],[y]]),
         np.array([[g[0] - i]]),
         np.array([[g[0] - i+1]])))

# Optimize Simple Models

We assume linear forms for all our functions:
$
\begin{align}
f(s_t) &= As_t \\
f(s_t) &= Bo_t + Cu_t \\
\hat{c}(s_t) &= D(Ag - As_t) = D(Ag - o_t)\\
\end{align}
$

In [127]:
def params_to_matrices(params):
    a1 = params[0]
    a2 = params[1]
    a3 = params[2]
    a4 = params[3]
    a5 = params[4]
    a6 = params[5]
    b = params[6]
    c1 = params[7]
    c2 = params[8]
    d = params[9]

    A = np.array([[a1, a2, a3, a4, a5, a6]])
    B = np.array([[b]])
    C = np.array([[c1, c2]])
    D = np.array([[d]])
    return A, B, C, D

def latent_prediction_objective(params, data, alpha=0.5, regularization=0):
    """ return: MSE over all training examples """
    A, B, C, D = params_to_matrices(params)

    err = np.zeros(len(data))
    for i, (s, u, s_, c, c_) in enumerate(data):
        o = A@s
        o_ = A@s_
        o_g = A@g
        err[i] = alpha*np.linalg.norm(o + (B@o + C@u) - o_) + (1-alpha)*abs(D@(o_g-o) - c) + regularization * np.linalg.norm(A)

    obj = (err**2).mean()
    return obj

In [128]:
def train(training_data, objective_func, **kwargs):
    """
    param training_data: a list of tuples ([6x1], [2x1], [6x1], [1x1])
    """
        
    def __objective(params):
        return objective_func(params, training_data, **kwargs)
    
    initial_params = np.random.randn(10)
    for i in range(10):
        result = optimize.minimize(__objective, initial_params, method='Powell')
        if result.success:
            break
    
    if not result.success:
        print("Status: {:d}, Message: {:s}".format(result.status, result.message))
        return None
    print('Finished in {:d} iterations'.format(result.nit))
    return result.x

# Test Objective Function

To double check my objective function, let's make sure that my hand-designed model reductions give zero in the objective function:

In [129]:
my_params = np.array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed good params:", latent_prediction_objective(my_params, data=training_data, alpha=0.5))

my_params = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed bad params:", latent_prediction_objective(my_params, data=training_data, alpha=0.5))

loss for hand-designed good params: 0.0
loss for hand-designed bad params: 18.7525


In [130]:
params = train(training_data, latent_prediction_objective, alpha=0.5)

Finished in 7 iterations


In [131]:
print("Objective Cost/Loss:", latent_prediction_objective(params, training_data, alpha=0.5))
A, B, C, D = params_to_matrices(params)
print("Model reduction Matrix:", A)
print("Dynamics Matrix:", B, ',', C)
print("Cost Matrix:", D)

Objective Cost/Loss: 1.030034520278715e-22
Model reduction Matrix: [[ 0.05  1.01 -2.26 -0.26 -0.41 -0.75]]
Dynamics Matrix: [[0.]] , [[-2.63 18.44]]
Cost Matrix: [[-0.38]]


# We can now perfectly predict future latent state and cost

Consider what happens if we follow a plan of pulling right from the origin
What is the predicted cost of the plan $[[1,0], \dots]$ ? In the real world, the sum of cost at each state is $5+4+3+2+1=15$

In [132]:
actions = [np.array([[1],[0]]), np.array([[1],[0]]),np.array([[1],[0]]),np.array([[1],[0]]),np.array([[1],[0]])]
s0 = np.array([[0], [0], [1], [0], [2], [0]])
o = A@s0
predicted_total_cost = 0
for i, u in enumerate(actions):
    o_ = o + B@o + C@u
    c_hat = D@(A@g - o)
    predicted_total_cost += c_hat
    o = o_
print(predicted_total_cost)

[[15.]]


Ok great! **We can perfectly predict the future latent state and cost!**

# What if we only consider cost, and predicting cost?

In [133]:
def one_step_cost_prediction_objective(params, data, alpha=0.5, regularization=0):
    """ return: MSE over all training examples """
    A, B, C, D = params_to_matrices(params)
    
    def _cost(_o):
            return D@(o_g - _o)

    err = np.zeros(len(data))
    for i, (s, u, s_, c, c_) in enumerate(data):
        o = A@s
        o_ = A@s_
        o_g = A@g
        predicted_o_  = o + B@o + C@u
        one_step_predicted_cost_accuracy = np.linalg.norm(_cost(predicted_o_) - _cost(o_))
        current_cost_accuracy = abs(_cost(o) - c)
        err[i] = (1-alpha)*current_cost_accuracy + alpha * one_step_predicted_cost_accuracy + regularization * np.linalg.norm(A)
        
    obj = (err**2).mean()
    return obj

In [134]:
my_params = np.array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed good params:",
      one_step_cost_prediction_objective(my_params, data=training_data, alpha=0.5))

my_params = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed bad params:",
      one_step_cost_prediction_objective(my_params, data=training_data, alpha=0.5))

loss for hand-designed good params: 0.0
loss for hand-designed bad params: 18.7525


In [135]:
params = train(training_data, one_step_cost_prediction_objective)

Finished in 7 iterations


In [136]:
print("Prediction Objective:", latent_prediction_objective(params, training_data, alpha=0.5))
print("Cost-Only Objective:", one_step_cost_prediction_objective(params, training_data, alpha=0.5))
A, B, C, D = params_to_matrices(params)
print("Model reduction Matrix:", A)
print("Dynamics Matrix:", B, ',', C)
print("Cost Matrix:", D)

Prediction Objective: 9.841737428322009e-23
Cost-Only Objective: 9.825558328280835e-23
Model reduction Matrix: [[-0.54 -0.14 -0.06  0.01  1.61  0.14]]
Dynamics Matrix: [[-0.]] , [[ 1.01 21.36]]
Cost Matrix: [[0.99]]


Unsurprisingly, we are also able to fit our data in this case. Much more interestingly, **We also get latent state prediction accuracy for free, just by predicting in cost!**

# What if the data is more diverse?

What if we include various y values?

In [137]:
training_data = []
for _ in range(100):
    y = np.random.randint(-10,10)
    i = np.random.randint(-5,5)
    training_data.append((np.array([[i],[y],[i+1],[y],[i+2],[y]]),
         np.array([[1],[0]]),
         np.array([[i+1],[y],[i+2],[y],[i+3],[y]]),
         np.array([[g[0] - i]]),
         np.array([[g[0] - i+1]])))

In [138]:
my_params = np.array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed good params:", latent_prediction_objective(my_params, data=training_data, alpha=0.5))

my_params = np.array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1])
print("loss for hand-designed bad params:", latent_prediction_objective(my_params, data=training_data, alpha=0.5))

loss for hand-designed good params: 0.0
loss for hand-designed bad params: 16.54


In [139]:
params = train(training_data, latent_prediction_objective)

Finished in 6 iterations


In [140]:
print("Objective Cost/Loss:", latent_prediction_objective(params, training_data, alpha=0.5))
A, B, C, D = params_to_matrices(params)
print("Model reduction Matrix:", A)
print("Dynamics Matrix:", B, ',', C)
print("Cost Matrix:", D)

Objective Cost/Loss: 2.3064113713282737e-22
Model reduction Matrix: [[-4.   -1.9   3.06 -0.55 -0.13  2.45]]
Dynamics Matrix: [[0.]] , [[-1.07 17.74]]
Cost Matrix: [[-0.93]]


Yup, still works. Although this data is not realistic to the motion of the links. This just shows that if we add a distractor variable which does not help explain the cost, we can still "ignore it". Specifically, the B matrix (just a real number here) is still 0 and the y components of the model reduction can be arbitrary values.

In [141]:
# Shows that the parameters in the y components don't matter
test_params = np.array([[-1.81, -100, .24, 100, .26, 0]])
print(latent_prediction_objective(params, training_data))
test_params = np.array([[-1.81, 0, .24, -100, .26, 1000]])
latent_prediction_objective(params, training_data)


2.3064113713282737e-22


2.3064113713282737e-22

Since they don't matter, do we want to consider enforcing that the norm of the model reduction vector is minimized?

# Add Regularization to the Model Reduction Matrix

In [148]:
params = train(training_data, latent_prediction_objective, regularization=2)
print("Objective Cost/Loss:", latent_prediction_objective(params, training_data, alpha=0.5))
A, B, C, D = params_to_matrices(params)
print("Model reduction Matrix:", A)
print("Dynamics Matrix:", B, ',', C)
print("Cost Matrix:", D)

Finished in 4 iterations
Objective Cost/Loss: 3.956812237944807e-17
Model reduction Matrix: [[-0.46 -0.7  -0.09  0.59  1.07  0.12]]
Dynamics Matrix: [[0.]] , [[ 0.52 14.24]]
Cost Matrix: [[1.91]]


This doesn't work like I expected... I don't think this is a great idea.