In [None]:
%load_ext autoreload
%autoreload 2
import os

import numpy as np
import gym
from main.alpaca import *
from main.dataset import *
from main.dataViz import *
import main.train_policy
from baselines.common import tf_util as U
from baselines import logger
import matplotlib.pyplot as plt
import yaml

In [None]:
# Load the experiment configuration; later cells read keys such as
# 'torso_min'/'torso_max', 'friction_min'/'friction_max' and 'x_dim' from it.
cfg_filename = 'configs/hopper-config.yml'
with open(cfg_filename, 'r') as ymlfile:
    # safe_load instead of yaml.load: calling yaml.load() without an explicit
    # Loader is deprecated since PyYAML 5.1 and raises TypeError from 6.0 on.
    # A plain settings file does not need arbitrary Python object construction.
    config = yaml.safe_load(ymlfile)

In [None]:
def randomize_hopper(env):
    """Resample the hopper's torso size and friction and push them to `env`.

    Each value is drawn uniformly from the range stored in the global
    `config` ('torso_min'..'torso_max' and 'friction_min'..'friction_max').
    The results are written to `env.torso_size` and `env.friction`, after
    which `env.apply_env_modifications()` is called.
    """
    def _draw(low_key, high_key):
        # Uniform sample between config[low_key] and config[high_key].
        low = config[low_key]
        return low + np.random.rand() * (config[high_key] - low)

    # Torso is drawn before friction; keep this order so that seeded runs
    # consume the global NumPy RNG in the same sequence.
    torso_size = _draw('torso_min', 'torso_max')
    friction = _draw('friction_min', 'friction_max')

    env.friction = friction
    env.torso_size = torso_size
    env.apply_env_modifications()

In [None]:
# Gym id of the task; the policy training/loading cells reuse it.
env_id = 'Hopper-v2'

# Wrap the base gym environment in a Randomizer and hand it randomize_hopper.
# NOTE(review): presumably the wrapper calls the function to resample the
# physical parameters -- confirm where Randomizer is defined.
env = Randomizer(
    gym.make(env_id),
    randomize_hopper,
)

Train policy if it hasn't been trained already:

In [None]:
# Set up the baselines logger, then train a policy on `env_id`.
# NOTE(review): `model_path` is presumably where the trained weights are
# written -- confirm in main.train_policy.
logger.configure()
pi = main.train_policy.train(
    env_id,
    num_timesteps=1000000,
    seed=1124,
    model_path='policies/hopper',
)

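Or, if the policy has already been trained, just load the saved weights from `policies/hopper` instead of running the cell above: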

In [None]:
# Rebuild the policy and restore previously saved weights instead of training.
logger.configure()
# `import main.train_policy` binds only the name `main`, so train() has to be
# reached through the full package path (same spelling as the training cell).
# NOTE(review): the single-timestep run presumably exists only to construct
# the policy variables that load_state() then overwrites -- confirm.
pi = main.train_policy.train(env_id, num_timesteps=1, seed=1124)
U.load_state('policies/hopper')

In [None]:
def train_hopper_pol(ob):
    """Query the global policy `pi` stochastically for observation `ob`.

    Returns the first element of what `pi.act` returns (presumably the
    action -- confirm against the policy class).
    """
    act_output = pi.act(ob=ob, stochastic=True)
    return act_output[0]
def test_hopper_pol(ob):
    """Query the global policy `pi` non-stochastically for observation `ob`.

    Returns the first element of what `pi.act` returns (presumably the
    action -- confirm against the policy class).
    """
    act_output = pi.act(ob=ob, stochastic=False)
    return act_output[0]

In [None]:
# Number of samples requested for the training and the test split.
N_train = 30000
N_test = 500

# Roll out both policies through the DataGenerator: the stochastic policy
# produces the training split, the deterministic one the test split.
# NOTE(review): the literal 400 is presumably the trajectory length -- confirm
# against DataGenerator.sample_trajectories.
DG = DataGenerator(config, env, 'Hopper-v2')
Y, X = DG.sample_trajectories(
    train_hopper_pol, 400, N_train, return_lists=False
)
Y_test, X_test = DG.sample_trajectories(
    test_hopper_pol, 400, N_test, return_lists=False
)

# Cache all four arrays in a single .npz archive.
# NOTE(review): this writes 'hopper_data_long.npz' while the loading cell
# reads 'hopper_data.npz' -- confirm that the two file names are meant to differ.
with open('hopper_data_long.npz', 'wb') as f:
    np.savez(
        f,
        Y=Y,
        X=X,
        Y_test=Y_test,
        X_test=X_test,
    )

In [None]:
# Read previously generated trajectories back from disk.
with open('hopper_data.npz', 'rb') as f:
    data = np.load(f)

    # Pull the arrays out while the file handle is still open.
    X, Y, Y_test, X_test = (
        data[key] for key in ('X', 'Y', 'Y_test', 'X_test')
    )

In [None]:
# Sizes passed to the samplers below.
# NOTE(review): presumably (number of trajectories, horizon) -- confirm
# against GymDataset.sample.
N_test = 500
N_train = 30000
test_horz = 100

# One sampler per policy: stochastic for training, deterministic for testing.
dataset = GymDataset(env, train_hopper_pol)
test_dataset = GymDataset(env, test_hopper_pol)

X_train, Y_train = dataset.sample(
    N_train, test_horz, verbose=True
)
X_test, Y_test = test_dataset.sample(
    N_test, test_horz, verbose=True
)

In [None]:
# Wrap the already-sampled training arrays in a PresampledDataset; this is the
# object handed to dyn_model.train() further down.
# NOTE(review): presumably it serves batches from these arrays without
# stepping the environment again -- confirm in main.dataset.
train_dataset = PresampledDataset(X_train, Y_train)

## DynamicsAgent

In [None]:
# Give the dynamics model its own TensorFlow graph and session.
g3 = tf.Graph()
sess3 = tf.Session(
    graph=g3,
    # Log which device each op is placed on.
    config=tf.ConfigProto(log_device_placement=True),
)

# Build the model inside that graph/session from the YAML configuration.
dyn_model = AdaptiveDynamics(config, sess3, g3)
dyn_model.construct_model()

In [None]:
# Fit the dynamics model on the pre-sampled training set.
# NOTE(review): 5000 is presumably the number of training iterations --
# confirm against AdaptiveDynamics.train.
dyn_model.train(train_dataset, 5000)

In [None]:
# Draw a fresh evaluation set, replacing the earlier X_test / Y_test.
# NOTE(review): this samples from `dataset` (built with train_hopper_pol),
# not from `test_dataset` -- confirm that the stochastic policy is intended.
test_horz = 100
N_test = 500
X_test, Y_test = dataset.sample(
    N_test,
    test_horz,
)

In [None]:
# Plot every state dimension of one test trajectory against the time step,
# one stacked subplot per dimension.
ind = 7
T = X_test[ind].shape[0]  # number of time steps in the selected trajectory

fig = plt.figure(figsize=(5, 24))
for i in range(config['x_dim']):
    ax = fig.add_subplot(config['x_dim'], 1, i + 1)
    ax.plot(np.arange(T), X_test[ind, :, i])
    # Braces keep a multi-digit index inside the subscript; without them
    # mathtext renders "$x_10(t)$" with only the "1" subscripted.
    ax.set_ylabel(r"$x_{" + str(i + 1) + r"}(t)$")
plt.show()

In [None]:
# Visualise dynamics-model rollouts on one test trajectory.
ind = 4         # index of the test trajectory to use
T = 75          # number of time steps taken from that trajectory
N_samples = 50  # forwarded to test_adaptive_dynamics
t_init = 0      # first time step of the slice

fig = plt.figure(figsize=(9, 7))
# NOTE(review): Nu is presumably the number of observed transitions the model
# conditions on before rolling out -- confirm against test_adaptive_dynamics.
for Nu in [1, 28, 56]:
    test_adaptive_dynamics(
        dyn_model,
        X_test[ind:ind+1, t_init:t_init+T, :],
        Y_test[ind:ind+1, t_init:t_init+T, :],
        N_samples,
        Nu,
        T_rollout=15,
    )

plt.tight_layout(w_pad=0.2, h_pad=0.2)
# savefig does not create missing directories; make sure the target exists so
# a fresh checkout does not fail after the expensive steps above.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/hopper_rollouts_2.pdf')
plt.show()