In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append("..")
sys.path.append(".")
import copy
import torch
import pickle
import numpy as np
from datetime import datetime
import plotly
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)

import robust_value_approx.train_value as train_value
import robust_value_approx.utils as utils
import plotting_utils

# Double Integrator Example

In [None]:
import double_integrator_utils

# Value function object for the double integrator example (built with N=5).
vf = double_integrator_utils.get_value_function(N=5)

# Box of initial states: [-1, 1] along every state dimension.
x0_up = torch.ones(vf.sys.x_dim, dtype=vf.dtype)
x0_lo = -x0_up

# validation grid: one entry (100) per state dimension
num_breaks_validation = vf.sys.x_dim * [100]

# where cached samples and trained models are stored
sys_name = 'double_int'
model_file = '../data/' + sys_name
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'

# neural network size handed to the trainer
nn_depth = 1
nn_width = 16

# Adversarial training options (the grouping below is inferred from the
# option names only).
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# -- iteration counts
train_opt.init_num_train_steps = 1000
train_opt.num_iter_desired = 1000
train_opt.num_steps_between_sampling = 200
# -- buffer / batch sizes
train_opt.init_buffer_size = 100
train_opt.max_buffer_size = 100000
train_opt.batch_size = 60
train_opt.num_rand_extra = 0
# -- adversarial sample search
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 3
train_opt.x_adv_lr = 0.2
train_opt.x_adv_conv_tol = 1e-5

# How many times adv.train(train_opt) is called back-to-back in each fresh run.
num_training_run = 2

# Vertical Ball Paddle Example

In [None]:
import ball_paddle_utils

# Value function object for the vertical ball/paddle example (built with N=5).
vf = ball_paddle_utils.get_value_function_vertical(N=5)

# Lower / upper corner of the initial-state box. The second coordinate has
# identical bounds (.15), so it is held fixed.
x0_lo = torch.Tensor([1.5, 0.15, -5.0, -1.0]).type(vf.dtype)
x0_up = torch.Tensor([2.0, 0.15, 1.0, 5.0]).type(vf.dtype)

# validation grid: a single entry for the fixed coordinate, 20 for the others
num_breaks_validation = [20, 1, 20, 20]

# where cached samples and trained models are stored
sys_name = 'ball_paddle_vertical'
model_file = '../data/' + sys_name
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'

# neural network size handed to the trainer
nn_depth = 1
nn_width = 32

# Adversarial training options (the grouping below is inferred from the
# option names only).
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# -- iteration counts
train_opt.init_num_train_steps = 1000
train_opt.num_iter_desired = 4000
train_opt.num_steps_between_sampling = 100
# -- buffer / batch sizes
train_opt.init_buffer_size = 500
train_opt.max_buffer_size = 100000
train_opt.batch_size = 150
train_opt.num_rand_extra = 0
# -- adversarial sample search
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 2
train_opt.x_adv_lr = 0.2
train_opt.x_adv_conv_tol = 1e-5

# How many times adv.train(train_opt) is called back-to-back in each fresh run.
num_training_run = 1

In [None]:
# Sanity check: plot the spread of rollouts started inside the initial-state
# box, showing state indices 0 and 1 labelled "ball" and "paddle".
fig = plotting_utils.rollout_range(
    vf,
    x0_lo,
    x0_up,
    [0, 1],
    ["ball", "paddle"],
    n=50,
)
fig.show()

# SLIP Goal Example

In [None]:
import slip_utils

# Build the SLIP value function for the goal example. The helper returns a
# pair; the second element is kept as `slip` (presumably the SLIP system
# object - confirm in slip_utils).
vf, slip = slip_utils.get_value_function(
    torch.Tensor([6.0, 1.25, 0.0]),
    N=3,
)

In [None]:
# Lower / upper corner of the initial-state box. The first coordinate has
# identical bounds (0), so it is held fixed.
x0_lo = torch.Tensor([0.0, 0.95, 4.0]).type(vf.dtype)
x0_up = torch.Tensor([0.0, 1.25, 9.0]).type(vf.dtype)

# validation grid: a single entry for the fixed coordinate, 70 for the others
num_breaks_validation = [1, 70, 70]

# where cached samples and trained models are stored
sys_name = 'slip'
model_file = '../data/' + sys_name
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'

# neural network size handed to the trainer
nn_depth = 1
nn_width = 64

# Adversarial training options (the grouping below is inferred from the
# option names only).
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# -- iteration counts
train_opt.init_num_train_steps = 2000
train_opt.num_iter_desired = 10000
train_opt.num_steps_between_sampling = 500
# -- buffer / batch sizes
train_opt.init_buffer_size = 1000
train_opt.max_buffer_size = 100000
train_opt.batch_size = 150
train_opt.num_rand_extra = 0
# -- adversarial sample search
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 2
train_opt.x_adv_lr = 0.2
train_opt.x_adv_conv_tol = 1e-5

# How many times adv.train(train_opt) is called back-to-back in each fresh run.
num_training_run = 1

In [None]:
# Sanity check: plot the spread of rollouts started inside the initial-state
# box, showing state indices 0 and 1 labelled "x" and "y".
fig = plotting_utils.rollout_range(
    vf,
    x0_lo,
    x0_up,
    [0, 1],
    ["x", "y"],
    n=30,
)
fig.show()

# SLIP Gait Example

In [None]:
import slip_utils

# Build the SLIP value function for the gait example. The helper returns a
# pair; the second element is kept as `slip` (presumably the SLIP system
# object - confirm in slip_utils).
vf, slip = slip_utils.get_value_function_gait(
    torch.Tensor([0.0, 1.2, 5]),
    N=3,
)

In [None]:
# Lower / upper corner of the initial-state box. The first coordinate has
# identical bounds (0), so it is held fixed.
x0_lo = torch.Tensor([0.0, 0.95, 4.0]).type(vf.dtype)
x0_up = torch.Tensor([0.0, 1.25, 9.0]).type(vf.dtype)

# validation grid: a single entry for the fixed coordinate, 70 for the others
num_breaks_validation = [1, 70, 70]

# where cached samples and trained models are stored
sys_name = 'slipgait'
model_file = '../data/' + sys_name
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'

# neural network size handed to the trainer
nn_depth = 1
nn_width = 64

# Adversarial training options (the grouping below is inferred from the
# option names only).
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# -- iteration counts
train_opt.init_num_train_steps = 1000
train_opt.num_iter_desired = 3000
train_opt.num_steps_between_sampling = 500
# -- buffer / batch sizes
train_opt.init_buffer_size = 500
# NOTE(review): every other example in this notebook uses 100000 here -
# confirm that the much smaller buffer cap is intentional for the gait task.
train_opt.max_buffer_size = 1000
train_opt.batch_size = 150
train_opt.num_rand_extra = 0
# -- adversarial sample search
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 3
train_opt.x_adv_lr = 0.2
train_opt.x_adv_conv_tol = 1e-5

# How many times adv.train(train_opt) is called back-to-back in each fresh run.
num_training_run = 1

# Generate/Load Validation Data

In [None]:
# Evaluate the value function on a grid spanning the initial-state box; the
# result is a (states, values) pair used as the validation set.
(x_samples_validation,
 v_samples_validation) = vf.get_value_sample_grid(
    x0_lo,
    x0_up,
    num_breaks_validation,
    update_progress=True,
)

In [None]:
# Cache the validation set on disk: states first, then their values.
for samples_to_save, file_stem in ((x_samples_validation, x_samples_file),
                                   (v_samples_validation, v_samples_file)):
    torch.save(samples_to_save, file_stem + '_validation.pt')

In [None]:
# Read the cached validation set back from disk (states, then values).
x_samples_validation, v_samples_validation = (
    torch.load(file_stem + '_validation.pt')
    for file_stem in (x_samples_file, v_samples_file)
)

# Generate/Load Initial Data Prior

In [None]:
# Trainer built with default settings, used here to draw the initial random
# samples; the training loop further down rebinds `adv` to fresh trainers.
adv = train_value.AdversarialWithBaseline(vf, x0_lo, x0_up)
(x_samples_init,
 v_samples_init) = adv.get_random_samples(train_opt.init_buffer_size)

In [None]:
# Cache the initial samples on disk: states first, then their values.
for samples_to_save, file_stem in ((x_samples_init, x_samples_file),
                                   (v_samples_init, v_samples_file)):
    torch.save(samples_to_save, file_stem + '_init.pt')

In [None]:
# Read the cached initial samples back from disk (states, then values).
x_samples_init, v_samples_init = (
    torch.load(file_stem + '_init.pt')
    for file_stem in (x_samples_file, v_samples_file)
)

# Adversarial Training

In [None]:
# Start from empty result containers: per-run state logs plus the baseline
# and robust models collected after each run. Three distinct lists are created.
training_runs, baseline_models, robust_models = [], [], []

In [None]:
# Repeat the whole experiment several times, each time from a newly
# constructed trainer that shares the validation set and initial samples.
num_fresh_runs = 5
for run_i in range(num_fresh_runs):
    adv = train_value.AdversarialWithBaseline(
        vf, x0_lo, x0_up,
        nn_width=nn_width,
        nn_depth=nn_depth,
        x_samples_validation=x_samples_validation,
        v_samples_validation=v_samples_validation,
        # only the first init_buffer_size rows of the cached initial samples
        x_samples_init=x_samples_init[:train_opt.init_buffer_size, :],
        v_samples_init=v_samples_init[:train_opt.init_buffer_size, :],
    )

    # One state snapshot after every consecutive call to train().
    state_log = []
    for _ in range(num_training_run):
        adv.train(train_opt)
        state_log.append(adv.get_state())

    # Keep independent copies of both networks so that later runs cannot
    # alias the stored models.
    baseline_models.append(copy.deepcopy(adv.baseline_model))
    robust_models.append(copy.deepcopy(adv.robust_model))
    training_runs.append(state_log)

    utils.update_progress((run_i + 1) / num_fresh_runs)

In [None]:
# improvement on test set
# To plot all runs at once instead:
# fig = plotting_utils.validation_delta([s_log[-1] for s_log in training_runs], window=50)
i = 0  # index of the single run to display
# last logged state of the selected run (an empty list if i is out of range)
final_states = [s_log[-1] for s_log in training_runs[i:i + 1]]
fig = plotting_utils.validation_delta(final_states, window=1)
fig.show()

In [None]:
# Training losses taken from the last logged state of the first run.
fig = plotting_utils.training_loss(
    training_runs[0][-1],
    window=100,
)
fig.show()

In [None]:
# 2D view of the sample buffers logged by the first run, over the
# initial-state box; 1 and 2 select what is plotted (presumably state
# indices - confirm in plotting_utils).
fig = plotting_utils.buffer_plot(
    training_runs[0],
    x0_lo,
    x0_up,
    1,
    2,
    cmax=1000,
)
fig.show()

In [None]:
# 3D plots of adversarial samples (only integrator for now)
# Use the last logged state of the first run. Indexing with -1 instead of a
# hard-coded 1 gives the same state for the integrator (num_training_run = 2),
# matches the other plotting cells, and no longer raises IndexError when a
# configuration logs a single state per run (num_training_run = 1).
# NOTE(review): `adv` is whichever trainer the most recent training loop left
# bound, i.e. the one from the final run, while the state comes from run 0 -
# confirm that bilevel_plot does not need the two to match.
fig = plotting_utils.bilevel_plot(
    training_runs[0][-1],
    adv,
    0,
    1,
    show_buffer=False,
)
fig.show()

# Saving Results

In [None]:
# Pickle the state logs of all runs to a file in the current directory whose
# name contains the system name and a timestamp (second resolution).
now = datetime.now()
runs_path = "training_runs_" + sys_name + "_" + now.strftime("%m%d%Y%H%M%S") + ".p"
# The context manager closes (and therefore flushes) the file even when
# pickling raises; the bare open(...) used before left the handle dangling.
with open(runs_path, "wb") as runs_file:
    pickle.dump(training_runs, runs_file)

In [None]:
# Save every baseline/robust model pair; all files written from this cell
# share one timestamp taken before the loop.
now = datetime.now()
stamp = now.strftime("%m%d%Y%H%M%S")
for model_i, baseline_model in enumerate(baseline_models):
    file_tail = str(model_i) + "_" + stamp + '.pt'
    torch.save(baseline_model, model_file + '_baseline_model_' + file_tail)
    torch.save(robust_models[model_i], model_file + '_robust_model_' + file_tail)

# Saving Specific Model for Control Test

In [None]:
# Pick one run and store its two models under fixed (un-timestamped) names,
# baseline first and robust second.
model_i = 0
for model_list, file_tail in ((baseline_models, '_baseline_model.pt'),
                              (robust_models, '_robust_model.pt')):
    torch.save(model_list[model_i], model_file + file_tail)

# Loading Old Results

In [None]:
# Restore a previously pickled list of training-run state logs.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load archives that were written by this notebook.
# The context manager closes the file once loading finishes or fails; the
# bare open(...) used before left the handle dangling.
with open("training_runs_slipgait_01302020161422.p", "rb") as runs_file:
    training_runs = pickle.load(runs_file)

# Checking if projection is "overfitting" test set

In [None]:
# Same settings as the double integrator example cell at the top of the
# notebook, repeated so this section can be run on its own.
import double_integrator_utils

# Value function object for the double integrator example (built with N=5).
vf = double_integrator_utils.get_value_function(N=5)

# Box of initial states: [-1, 1] along every state dimension.
x0_up = torch.ones(vf.sys.x_dim, dtype=vf.dtype)
x0_lo = -x0_up

# validation grid: one entry (100) per state dimension
num_breaks_validation = vf.sys.x_dim * [100]

# where cached samples and trained models are stored
sys_name = 'double_int'
model_file = '../data/' + sys_name
x_samples_file = '../data/learn_value_function_' + sys_name + '_x'
v_samples_file = '../data/learn_value_function_' + sys_name + '_v'

# neural network size handed to the trainer
nn_depth = 1
nn_width = 16

# Adversarial training options (the grouping below is inferred from the
# option names only).
train_opt = train_value.AdversarialWithBaselineTrainingOptions()
# -- iteration counts
train_opt.init_num_train_steps = 1000
train_opt.num_iter_desired = 1000
train_opt.num_steps_between_sampling = 200
# -- buffer / batch sizes
train_opt.init_buffer_size = 100
train_opt.max_buffer_size = 100000
train_opt.batch_size = 60
train_opt.num_rand_extra = 0
# -- adversarial sample search
train_opt.num_x_adv_opt = 20
train_opt.x_adv_max_iter = 3
train_opt.x_adv_lr = 0.2
train_opt.x_adv_conv_tol = 1e-5

# How many times adv.train(train_opt) is called back-to-back in each fresh run.
num_training_run = 2

In [None]:
# Overrides applied on top of the options configured in the previous cell.
train_opt.baseline_use_limits = True
train_opt.x_adv_max_iter = 2
# Disabled alternative kept for reference:
# train_opt.num_rand_extra = 30

In [None]:
# Validation grid over a box that extends past the training box: every bound
# is pushed outwards by 10% of its own magnitude.
(x_samples_validation,
 v_samples_validation) = vf.get_value_sample_grid(
    x0_lo - 0.1 * torch.abs(x0_lo),
    x0_up + 0.1 * torch.abs(x0_up),
    num_breaks_validation,
    update_progress=True,
)

In [None]:
# Start from empty result containers: per-run state logs plus the baseline
# and robust models collected after each run. Three distinct lists are created.
training_runs, baseline_models, robust_models = [], [], []

In [None]:
# Repeat the experiment several times, each time from a newly constructed
# trainer evaluated against the enlarged validation grid.
num_fresh_runs = 5
for run_i in range(num_fresh_runs):
    # x_samples_init / v_samples_init are not passed in this variant (the
    # original cell has those two keyword arguments commented out).
    adv = train_value.AdversarialWithBaseline(
        vf, x0_lo, x0_up,
        nn_width=nn_width,
        nn_depth=nn_depth,
        x_samples_validation=x_samples_validation,
        v_samples_validation=v_samples_validation,
    )

    # One state snapshot after every consecutive call to train().
    state_log = []
    for _ in range(num_training_run):
        adv.train(train_opt)
        state_log.append(adv.get_state())

    # Keep independent copies of both networks so that later runs cannot
    # alias the stored models.
    baseline_models.append(copy.deepcopy(adv.baseline_model))
    robust_models.append(copy.deepcopy(adv.robust_model))
    training_runs.append(state_log)

    utils.update_progress((run_i + 1) / num_fresh_runs)

In [None]:
# improvement on test set, computed from the last logged state of every run
final_states = [state_log[-1] for state_log in training_runs]
fig = plotting_utils.validation_delta_overunder(final_states, window=50)
fig.show()

In [None]:
# 2D view of the sample buffers logged by the second run (index 1), over the
# initial-state box; 0 and 1 select what is plotted (presumably state
# indices - confirm in plotting_utils).
fig = plotting_utils.buffer_plot(
    training_runs[1],
    x0_lo,
    x0_up,
    0,
    1,
    cmax=10,
)
fig.show()

In [None]:
# Pickle the state logs of the over/under experiment to a file in the current
# directory whose name contains the system name and a timestamp (second
# resolution).
now = datetime.now()
runs_path = "training_runs_overunder_" + sys_name + "_" + now.strftime("%m%d%Y%H%M%S") + ".p"
# The context manager closes (and therefore flushes) the file even when
# pickling raises; the bare open(...) used before left the handle dangling.
with open(runs_path, "wb") as runs_file:
    pickle.dump(training_runs, runs_file)