In [2]:
import numpy as np
import gym


def funcval_decorator(func, terrain):
    """Return a one-argument view of ``func`` with ``terrain`` fixed.

    Parameters
    ----------
    func : callable
        Called as ``func(design_var, terrain)``.
    terrain : object
        Value passed as the second positional argument on every call.

    Returns
    -------
    callable
        ``func_val(design_var, true_val=False)``; it forwards ``design_var``
        together with the captured ``terrain`` and returns ``func``'s result.
    """
    def func_val(design_var, true_val=False):
        # ``true_val`` is accepted for signature compatibility only; it is
        # never forwarded to ``func``.
        outcome = func(design_var, terrain)
        return outcome

    return func_val


class constrained_LunarLanderBenchmark():
    """Score a 12-weight heuristic controller on gym's ``LunarLander-v2``.

    Every design (one weight vector) is flown for ``m_terrains`` episodes.
    Episode ``i`` is seeded with ``i``, so all designs are evaluated on the
    same sequence of seeds. The class uses the classic gym interface
    (``env.seed(...)`` and a 4-tuple returned by ``env.step``).

    Parameters
    ----------
    m_terrains : int
        Number of seeded episodes flown per design.
    minimum_reward : float
        Stored on the instance as ``minimum_reward``; no method of this class
        reads it.
    render : bool, optional
        When True (default, the historical behaviour) every simulation step is
        drawn with ``env.render()``. Pass False for headless batch
        evaluation; drawing each frame is usually far slower than stepping
        the simulation.
    """

    def __init__(self, m_terrains, minimum_reward, render=True):
        self.m_terrains = m_terrains
        self.env = gym.make('LunarLander-v2')
        self.minimum_reward = minimum_reward
        self.render = render

    def f(self, design_var, true_val=False):
        """Mean cumulative reward over terrains, one value per design.

        ``true_val`` is accepted for interface compatibility and ignored.
        Returns an array of shape ``(n_designs,)``.
        """
        return np.mean(self.simulator(design_var), axis=1)

    def c(self, design_var, true_val=True):
        """Per-terrain cumulative rewards, shape ``(n_designs, m_terrains)``.

        ``true_val`` is accepted for interface compatibility and ignored.
        """
        return self.simulator(design_var)

    def c_min(self, design_var, true_val=True):
        """Worst (minimum) cumulative reward over terrains, one value per design.

        ``true_val`` is accepted for interface compatibility and ignored.
        Returns an array of shape ``(n_designs,)``.
        """
        return np.min(self.simulator(design_var), axis=1)

    def simulator(self, design_var, true_val=False):
        """Fly every design on every seeded terrain.

        Parameters
        ----------
        design_var : array-like
            One weight vector or a 2-D stack of them (promoted with
            ``np.atleast_2d``); each row is passed to
            ``heuristic_Controller`` and must hold 12 weights.
        true_val : bool, optional
            Accepted for interface compatibility and ignored.

        Returns
        -------
        numpy.ndarray
            Cumulative episode rewards, shape ``(n_designs, m_terrains)``.

        Side effects
        ------------
        Stores the promoted designs in ``self.last_evaluated_design_var`` and
        closes the environment once when the batch finishes (also when it is
        interrupted).
        """
        designs = np.atleast_2d(design_var)
        self.last_evaluated_design_var = designs

        reward_matrix = []
        try:
            for weights in designs:
                reward_matrix.append(
                    [self._run_episode(weights, terrain_seed)
                     for terrain_seed in range(self.m_terrains)]
                )
        finally:
            # Close once per batch instead of after every episode, and make
            # sure the close still happens if the run is interrupted.
            self.env.close()

        return np.array(reward_matrix)

    def _run_episode(self, weights, terrain_seed):
        """Fly one episode seeded with ``terrain_seed``; return its cumulative reward."""
        self.env.seed(seed=terrain_seed)  # terrain landscape control
        observation = self.env.reset()
        cum_reward = 0
        done = False
        while not done:
            if self.render:
                self.env.render()
            action = self.heuristic_Controller(observation, weights)
            observation, reward, done, info = self.env.step(action)
            cum_reward += reward
        return cum_reward

    def heuristic_Controller(self, s, w):
        """Map an observation to a discrete action using 12 tunable weights.

        Parameters
        ----------
        s : sequence
            Observation; entries 0-7 are read. From the way they are used:
            ``s[0]`` horizontal offset, ``s[1]`` height, ``s[3]`` vertical
            speed, ``s[4]`` angle, ``s[6]``/``s[7]`` leg-contact flags.
            ``s[2]`` and ``s[5]`` are presumably horizontal and angular
            velocity (per the gym LunarLander docs; confirm).
        w : sequence
            The 12 weights. The hand-tuned reference values for indices
            0..11 are
            ``0.5, 1.0, 0.4, 0.55, 0.5, 1.0, 0.5, 0.5, 0, 0.5, 0.05, 0.05``.

        Returns
        -------
        int
            ``0`` (no action), ``2`` when the hover demand dominates, or
            ``3`` / ``1`` when the angle demand is below ``-w[11]`` / above
            ``+w[11]``. In gym's LunarLander these are presumably "main
            engine" and the two side engines (confirm against the gym docs).
        """
        # Angle should point towards the centre.
        angle_targ = s[0] * w[0] + s[2] * w[1]
        # Limit the target angle to +/- w[2] (reference 0.4 rad, about 22 degrees).
        if angle_targ > w[2]:
            angle_targ = w[2]
        if angle_targ < -w[2]:
            angle_targ = -w[2]
        # Target height is proportional to the horizontal offset.
        hover_targ = w[3] * np.abs(s[0])

        angle_todo = (angle_targ - s[4]) * w[4] - (s[5]) * w[5]
        hover_todo = (hover_targ - s[1]) * w[6] - (s[3]) * w[7]

        if s[6] or s[7]:
            # Legs have contact: fixed angle demand, and only reduce fall speed.
            angle_todo = w[8]
            hover_todo = -(s[3]) * w[9]

        a = 0
        if hover_todo > np.abs(angle_todo) and hover_todo > w[10]:
            a = 2
        elif angle_todo < -w[11]:
            a = 3
        elif angle_todo > +w[11]:
            a = 1
        return a

In [3]:
# Fly one hand-picked weight vector on 30 seeded terrains and display its
# mean cumulative reward.
m_terrains = 30
lunarlander_class = constrained_LunarLanderBenchmark(
    m_terrains=m_terrains,
    minimum_reward=200,
)
# Hand-tuned reference weights, kept for comparison:
# weight = np.array([[0.5, 1.0, 0.4, 0.55, 0.5,1.0,0.5,0.5, 0, 0.5, 0.05, 0.05]])
weight = np.array([[
    0., 1.60442364, 1.17352496, 2.,
    2., 2., 0., 1.45697319,
    0., 1.82126096, 0.18713509, 0.,
]])
lunarlander_class.f(weight)

KeyboardInterrupt: 

In [None]:
# Smallest value returned by `c` for the design(s) in `weight`. With the class
# defined in this notebook, `c` returns the per-terrain cumulative rewards, so
# this is the worst single-terrain reward.
np.min(lunarlander_class.c(weight))

In [None]:
# NOTE(review): scratch arithmetic with unexplained constants. 200 equals the
# `minimum_reward` passed to the benchmark, but the origin of 4.8421 and the
# role of 50 are not shown anywhere in this notebook - document or remove.
-(4.8421*50 -200)/50


In [None]:
import matplotlib.pyplot as plt

# Sweep one controller weight at a time over [0, 2] while every other weight
# stays at its value in `weight`, then plot the mean reward and the
# worst-terrain reward along the sweep.
reward_slice = np.linspace(0, 2, 50)
basemx_original = np.ones((50, 12)) * weight

for i in range(1):
    # Work on a copy. Plain assignment only aliases `basemx_original`, so the
    # sweep would overwrite the baseline and could never be undone for the
    # next dimension.
    basemx_modf = basemx_original.copy()
    basemx_modf[:, i] = reward_slice

    # One simulation serves both curves: rows are designs, columns terrains.
    # (The class defined in this notebook has no `c_min`; the minimum over
    # terrains is taken here directly.)
    rewards_per_terrain = lunarlander_class.simulator(basemx_modf)
    reward = np.mean(rewards_per_terrain, axis=1)
    constraint = np.min(rewards_per_terrain, axis=1)

    fig, ax = plt.subplots()
    ax.set_title("dim_" + str(i))
    ax.plot(reward_slice, reward)
    ax.set_xlabel("weight value")
    ax.set_ylabel("mean cumulative reward")
    plt.show()
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.set_title("dim_" + str(i))
    ax.plot(reward_slice, constraint)
    ax.set_xlabel("weight value")
    ax.set_ylabel("minimum cumulative reward over terrains")
    plt.show()
    plt.close(fig)

In [None]:
import matplotlib.pyplot as plt

# Sweep one controller weight over [0, 2] and plot the cumulative reward on
# each of the first 10 terrains separately.
reward_slice = np.linspace(0, 2, 20)
basemx_original = np.ones((20, 12)) * weight

for i in range(1):
    # Copy so the baseline matrix is not overwritten through an alias.
    basemx_modf = basemx_original.copy()
    basemx_modf[:, i] = reward_slice

    # Simulate once per swept dimension: rows are designs, columns terrains.
    # Previously the terrain-averaged reward was recomputed and re-plotted
    # for every `t`, so all "terrain_t" figures showed the same curve.
    rewards_per_terrain = lunarlander_class.simulator(basemx_modf)

    for t in range(min(10, rewards_per_terrain.shape[1])):
        fig, ax = plt.subplots()
        ax.set_title("dim_" + str(i) + "_terrain_" + str(t))
        ax.plot(reward_slice, rewards_per_terrain[:, t])
        ax.set_xlabel("weight value")
        ax.set_ylabel("cumulative reward")
        plt.show()
        plt.close(fig)

In [None]:
def _update_model( model, model_c, X, Y, C):
    """
    Updates the model (when more than one observation is available) and saves the parameters (if available).
    """

    ### --- input that goes into the model (is unziped in case there are categorical variables)
    X_inmodel = space.unzip_inputs(X)
    Y_inmodel = list(Y)
    C_inmodel = list(C)


    model.updateModel(X_inmodel, Y_inmodel)
    model_c.updateModel(X_inmodel, C_inmodel)
    return model, model_c

In [None]:
# NOTE: this import rebinds `constrained_LunarLanderBenchmark`, shadowing the
# class defined earlier in the notebook. The imported class must provide
# `c_builder`, which the notebook-defined class does not.
from Real_Experiments.LunarLander.real_functions_caller import constrained_LunarLanderBenchmark
import GPy
from multi_objective import MultiObjective
from multi_outputGP import multi_outputGP
import GPyOpt
from pyDOE import lhs  # explicit name instead of `import *`

m_terrains = 1

lunarlander_class = constrained_LunarLanderBenchmark(m_terrains=m_terrains, minimum_reward=200)
lunar_lander_constraints = lunarlander_class.c_builder()

# --- Objective and constraints wrapped for the multi-output interface
f = MultiObjective([lunarlander_class.f])
c = MultiObjective(lunar_lander_constraints)

# --- Space: every controller weight is continuous on the same interval
input_size = 12
domain_low, domain_high = 0.0, 2
space = GPyOpt.Design_space(
    space=[{'name': 'var', 'type': 'continuous', 'domain': (domain_low, domain_high)}] * input_size
)

# --- Surrogate models: one output for the objective, one per terrain constraint
n_f = 1
n_c = m_terrains
model_f = multi_outputGP(output_dim=n_f, noise_var=[1e-06] * n_f, exact_feval=[True] * n_f)
model_c = multi_outputGP(output_dim=n_c, noise_var=[1e-06] * n_c, exact_feval=[True] * n_c)

# --- Initial design
# `lhs` samples the unit cube; rescale to the declared domain so the initial
# design covers the whole search space rather than only [0, 1]^input_size.
n_samples = 50
X = domain_low + (domain_high - domain_low) * lhs(input_size, n_samples)

Y, cost_values = f.evaluate(X)
C, cost_values = c.evaluate(X)

# Despite its name, `varY` is the standard deviation of Y; it is used as the
# scale factor for the objective.
varY = np.std(Y)

Ytransf = Y / varY
model_f, model_c = _update_model(model_f, model_c, X, Ytransf, C)

In [None]:
# Display what the objective model reports as its parameters
# (presumably the fitted hyper-parameters - confirm against multi_outputGP).
model_f.get_model_parameters()

In [None]:
# Scale factor applied to the objective: computed as np.std(Y), i.e. a
# standard deviation despite the name.
varY

In [None]:
# Largest unscaled objective value among the evaluated initial designs.
np.max(Y)

In [None]:
import matplotlib.pyplot as plt

# Hold-out check of the objective surrogate: evaluate fresh designs and
# compare the (scaled) true objective with the model's predictive mean.
# NOTE(review): `lhs` output is used as-is, i.e. points in the unit cube,
# which is only part of the (0, 2) design domain - confirm this is intended.
n_samples = 100
X_test = lhs(12, n_samples)

Y_test, cost_values = f.evaluate(X_test)
C_test, cost_values = c.evaluate(X_test)

muY = model_f.predict(X_test)[0]
muC = model_c.predict(X_test)[0]

# Same scaling that was applied to the training objective.
Y_testtransf = Y_test / varY

# End points of the y = x reference line, spanning both truth and prediction.
diag_lo = np.min([np.min(Y_testtransf), np.min(muY)])
diag_hi = np.max([np.max(Y_testtransf), np.max(muY)])
diagonal = np.linspace(diag_lo, diag_hi, 2)

plt.title("objective")
plt.scatter(Y_testtransf, muY)
plt.plot(diagonal, diagonal, color="black")
plt.xlabel("test vals")
plt.ylabel("predict vals")
plt.show()

In [None]:
# Largest unscaled objective value among the hold-out designs.
np.max(Y_test)