Shunichi09
diff --git a/‎Environments.md‎
Lines changed: 26 additions & 4 deletions b/‎Environments.md‎
Lines changed: 26 additions & 4 deletions
diff --git a/‎PythonLinearNonlinearControl/configs/cartpole.py‎
Lines changed: 218 additions & 0 deletions b/‎PythonLinearNonlinearControl/configs/cartpole.py‎
Lines changed: 218 additions & 0 deletions
diff --git a/‎PythonLinearNonlinearControl/configs/make_configs.py‎
Lines changed: 4 additions & 1 deletion b/‎PythonLinearNonlinearControl/configs/make_configs.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎PythonLinearNonlinearControl/envs/cartpole.py‎
Lines changed: 39 additions & 12 deletions b/‎PythonLinearNonlinearControl/envs/cartpole.py‎
Lines changed: 39 additions & 12 deletions
diff --git a/‎PythonLinearNonlinearControl/envs/make_envs.py‎
Lines changed: 2 additions & 2 deletions b/‎PythonLinearNonlinearControl/envs/make_envs.py‎
Lines changed: 2 additions & 2 deletions
@@ -9,26 +9,48 @@
 
 ## FirstOrderLagEnv
 
-System equations.
+### System equation.
 
 <img src="assets/firstorderlag.png" width="550">
 
 You can set an arbitrary time constant, tau. The default is 0.63 s.
 
+### Cost.
+
+<img src="assets/quadratic_score.png" width="200">
+
+Q = diag[1., 1., 1., 1.], 
+R = diag[1., 1.]
+
+X_g denotes the goal states.
+
 ## TwoWheeledEnv
 
-System equations.
+### System equation.
 
 <img src="assets/twowheeled.png" width="300">
 
+### Cost.
+
+<img src="assets/quadratic_score.png" width="200">
+
+Q = diag[5., 5., 1.], 
+R = diag[0.1, 0.1]
+
+X_g denotes the goal states.
+
 ## CartPoleEnv (Swing up)
 
-System equations.
+### System equation.
 
 <img src="assets/cartpole.png" width="600">
 
 You can set arbitrary parameters: mc, mp, l and g.
 
 Default settings are as follows:
 
-mc = 1, mp = 0.2, l = 0.5, g = 9.8
+mc = 1, mp = 0.2, l = 0.5, g = 9.81
+
+### Cost.
+
+<img src="assets/cartpole_score.png" width="300">
@@ -0,0 +1,218 @@
+import numpy as np
+
+class CartPoleConfigModule():
+    # CartPole (swing up) config: task parameters, cost functions, optimizer settings
+    ENV_NAME = "CartPole-v0"
+    TYPE = "Nonlinear"
+    TASK_HORIZON = 500
+    PRED_LEN = 50
+    STATE_SIZE = 4
+    INPUT_SIZE = 1
+    DT = 0.02
+    # cost parameters (input weight, used by input_cost_fn)
+    R = np.diag([0.01])
+    # input bounds
+    INPUT_LOWER_BOUND = np.array([-3.])
+    INPUT_UPPER_BOUND = np.array([3.])
+    # model parameters
+    MP = 0.2
+    MC = 1.
+    L = 0.5
+    G = 9.81
+
+    def __init__(self):
+        """ Build the per-optimizer settings, stored in self.opt_config.
+        """
+        # opt configs, keyed by optimizer name
+        self.opt_config = {
+            "Random": {
+                "popsize": 5000
+            },
+            "CEM": {
+                "popsize": 500,
+                "num_elites": 50,
+                "max_iters": 15,
+                "alpha": 0.3,
+                "init_var":9.,
+                "threshold":0.001
+            },
+            "MPPI":{
+                "beta" : 0.6,
+                "popsize": 5000,
+                "kappa": 0.9,
+                "noise_sigma": 0.5,
+            },
+            "MPPIWilliams":{
+                "popsize": 5000,
+                "lambda": 1.,
+                "noise_sigma": 0.9,
+            },
+           "iLQR":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "DDP":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "NMPC-CGMRES":{
+           },
+           "NMPC-Newton":{
+           },
+        } 
+
+    @staticmethod
+    def input_cost_fn(u):
+        """ input cost function, u**2 weighted element-wise by the diagonal of R
+        Args:
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+                or shape(pop_size, pred_len, input_size)
+        Returns:
+            cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
+                shape(pop_size, pred_len, input_size)
+        """
+        return (u**2) * np.diag(CartPoleConfigModule.R)
+    
+    @staticmethod
+    def state_cost_fn(x, g_x):
+        """ state cost: 6*x0^2 + 12*(cos(x2) + 1)^2 + 0.1*x1^2 + 0.1*x3^2
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+                or shape(pop_size, pred_len, state_size)
+            g_x (numpy.ndarray): goal state (unused here), shape(pred_len, state_size)
+                or shape(pop_size, pred_len, state_size)
+        Returns:
+            cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
+                shape(pop_size, pred_len, 1); a scalar when x is 1-D
+        """
+        # one branch per input rank (3-D, 2-D, 1-D); the cost expression is identical
+        if len(x.shape) > 2:
+            return (6. * (x[:, :, 0]**2) \
+                   + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
+                   + 0.1 * (x[:, :, 1]**2) \
+                   + 0.1 *  (x[:, :, 3]**2))[:, :, np.newaxis]
+
+        elif len(x.shape) > 1:
+            return (6. * (x[:, 0]**2) \
+                   + 12. * ((np.cos(x[:, 2]) + 1.)**2) \
+                   + 0.1 * (x[:, 1]**2) \
+                   + 0.1 * (x[:, 3]**2))[:,  np.newaxis]
+            
+        return 6. * (x[0]**2) \
+               + 12. * ((np.cos(x[2]) + 1.)**2) \
+               + 0.1 * (x[1]**2) \
+               + 0.1 * (x[3]**2)
+
+    @staticmethod
+    def terminal_state_cost_fn(terminal_x, terminal_g_x):
+        """ terminal state cost, same expression as state_cost_fn
+        Args:
+            terminal_x (numpy.ndarray): terminal state,
+                shape(state_size, ) or shape(pop_size, state_size)
+            terminal_g_x (numpy.ndarray): terminal goal state (unused here),
+                shape(state_size, ) or shape(pop_size, state_size)
+        Returns:
+            cost (numpy.ndarray): terminal cost, shape(pop_size, 1) or
+                a scalar when terminal_x is 1-D
+        """
+        # 2-D input: one terminal state per row
+        if len(terminal_x.shape) > 1:
+            return (6. * (terminal_x[:, 0]**2) \
+                   + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
+                   + 0.1 * (terminal_x[:, 1]**2) \
+                   + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis]
+            
+        return 6. * (terminal_x[0]**2) \
+               + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
+               + 0.1 * (terminal_x[1]**2) \
+               + 0.1 * (terminal_x[3]**2)
+    
+    @staticmethod
+    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+        """ gradient of costs with respect to the state (stub: always returns None)
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+        
+        Returns:
+            l_x (None): placeholder for the gradient of cost,
+                intended shape(pred_len, state_size) or shape(1, state_size)
+        """
+        if not terminal:
+            return None
+        
+        return None
+
+    @staticmethod
+    def gradient_cost_fn_with_input(x, u):
+        """ gradient of costs with respect to the input (stub: always returns None)
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+        
+        Returns:
+            l_u (None): placeholder, intended shape(pred_len, input_size)
+        """
+        return None
+
+    @staticmethod
+    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+        """ hessian of costs with respect to the state (stub: always returns None)
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+        
+        Returns:
+            l_xx (None): placeholder for the hessian of cost, intended
+                shape(pred_len, state_size, state_size) or
+                shape(1, state_size, state_size)
+        """
+        if not terminal:
+            (pred_len, _) = x.shape
+            return None              
+        
+        return None
+
+    @staticmethod
+    def hessian_cost_fn_with_input(x, u):
+        """ hessian of costs with respect to the input (stub: always returns None)
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+        
+        Returns:
+            l_uu (None): placeholder for the hessian of cost, intended
+                shape(pred_len, input_size, input_size)
+        """
+        (pred_len, _) = u.shape  # pred_len is not used yet
+
+        return None
+    
+    @staticmethod
+    def hessian_cost_fn_with_input_state(x, u):
+        """ hessian of costs with respect to the input and state (always zero)
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+        
+        Returns:
+            l_ux (numpy.ndarray): hessian of cost, all zeros,
+                shape(pred_len, input_size, state_size)
+        """
+        (_, state_size) = x.shape
+        (pred_len, input_size) = u.shape
+
+        return np.zeros((pred_len, input_size, state_size))
@@ -1,5 +1,6 @@
 from .first_order_lag import FirstOrderLagConfigModule
 from .two_wheeled import TwoWheeledConfigModule
+from .cartpole import CartPoleConfigModule
 
 def make_config(args):
     """
@@ -9,4 +10,6 @@ def make_config(args):
     if args.env == "FirstOrderLag":
         return FirstOrderLagConfigModule()
     elif args.env == "TwoWheeledConst" or args.env == "TwoWheeled":
-        return TwoWheeledConfigModule()
+        return TwoWheeledConfigModule()
+    elif args.env == "CartPole":
+        return CartPoleConfigModule()
@@ -14,12 +14,16 @@ class CartPoleEnv(Env):
     def __init__(self):
         """
         """
-        self.config = {"state_size" : 4,\
-                       "input_size" : 1,\
-                       "dt" : 0.02,\
-                       "max_step" : 1000,\
-                       "input_lower_bound": None,\
-                       "input_upper_bound": None,
+        self.config = {"state_size" : 4,
+                       "input_size" : 1,
+                       "dt" : 0.02,
+                       "max_step" : 500,
+                       "input_lower_bound": [-3.],
+                       "input_upper_bound": [3.],
+                       "mp": 0.2,
+                       "mc": 1.,
+                       "l": 0.5,
+                       "g": 9.81,
                        }
 
         super(CartPoleEnv, self).__init__(self.config)
@@ -33,13 +37,13 @@ def reset(self, init_x=None):
         """
         self.step_count = 0
 
-        self.curr_x = np.zeros(self.config["state_size"])
+        self.curr_x = np.array([0., 0., 0., 0.])
 
         if init_x is not None:
             self.curr_x = init_x
 
         # goal
-        self.g_x = np.array([0., 0., np.pi, 0.])
+        self.g_x = np.array([0., 0., -np.pi, 0.])
 
         # clear memory
         self.history_x = []
@@ -65,20 +69,43 @@ def step(self, u):
                         self.config["input_upper_bound"])
 
         # step
-        next_x = np.zeros(self.config["state_size"])
+        # x
+        d_x0 = self.curr_x[1]
+        # v_x
+        d_x1 = (u[0] + self.config["mp"] * np.sin(self.curr_x[2]) \
+               * (self.config["l"] * (self.curr_x[3]**2) \
+                  + self.config["g"] * np.cos(self.curr_x[2]))) \
+               / (self.config["mc"] + self.config["mp"] \
+                  * (np.sin(self.curr_x[2])**2))
+        # theta
+        d_x2 = self.curr_x[3]
+        
+        # v_theta
+        d_x3 = (-u[0] * np.cos(self.curr_x[2]) \
+                - self.config["mp"] * self.config["l"] * (self.curr_x[3]**2) \
+                  * np.cos(self.curr_x[2]) * np.sin(self.curr_x[2]) \
+                - (self.config["mc"] + self.config["mp"]) * self.config["g"] \
+                   * np.sin(self.curr_x[2])) \
+               / (self.config["l"] * (self.config["mc"] + self.config["mp"] \
+                                      * (np.sin(self.curr_x[2])**2)))
+        
+        next_x = self.curr_x +\
+                 np.array([d_x0, d_x1, d_x2, d_x3]) * self.config["dt"] 
 
         # TODO: costs
         costs = 0.
         costs += 0.1 * np.sum(u**2)
-        costs += np.sum((self.curr_x - self.g_x)**2)
-
+        costs += 6. * self.curr_x[0]**2 \
+                 + 12. * (np.cos(self.curr_x[2]) + 1.)**2 \
+                 + 0.1 * self.curr_x[1]**2 \
+                 + 0.1 * self.curr_x[3]**2
 
         # save history
         self.history_x.append(next_x.flatten())
         self.history_g_x.append(self.g_x.flatten())
 
         # update
-        self.curr_x = next_x.flatten()
+        self.curr_x = next_x.flatten().copy()
         # update costs
         self.step_count += 1
 
 
@@ -1,6 +1,6 @@
 from .first_order_lag import FirstOrderLagEnv
 from .two_wheeled import TwoWheeledConstEnv
-from .cartpole import CartpoleEnv
+from .cartpole import CartPoleEnv
 
 def make_env(args):
 
@@ -9,6 +9,6 @@ def make_env(args):
     elif args.env == "TwoWheeledConst":
         return TwoWheeledConstEnv()
     elif args.env == "CartPole":
-        return CartpoleEnv()
+        return CartPoleEnv()
 
     raise NotImplementedError("There is not {} Env".format(args.env))