Shunichi09 · Shunichi09 · Feb 13, 2021 · Feb 13, 2021 · Feb 13, 2021 · Feb 13, 2021
diff --git a/Environments.md b/Environments.md
@@ -6,6 +6,8 @@
 | Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | 
 | Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | 
 | Cartpole (Swing up) | x | ✓ | 4 | 1 | 
+| Nonlinear Sample System Env | x | ✓ | 2 | 1 | 
+
 
 ## [FistOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
 
@@ -53,4 +55,14 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81
 
 ### Cost.
 
-<img src="assets/cartpole_score.png" width="300">
+<img src="assets/cartpole_score.png" width="300">
+
+## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
+
+## System equation.
+
+<img src="assets/nonlinear_sample_system.png" width="400">
+
+### Cost.
+
+<img src="assets/nonlinear_sample_system_score.png" width="400">
diff --git a/PythonLinearNonlinearControl/common/utils.py b/PythonLinearNonlinearControl/common/utils.py
@@ -1,8 +1,9 @@
 import numpy as np
 
+
 def rotate_pos(pos, angle):
     """ Transformation the coordinate in the angle
-    
+
     Args:
         pos (numpy.ndarray): local state, shape(data_size, 2) 
         angle (float): rotate angle, in radians
@@ -14,9 +15,10 @@ def rotate_pos(pos, angle):
 
     return np.dot(pos, rot_mat.T)
 
+
 def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
     """ Check angle range and correct the range
-    
+
     Args:
         angle (numpy.ndarray): in radians
         min_angle (float): maximum of range in radians, default -pi
@@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
     if (max_angle - min_angle) < 2.0 * np.pi:
         raise ValueError("difference between max_angle \
                           and min_angle must be greater than 2.0 * pi")
-    
+
     output = np.array(angles)
     output_shape = output.shape
 
@@ -41,4 +43,76 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
     output += min_angle
 
     output = np.minimum(max_angle, np.maximum(min_angle, output))
-    return output.reshape(output_shape)
+    return output.reshape(output_shape)
+
+
+def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
+    """ update state in Runge Kutta methods
+    Args:
+        state (array-like): state of system
+        u (array-like): input of system
+        functions (list): update function of each state,
+            each function will be called like func(state, u)
+            We expect that this function returns differential of each state 
+        dt (float): float in seconds
+        batch (bool): state and u is given by batch or not
+
+    Returns:
+        next_state (np.array): next state of system
+
+    Notes:
+        sample of function is as follows:
+
+        def func_x(self, x_1, x_2, u):
+            x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
+            return x_dot
+
+        Note that the function return x_dot.
+    """
+    if not batch:
+        state_size = len(state)
+        assert state_size == len(functions), \
+            "Invalid functions length, You need to give the state size functions"
+
+        k0 = np.zeros(state_size)
+        k1 = np.zeros(state_size)
+        k2 = np.zeros(state_size)
+        k3 = np.zeros(state_size)
+
+        for i, func in enumerate(functions):
+            k0[i] = dt * func(state, u)
+
+        for i, func in enumerate(functions):
+            k1[i] = dt * func(state + k0 / 2., u)
+
+        for i, func in enumerate(functions):
+            k2[i] = dt * func(state + k1 / 2., u)
+
+        for i, func in enumerate(functions):
+            k3[i] = dt * func(state + k2, u)
+
+        return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
+
+    else:
+        batch_size, state_size = state.shape
+        assert state_size == len(functions), \
+            "Invalid functions length, You need to give the state size functions"
+
+        k0 = np.zeros((batch_size, state_size))
+        k1 = np.zeros((batch_size, state_size))
+        k2 = np.zeros((batch_size, state_size))
+        k3 = np.zeros((batch_size, state_size))
+
+        for i, func in enumerate(functions):
+            k0[:, i] = dt * func(state, u)
+
+        for i, func in enumerate(functions):
+            k1[:, i] = dt * func(state + k0 / 2., u)
+
+        for i, func in enumerate(functions):
+            k2[:, i] = dt * func(state + k1 / 2., u)
+
+        for i, func in enumerate(functions):
+            k3[:, i] = dt * func(state + k2, u)
+
+        return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.
diff --git a/PythonLinearNonlinearControl/configs/cartpole.py b/PythonLinearNonlinearControl/configs/cartpole.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 class CartPoleConfigModule():
     # parameters
     ENV_NAME = "CartPole-v0"
@@ -12,7 +13,7 @@ class CartPoleConfigModule():
     DT = 0.02
     # cost parameters
     R = np.diag([0.01])  # 0.01 is worked for MPPI and CEM and MPPIWilliams
-                       # 1. is worked for iLQR 
+    # 1. is worked for iLQR
     TERMINAL_WEIGHT = 1.
     Q = None
     Sf = None
@@ -39,41 +40,41 @@ def __init__(self):
                 "num_elites": 50,
                 "max_iters": 15,
                 "alpha": 0.3,
-                "init_var":9.,
-                "threshold":0.001
+                "init_var": 9.,
+                "threshold": 0.001
             },
-            "MPPI":{
-                "beta" : 0.6,
+            "MPPI": {
+                "beta": 0.6,
                 "popsize": 5000,
                 "kappa": 0.9,
                 "noise_sigma": 0.5,
             },
-            "MPPIWilliams":{
+            "MPPIWilliams": {
                 "popsize": 5000,
                 "lambda": 1.,
                 "noise_sigma": 0.9,
             },
-           "iLQR":{
+            "iLQR": {
                 "max_iter": 500,
                 "init_mu": 1.,
                 "mu_min": 1e-6,
                 "mu_max": 1e10,
                 "init_delta": 2.,
                 "threshold": 1e-6,
-           },
-           "DDP":{
+            },
+            "DDP": {
                 "max_iter": 500,
                 "init_mu": 1.,
                 "mu_min": 1e-6,
                 "mu_max": 1e10,
                 "init_delta": 2.,
                 "threshold": 1e-6,
-           },
-           "NMPC-CGMRES":{
-           },
-           "NMPC-Newton":{
-           },
-        } 
+            },
+            "NMPC-CGMRES": {
+            },
+            "NMPC-Newton": {
+            },
+        }
 
     @staticmethod
     def input_cost_fn(u):
@@ -87,7 +88,7 @@ def input_cost_fn(u):
                 shape(pop_size, pred_len, input_size)
         """
         return (u**2) * np.diag(CartPoleConfigModule.R)
-    
+
     @staticmethod
     def state_cost_fn(x, g_x):
         """ state cost function
@@ -103,21 +104,21 @@ def state_cost_fn(x, g_x):
         """
 
         if len(x.shape) > 2:
-            return (6. * (x[:, :, 0]**2) \
-                   + 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
-                   + 0.1 * (x[:, :, 1]**2) \
-                   + 0.1 *  (x[:, :, 3]**2))[:, :, np.newaxis]
+            return (6. * (x[:, :, 0]**2)
+                    + 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
+                    + 0.1 * (x[:, :, 1]**2)
+                    + 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
 
         elif len(x.shape) > 1:
-            return (6. * (x[:, 0]**2) \
-                   + 12. * ((np.cos(x[:, 2]) + 1.)**2) \
-                   + 0.1 * (x[:, 1]**2) \
-                   + 0.1 * (x[:, 3]**2))[:,  np.newaxis]
-            
+            return (6. * (x[:, 0]**2)
+                    + 12. * ((np.cos(x[:, 2]) + 1.)**2)
+                    + 0.1 * (x[:, 1]**2)
+                    + 0.1 * (x[:, 3]**2))[:,  np.newaxis]
+
         return 6. * (x[0]**2) \
-               + 12. * ((np.cos(x[2]) + 1.)**2) \
-               + 0.1 * (x[1]**2) \
-               + 0.1 * (x[3]**2)
+            + 12. * ((np.cos(x[2]) + 1.)**2) \
+            + 0.1 * (x[1]**2) \
+            + 0.1 * (x[3]**2)
 
     @staticmethod
     def terminal_state_cost_fn(terminal_x, terminal_g_x):
@@ -134,45 +135,45 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
         """
 
         if len(terminal_x.shape) > 1:
-            return (6. * (terminal_x[:, 0]**2) \
-                   + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
-                   + 0.1 * (terminal_x[:, 1]**2) \
-                   + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
-                    * CartPoleConfigModule.TERMINAL_WEIGHT
-
-        return (6. * (terminal_x[0]**2) \
-               + 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
-               + 0.1 * (terminal_x[1]**2) \
-               + 0.1 * (terminal_x[3]**2)) \
+            return (6. * (terminal_x[:, 0]**2)
+                    + 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
+                    + 0.1 * (terminal_x[:, 1]**2)
+                    + 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
                 * CartPoleConfigModule.TERMINAL_WEIGHT
-
+
+        return (6. * (terminal_x[0]**2)
+                + 12. * ((np.cos(terminal_x[2]) + 1.)**2)
+                + 0.1 * (terminal_x[1]**2)
+                + 0.1 * (terminal_x[3]**2)) \
+            * CartPoleConfigModule.TERMINAL_WEIGHT
+
     @staticmethod
     def gradient_cost_fn_with_state(x, g_x, terminal=False):
         """ gradient of costs with respect to the state
 
         Args:
             x (numpy.ndarray): state, shape(pred_len, state_size)
             g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
-        
+
         Returns:
             l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
                 or shape(1, state_size)
         """
         if not terminal:
-            cost_dx0 = 12. * x[:, 0] 
+            cost_dx0 = 12. * x[:, 0]
             cost_dx1 = 0.2 * x[:, 1]
             cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
             cost_dx3 = 0.2 * x[:, 3]
-            cost_dx = np.stack((cost_dx0, cost_dx1,\
+            cost_dx = np.stack((cost_dx0, cost_dx1,
                                 cost_dx2, cost_dx3), axis=1)
             return cost_dx
-        
-        cost_dx0 = 12. * x[0] 
+
+        cost_dx0 = 12. * x[0]
         cost_dx1 = 0.2 * x[1]
         cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
         cost_dx3 = 0.2 * x[3]
         cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
-        
+
         return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
 
     @staticmethod
@@ -206,21 +207,21 @@ def hessian_cost_fn_with_state(x, g_x, terminal=False):
             hessian[:, 0, 0] = 12.
             hessian[:, 1, 1] = 0.2
             hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
-                               * (-np.sin(x[:, 2])) \
-                               + 24. * (1. + np.cos(x[:, 2])) \
-                                 * -np.cos(x[:, 2])
+                * (-np.sin(x[:, 2])) \
+                + 24. * (1. + np.cos(x[:, 2])) \
+                * -np.cos(x[:, 2])
             hessian[:, 3, 3] = 0.2
 
             return hessian
-        
+
         state_size = len(x)
         hessian = np.eye(state_size)
         hessian[0, 0] = 12.
         hessian[1, 1] = 0.2
         hessian[2, 2] = 24. * -np.sin(x[2]) \
-                         * (-np.sin(x[2])) \
-                        + 24. * (1. + np.cos(x[2])) \
-                          * -np.cos(x[2])
+            * (-np.sin(x[2])) \
+            + 24. * (1. + np.cos(x[2])) \
+            * -np.cos(x[2])
         hessian[3, 3] = 0.2
 
         return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
@@ -239,7 +240,7 @@ def hessian_cost_fn_with_input(x, u):
         (pred_len, _) = u.shape
 
         return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
-    
+
     @staticmethod
     def hessian_cost_fn_with_input_state(x, u):
         """ hessian costs with respect to the state and input
@@ -254,4 +255,4 @@ def hessian_cost_fn_with_input_state(x, u):
         (_, state_size) = x.shape
         (pred_len, input_size) = u.shape
 
-        return np.zeros((pred_len, input_size, state_size))
+        return np.zeros((pred_len, input_size, state_size))