In [5]:
import numpy as np
import gym
from types import SimpleNamespace

In [6]:
# Physical constants (SI units).
const = SimpleNamespace()
const.mu = 3.986e14  # Earth's gravitational parameter GM [m^3/s^2]
# Mean Earth radius [m]. The earlier value 6037.1e3 had its digits scrambled,
# which put the orbit ~334 km too low and inflated sat.mean_motion by ~8 %.
const.Re = 6371.0e3

# Satellite model parameters.
sat = SimpleNamespace()
sat.J = np.array([[75, 1, 2], [1, 40, -1], [2, -1, 80]]) # \pm20%
sat.J_inv = np.linalg.inv(sat.J)
sat.altitude = 600e3  # added to const.Re to obtain the orbit radius [m]
# Mean motion of a circular orbit: sqrt(mu / r^3) [rad/s].
sat.mean_motion = np.sqrt(const.mu / (const.Re + sat.altitude)**3.)
sat.fm_freq = np.array([5.6, 19.3, 35.4]) # flexible mode frequencies [rad/s] \pm20%
sat.dzeta = np.array([0.005, 0.005, 0.005]) # flexible mode damping 
# Diagonal modal damping (2 * dzeta * freq) and stiffness (freq^2) matrices.
sat.D = np.diag(2 * sat.dzeta * sat.fm_freq)
sat.K = np.diag(sat.fm_freq * sat.fm_freq)
# presumably the rigid-body / flexible-mode coupling matrix -- TODO confirm
sat.L_p = np.array([[0, 2.51, 0], [-3.84, 0, 0], [0, 12.5, 0]]) * 0.1
# Precomputed inverses (I - J^-1 L_p^T L_p)^-1 and (I - L_p J^-1 L_p^T)^-1.
sat.Aom = np.linalg.inv(np.identity(3) - sat.J_inv.dot(sat.L_p.T).dot(sat.L_p))
sat.Aeta = np.linalg.inv(np.identity(3) - sat.L_p.dot(sat.J_inv).dot(sat.L_p.T))


class TorqueDynamics(gym.Env):
    """Gym environment that drives the satellite state with discrete torques.

    Every call to :meth:`step` integrates ``rhs`` with ``solve_ivp`` over an
    interval of length ``dt``. ``state[:4]`` is used as the attitude
    quaternion (scalar part first) and ``state[4:7]`` as the angular velocity;
    the remaining entries are presumably the flexible-mode coordinates and
    rates consumed by ``rhs`` -- TODO confirm against ``rhs``.

    Parameters
    ----------
    dt : float
        Length of the integration interval of a single step.
    q_req : array-like of 4 floats
        Required attitude quaternion.
    obs_space_shape : int
        Length of the state / observation vector.
    add_9 : float
        Angular-error threshold at or below which :meth:`r_t` adds its bonus.
    generate_quat : object, optional
        Stored on the instance; not used by this class itself.
    """

    def __init__(self, dt, q_req, obs_space_shape=16, add_9=0.25, generate_quat=None):
        self.state = None
        self.dt = dt
        self.q_req = q_req
        self.q_req_conj = quat_conjugate(self.q_req)
        self.w_req = np.zeros(3)
        self.history = []
        self.t = []
        # Note: this is the plain (n, 3) torque table, not a gym.spaces object.
        self.action_space = self.init_actions()
        self.observation_space = gym.spaces.Box(-1, 1, shape=(obs_space_shape,))
        self.q_prev = None
        self.add_9 = add_9
        self.generate_quat = generate_quat
        # Defined here as well as in reset() so the attribute always exists.
        self.multiplier = 1

    @staticmethod
    def init_actions(power=3):
        """Build the table of single-axis torque actions.

        For each ``i`` in ``range(power)`` a 21-point grid over
        ``[-1/10**i, 1/10**i]`` is generated; the merged, de-duplicated
        magnitudes are placed on each of the three axes in turn.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n, 3)`` with unique rows in the sorted order
            produced by ``np.unique``, scaled by 10. Every row has at most
            one non-zero component.
        """
        grids = [np.empty(0)]
        grids += [np.linspace(-1, 1, 21) / 10 ** i for i in range(power)]
        # Rounding merges values shared by neighbouring decades before unique().
        levels = np.unique(np.concatenate(grids).round(10))

        # One row per level with the magnitude on the first axis only ...
        first_axis = np.zeros((levels.size, 3))
        first_axis[:, 0] = levels
        # ... then shift the columns explicitly (axis=1) to cover the other
        # two axes instead of relying on a roll of the flattened array.
        actions = np.concatenate(
            [np.roll(first_axis, shift, axis=1) for shift in range(3)]
        )
        return np.unique(actions, axis=0) * 10

    def reset(self, state=None):
        """Start a new episode and return the initial state.

        Parameters
        ----------
        state : array-like, optional
            Initial state. It is copied, so the caller's object is left
            untouched. When omitted, a random attitude is drawn from
            ``observation_space``.

        Returns
        -------
        numpy.ndarray
            The initial state; every entry from index 4 onwards is zero in
            both cases.
        """
        if state is not None:
            # Copy: the zeroing below must not modify the caller's array, and
            # plain sequences need to become arrays for slice assignment.
            self.state = np.array(state, dtype=float)
        else:
            self.state = self.observation_space.sample()
            # The first sampled entry lies in [-1, 1]; rescale it to a
            # rotation angle of at most pi/4.5 (40 degrees) in magnitude.
            phi = self.state[0] * np.pi / 4.5
            self.state[0] = np.cos(phi / 2)
            self.state[1:4] = normalize(self.state[1:4]) * np.sin(phi / 2)
            assert -1e-5 < np.linalg.norm(self.state[:4]) - 1 < 1e-5
        self.state[4:] = 0
        self.history = [self.state]
        self.t = [0]
        self.q_prev = self.state[:4]
        self.multiplier = 1
        return self.state

    @staticmethod
    def r_a(phi, q_current, q_prev):
        """Return ``exp(2 - phi)``, halved unless ``q_current > q_prev``."""
        base = np.exp(2 - phi)
        return base if q_current > q_prev else base / 2

    def r_t(self, reward, phi):
        """Add a bonus of 9 to ``reward`` when ``phi <= self.add_9``."""
        return reward + 9 if phi <= self.add_9 else reward

    def step(self, action):
        """Apply one action for ``dt`` and return the gym 4-tuple.

        Parameters
        ----------
        action : int, numpy integer or array-like of 3 floats
            Either an index into ``self.action_space`` or the torque vector
            itself. The argument is never modified.

        Returns
        -------
        observation : numpy.ndarray
            State at the end of the integration interval.
        reward : float
            Value of :meth:`r_a` for the new attitude.
        done : bool
            True when the first seven state entries are within 1e-4 (norm)
            of ``q_req`` followed by ``w_req``.
        info : dict
            ``'x'`` is the state history and ``'t'`` the matching times.

        Raises
        ------
        RuntimeError
            If called before :meth:`reset`.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        # np.integer covers every NumPy integer width (int32 as well as
        # int64), so an index is never mistaken for a torque value.
        if isinstance(action, (int, np.integer)):
            action = self.action_space[action]
        # The product is a new array: neither the caller's torque nor a row
        # of the action table is scaled in place.
        action = np.asarray(action, dtype=float) * self.multiplier

        sol = solve_ivp(lambda t, x: rhs(t, x, sat, action), (0, self.dt), self.state)
        # Copy the final column so the history does not keep the whole
        # solver trajectory alive.
        observation = sol.y[:, -1].copy()
        self.state = observation
        self.history.append(observation)
        self.t.append(self.t[-1] + self.dt)

        # Reward: angular error between the required and current attitude.
        q_current = observation[:4]
        q_error = quat_product(self.q_req_conj, q_current)
        q_error = np.clip(q_error, -1, 1)  # keep arccos inside its domain
        phi = 2 * np.arccos(q_error[0])
        # NOTE(review): progress is judged from the scalar part of the
        # attitude quaternion itself rather than of the error quaternion;
        # the two coincide when q_req is the identity -- confirm intent.
        # The bonus from r_t() is currently not applied.
        reward = self.r_a(phi, q_current[0], self.q_prev[0])

        self.multiplier = 1
        self.q_prev = q_current

        q_req_ext = np.concatenate([self.q_req, self.w_req])
        done = np.linalg.norm(observation[:7] - q_req_ext) < 1e-4

        info = {'x': self.history, 't': self.t}
        return observation, reward, done, info

    def render(self, mode='human'):
        """No-op; ``mode`` is accepted for compatibility with ``gym.Env``."""
        pass

In [12]:
# Preview the torque table built from five magnitude decades (power=5).
TorqueDynamics.init_actions(5)

array([[-1.e+02,  0.e+00,  0.e+00],
       [-9.e+01,  0.e+00,  0.e+00],
       [-8.e+01,  0.e+00,  0.e+00],
       [-7.e+01,  0.e+00,  0.e+00],
       [-6.e+01,  0.e+00,  0.e+00],
       [-5.e+01,  0.e+00,  0.e+00],
       [-4.e+01,  0.e+00,  0.e+00],
       [-3.e+01,  0.e+00,  0.e+00],
       [-2.e+01,  0.e+00,  0.e+00],
       [-1.e+01,  0.e+00,  0.e+00],
       [-9.e+00,  0.e+00,  0.e+00],
       [-8.e+00,  0.e+00,  0.e+00],
       [-7.e+00,  0.e+00,  0.e+00],
       [-6.e+00,  0.e+00,  0.e+00],
       [-5.e+00,  0.e+00,  0.e+00],
       [-4.e+00,  0.e+00,  0.e+00],
       [-3.e+00,  0.e+00,  0.e+00],
       [-2.e+00,  0.e+00,  0.e+00],
       [-1.e+00,  0.e+00,  0.e+00],
       [-9.e-01,  0.e+00,  0.e+00],
       [-8.e-01,  0.e+00,  0.e+00],
       [-7.e-01,  0.e+00,  0.e+00],
       [-6.e-01,  0.e+00,  0.e+00],
       [-5.e-01,  0.e+00,  0.e+00],
       [-4.e-01,  0.e+00,  0.e+00],
       [-3.e-01,  0.e+00,  0.e+00],
       [-2.e-01,  0.e+00,  0.e+00],
       [-1.e-01,  0.e+00,  0