# Configuration

In [21]:
import numpy as np
import cloudpickle
from scipy import integrate
import matplotlib.pyplot as plt

In [41]:
# Parameters: 16 values stored as one flat float array.
# NOTE(review): the meaning and order of the slots is not visible in this
# notebook -- confirm against the model definition. The 1/12 * 0.05 * 0.1**2
# group looks like a rod inertia (m * L**2 / 12) -- TODO confirm.
myParam = np.array(
    [0.05] * 3
    + [1/12 * 0.05 * 0.1**2] * 3
    + [0.15, 0.1, 0.1, 0.1]
    + [0.05] * 3
    + [0.05] * 2
    + [9.8]
)

# Constraints (the last entry is 80 degrees expressed in radians)
myCon = np.array([100.0, 10.0, 0.01, np.deg2rad(80)])
a_max = 100

In [19]:
# Config: time grid (start, stop, step) and the 6-component initial state
t0, t1, dt = 0, 10, 0.01
t = np.arange(start=t0, stop=t1, step=dt)
y0 = np.zeros(6)
y0[0] = np.pi/6  # only the first component starts away from zero

In [93]:
# Load the pickled model objects. Despite the ".dll" suffix these files are
# read with cloudpickle, i.e. they are pickles, not shared libraries.
# SECURITY: unpickling executes arbitrary code -- only load files that you
# produced yourself or otherwise trust.
# `with` guarantees the file handles are closed once loading finishes.
with open('./model/swing_dynamic.dll', 'rb') as _fh:
    f = cloudpickle.load(_fh)
with open('./model/swing_kinematic.dll', 'rb') as _fh:
    kinematic = cloudpickle.load(_fh)

In [25]:
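# Disabled earlier experiment, kept for reference only: a hand-written
# right-hand side integrated with odeint. Re-enabling it would rebind `f`
# (loaded from disk in the previous cell), and neither `odeint` nor `ddq_np`
# is imported or defined in the cells shown here.
# def f(t, y, tau):
#     v = y[3:]
#     a = ddq_np(y, tau, myParam, myCon).squeeze()
#     return np.r_[v, a]
# sol2 = odeint(f, y0, t, args=(np.ones(2)*5, ))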

# Environment

In [127]:
def continuous_reward(state, action, next_state):
    """Score a transition from the first component of ``next_state``.

    The reward is ``-cos(next_state[0])``. The episode is flagged as finished
    as soon as that reward is strictly positive. ``state`` and ``action`` are
    accepted so the function matches the reward-callback signature, but they
    are not used.

    Returns
    -------
    tuple
        ``(reward, done)`` where ``done`` is a plain ``bool``.
    """
    angle = next_state[0]
    reward = -np.cos(angle)
    return reward, bool(reward > 0)

In [120]:
class Swing:
    """Episodic environment that advances an ODE integrator one step per action.

    ``reset`` draws a random initial state and starts an episode; ``step``
    applies one action and returns ``(next_state, reward, done, info)``.

    Parameters
    ----------
    ode : object
        Integrator exposing ``set_f_params``, ``successful``, ``integrate``,
        ``set_initial_value`` and the attributes ``t`` and ``y`` (the
        interface of ``scipy.integrate.ode``).
    reward_fcn : callable
        Called as ``reward_fcn(state, clipped_action, next_state)``; must
        return ``(reward, done)``.
    theta0 : float
        Half-width of the uniform range from which the first state component
        is drawn on ``reset``.
    max_steps : int
        Maximum number of ``step`` calls in one episode.
    a_max, dt, t0 : optional
        Action scale, integration step and initial integrator time. When left
        as ``None`` the module-level names ``a_max``, ``dt`` and ``t0`` are
        looked up at call time, which is how the class behaved before these
        parameters existed.
    """

    def __init__(self, ode, reward_fcn, theta0=np.pi/6, max_steps=1000,
                 a_max=None, dt=None, t0=None):
        self.reward_fcn = reward_fcn
        self.theta0 = theta0
        # An episode only becomes active after reset(); until then step() refuses to run.
        self.done = True
        self.max_steps = max_steps
        self.ode = ode
        self.a_max = a_max
        self.dt = dt
        self.t0 = t0
        self.t = 0
        self.state = None

    def step(self, action):
        """Apply ``action`` for one integration step.

        The action is clipped element-wise to ``[-1, 1]`` and multiplied by the
        action scale before it is passed to the integrator as its parameter.

        Returns
        -------
        tuple
            ``(next_state, reward, done, None)``. ``done`` is true when the
            reward function says so, when ``max_steps`` calls have been made
            in this episode, or when the integrator reports failure.

        Raises
        ------
        RuntimeError
            If called before ``reset`` or after the episode has finished.
        """
        if self.done:
            raise RuntimeError('The environment is not initialized.')

        # Fall back to the module-level settings when none were given explicitly.
        scale = a_max if self.a_max is None else self.a_max
        step_size = dt if self.dt is None else self.dt

        clip_action = np.clip(action, -1, 1)
        self.ode.set_f_params(clip_action * scale)
        if self.ode.successful():
            self.ode.integrate(self.ode.t + step_size)
        # Checked again after integrating so a failure in this very step is noticed.
        solver_ok = self.ode.successful()
        next_state = np.array(self.ode.y)

        reward, done = self.reward_fcn(self.state, clip_action, next_state)

        # Count this call before testing the limit so that an episode lasts
        # exactly max_steps calls (counting afterwards allowed one extra call).
        self.t += 1
        # A failed solver ends the episode instead of silently replaying a stale state.
        self.done = bool(done) or self.t >= self.max_steps or not solver_ok
        self.state = np.array(next_state)
        return (next_state, reward, self.done, None)

    def reset(self):
        """Start a new episode and return its initial state.

        The state has six components: the first is drawn uniformly from
        ``[-theta0, theta0)``, the remaining five are zero.
        """
        self.t = 0
        self.done = False
        start_time = t0 if self.t0 is None else self.t0
        self.state = np.array([np.random.uniform(-1, 1) * self.theta0, 0, 0, 0, 0, 0])
        self.ode.set_initial_value(self.state, start_time)
        return self.state

    def seed(self, s):
        """Seed NumPy's global random generator, which ``reset`` draws from."""
        np.random.seed(s)

In [121]:
# Build the integrator around the loaded dynamics `f` (VODE solver, BDF method),
# then wrap it in the environment together with the reward function.
ode = integrate.ode(f)
ode.set_integrator('vode', method='bdf')
env = Swing(ode=ode, reward_fcn=continuous_reward)

In [123]:
# Fix the random seed so the initial state drawn by reset() is reproducible.
env.seed(s=171)
state = env.reset()

In [128]:
# Interactive help lookup left over from development. The trailing "?" is
# IPython-only syntax and is a syntax error in plain Python, so it is disabled
# here; run it manually in a notebook cell when the docstring is needed.
# np.random.uniform?