In [1]:
import numpy as np
import gym
import k3d
from ratelimiter import RateLimiter
from k3d.platonic import Cube
from time import time

# Shared limiter used by the animation cells: at most 4 guarded entries per 1 s period.
rate_limiter = RateLimiter(max_calls=4, period=1)

env = gym.make('CartPole-v0')
observation = env.reset()

# Fixed grid and camera so the view does not re-fit itself while the objects move.
plot = k3d.plot(grid_auto_fit=False, camera_auto_fit=False, grid=(-1,-1,-1,1,1,1))

# observation[0] is used as the cart x-coordinate and observation[2] as the pole angle:
# the pole tip sits one unit away from the cart position, tilted by that angle in the x-z plane.
joint_positions = np.array([observation[0], 0, 0], dtype=np.float32)
pole_positions = joint_positions + np.array([np.sin(observation[2]), 0, np.cos(observation[2])], dtype=np.float32)

cart = Cube(origin=joint_positions, size=0.1).mesh
cart.scaling = [1, 0.5, 1]

# The joint marker is the mean of cube vertices 0, 2, 4 and 6.
joint = k3d.points(np.mean(cart.vertices[[0,2,4,6]], axis=0), point_size=0.03, color=0xff00, shader='mesh')
pole = k3d.line(vertices=np.array([joint.positions, pole_positions]), shader='mesh', color=0xff0000)
# Cart template relative to the origin. The cube above was built with
# origin=joint_positions, so (per k3d.platonic.Cube, which offsets its vertices by
# `origin`) that initial offset has to be removed here; otherwise every later
# `box + joint_positions` would add the cart position on top of the initial one.
box = cart.vertices - joint_positions
mass = k3d.points(pole_positions, point_size=0.03, color=0xff0000, shader='mesh')

plot += pole + cart + joint + mass

plot.display()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


Output()

In [None]:
# Random-policy rollout: 20 episodes of at most 100 steps each.
# The whole step (drawing AND simulation) runs inside `rate_limiter`, so the
# environment itself advances no faster than the limiter allows.
for i_episode in range(20):
    observation = env.reset()
    for t in range(100):
        with rate_limiter:
            # observation[0] is used as the cart x-coordinate, observation[2] as the pole angle.
            cart_x, pole_angle = observation[0], observation[2]
            joint_positions = np.array([cart_x, 0, 0], dtype=np.float32)
            pole_offset = np.array([np.sin(pole_angle), 0, np.cos(pole_angle)], dtype=np.float32)
            pole_positions = joint_positions + pole_offset

            # Move the cart template, then re-derive the joint from the moved cart
            # (mean of vertices 0, 2, 4, 6) and hang the pole and mass off it.
            cart.vertices = box + joint_positions
            joint_face = cart.vertices[[0, 2, 4, 6]]
            joint.positions = np.mean(joint_face, axis=0)
            pole.vertices = [joint.positions, pole_positions]
            mass.positions = pole_positions

            # Take one random action; leave the episode as soon as the env reports done.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                break

In [None]:
# Display the shared `plot` object again as this cell's output.
plot.display()

In [None]:
# Random-policy rollout: 20 episodes of at most 100 steps each.
# Only the plot update sits inside `rate_limiter`; the geometry computation and
# the environment step run outside of it.
for i_episode in range(20):
    observation = env.reset()
    for t in range(100):

        # observation[0] is used as the cart x-coordinate, observation[2] as the pole angle.
        cart_x, pole_angle = observation[0], observation[2]
        joint_positions = np.array([cart_x, 0, 0], dtype=np.float32)
        pole_positions = joint_positions + np.array(
            [np.sin(pole_angle), 0, np.cos(pole_angle)], dtype=np.float32)

        with rate_limiter:
            # Shift the cart template, then place joint, pole and mass relative to it.
            cart.vertices = box + joint_positions
            joint.positions = np.mean(cart.vertices[[0, 2, 4, 6]], axis=0)
            pole.vertices = [joint.positions, pole_positions]
            mass.positions = pole_positions

        # One random action per step.
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # The env signals the end of the episode through `done`.
        if done:
            break

In [None]:
# Hand-rolled, non-blocking throttle: the simulation runs at full speed and the
# plot is refreshed at most `max_calls` times per `period` seconds within an
# episode. Frames that arrive too early are simply not drawn.
max_calls, period = 3, 1
call_time = period/max_calls  # minimum number of seconds between two redraws

for i_episode in range(20):
    observation = env.reset()
    for t in range(100):

        # observation[0] is used as the cart x-coordinate, observation[2] as the pole angle.
        joint_positions = np.array([observation[0], 0, 0], dtype=np.float32)
        pole_positions = joint_positions + np.array([np.sin(observation[2]), 0, np.cos(observation[2])], dtype=np.float32)
        now = time()

        # Always draw the first frame of an episode; afterwards redraw only once
        # at least `call_time` seconds have passed since the previous redraw.
        # (`t == 0` short-circuits, so `last_draw_time` is never read before it is set.)
        if t == 0 or now - last_draw_time >= call_time:
            cart.vertices = box + joint_positions
            joint.positions = np.mean(cart.vertices[[0,2,4,6]], axis=0)
            pole.vertices = [joint.positions, pole_positions]
            mass.positions = pole_positions
            # Restart the interval only when something was actually drawn.
            last_draw_time = now

        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        if done:
            break

In [3]:
# Simulate at full speed, keep `max_calls` evenly spaced frames per episode in a
# FIFO queue, show queued frames whenever the accumulated simulation time crosses
# another `call_time` tick, and finally replay whatever is left at a limited rate.
max_calls, period = 3, 1
call_time = period/max_calls  # seconds of simulation time between two live updates
i = 1                         # number of the next `call_time` tick to be reached
all_it_time = 0               # accumulated wall-clock time spent in simulation steps
pending_frames = []           # FIFO queue of sampled frames waiting to be shown


def _show_frame(frame):
    """Push one cached frame to the plot.

    `frame` is [cart vertices, joint position, pole vertices, mass position],
    i.e. the values for cart.vertices, joint.positions, pole.vertices and
    mass.positions, assigned in that order.
    """
    cart.vertices, joint.positions, pole.vertices, mass.positions = frame


for i_episode in range(20):
    episode_frames = []
    observation = env.reset()
    for t in range(100):
        ts1 = time()
        # observation[0] is used as the cart x-coordinate, observation[2] as the pole angle.
        joint_positions = np.array([observation[0], 0, 0], dtype=np.float32)
        pole_positions = joint_positions + np.array([np.sin(observation[2]), 0, np.cos(observation[2])], dtype=np.float32)

        # Compute the moved cart once and derive the joint (mean of vertices 0, 2, 4, 6) from it.
        cart_vertices = box + joint_positions
        joint_position = np.mean(cart_vertices[[0,2,4,6]], axis=0)
        episode_frames.append([cart_vertices, joint_position,
                               [joint_position, pole_positions],
                               pole_positions])

        if all_it_time > call_time*i:
            i += 1
            # Nothing is queued until the first episode has finished, so guard the pop.
            if pending_frames:
                _show_frame(pending_frames.pop(0))

        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        ts2 = time()

        all_it_time += ts2 - ts1

        if done:
            break

    # Sample `max_calls` frames spaced `to_pull` steps apart (`t` is the index of
    # the last simulated step). The index is clamped so that episodes shorter
    # than `max_calls` steps cannot run past the end of the list.
    to_pull = max(t//max_calls, 1)
    for j in range(max_calls):
        pending_frames.append(episode_frames[min(to_pull*j, t)])

# Replay the remaining frames through ONE shared limiter: a limiter only delays
# an entry based on the entries it has already seen itself.
replay_limiter = RateLimiter(max_calls=max_calls, period=period)
for element in pending_frames:
    with replay_limiter:
        _show_frame(element)

In [2]:
# Display the shared `plot` object as this cell's output.
plot.display()

Output()