In [3]:
import torch
import numpy as np

In [4]:
class SerializedBuffer:
    """Read-only transition buffer loaded from a file written with ``torch.save``.

    The file must contain a dict with the keys ``'state'``, ``'action'``,
    ``'reward'``, ``'done'`` and ``'next_state'``. The first dimension of
    ``'state'`` defines the buffer size; the other entries are presumably
    aligned with it row by row (not checked here).
    """

    def __init__(self, path, device):
        """Load the buffer stored at ``path`` and place every tensor on ``device``.

        Args:
            path: Location of the serialized buffer file.
            device: Target device passed to ``Tensor.to`` for every tensor.

        Raises:
            KeyError: If one of the expected keys is missing from the file.
        """
        # NOTE(security): torch.load unpickles the file; only load buffers
        # that come from a trusted source.
        # Deserialize onto the CPU first so that a buffer saved from CUDA
        # tensors can still be opened on a machine without a GPU; the
        # tensors are moved to the requested device right below.
        tmp = torch.load(path, map_location="cpu")
        self.buffer_size = self._n = tmp['state'].size(0)
        self.device = device

        self.states = tmp['state'].clone().to(self.device)
        self.actions = tmp['action'].clone().to(self.device)
        self.rewards = tmp['reward'].clone().to(self.device)
        self.dones = tmp['done'].clone().to(self.device)
        self.next_states = tmp['next_state'].clone().to(self.device)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly at random, with replacement.

        Indices come from NumPy's global random state, so seeding
        ``np.random`` makes the draw reproducible.

        Returns:
            Tuple ``(states, actions, rewards, dones, next_states)`` where
            every element is indexed with the same random row indices.
        """
        idxes = np.random.randint(low=0, high=self._n, size=batch_size)
        return (
            self.states[idxes],
            self.actions[idxes],
            self.rewards[idxes],
            self.dones[idxes],
            self.next_states[idxes]
        )

In [5]:
# Relative path of the serialized buffer to inspect. The file name appears to
# encode the buffer size and the std / prand settings it was generated with
# (TODO confirm against the script that produced it).
exp_path = "buffers/InvertedPendulum-v2/size1000000_std0.01_prand0.0.pth"
# Load the whole buffer and keep its tensors on the CPU.
trace_exp = SerializedBuffer(path=exp_path, device="cpu")

In [8]:
# Show the shape of the state, action, reward and done tensors, one per line.
print(
    trace_exp.states.shape,
    trace_exp.actions.shape,
    trace_exp.rewards.shape,
    trace_exp.dones.shape,
    sep="\n",
)

torch.Size([1000000, 4])
torch.Size([1000000, 1])
torch.Size([1000000, 1])
torch.Size([1000000, 1])


In [9]:
# Peek at the first stored state (a single row).
trace_exp.states[:1]

tensor([[-0.0089,  0.0093,  0.0027, -0.0042]])

In [11]:
# Peek at the first five stored actions.
trace_exp.actions[:5]

tensor([[-0.9236],
        [ 0.0359],
        [ 0.6258],
        [ 0.1225],
        [ 0.3658]])

### 查看环境信息 (Inspect environment information)

In [12]:
import gym

# Raise gym's log threshold; 40 presumably corresponds to gym.logger.ERROR,
# i.e. warnings are silenced (TODO confirm for the installed gym version).
gym.logger.set_level(40)


def make_env(env_id):
    """Create the gym environment registered as ``env_id`` wrapped in ``NormalizedEnv``."""
    base_env = gym.make(env_id)
    return NormalizedEnv(base_env)


class NormalizedEnv(gym.Wrapper):
    """Wrapper that rescales the action space by its upper bound.

    The action-space bounds are divided by the original upper bound, and
    actions passed to ``step`` are multiplied by that bound again before
    being forwarded to the wrapped environment.

    Assumes ``env.action_space`` exposes NumPy ``high`` / ``low`` arrays
    (e.g. a ``gym.spaces.Box``) and that ``env`` has ``_max_episode_steps``.
    """

    def __init__(self, env):
        """Wrap ``env`` and normalize its action-space bounds in place."""
        gym.Wrapper.__init__(self, env)
        self._max_episode_steps = env._max_episode_steps

        # Take a copy of the upper bound. `high` is divided in place on the
        # next line; keeping a reference to the same array would turn the
        # scale into all ones, leaving `low` un-normalized and `step`
        # without any rescaling.
        self.scale = env.action_space.high.copy()
        self.action_space.high /= self.scale
        self.action_space.low /= self.scale

    def step(self, action):
        """Rescale ``action`` to the original range and step the wrapped env."""
        return self.env.step(action * self.scale)

In [13]:
# Build the InvertedPendulum-v2 environment through make_env.
env = make_env("InvertedPendulum-v2")

In [14]:
# Show the observation-space and action-space shapes, one per line.
print(
    env.observation_space.shape,
    env.action_space.shape,
    sep="\n",
)

(4,)
(1,)


In [2]:
import roboschool
import gym

In [3]:
# Roll out a single RoboschoolAnt-v1 episode with randomly sampled actions,
# rendering after every step until the environment reports `done`.
env = gym.make('RoboschoolAnt-v1')
state = env.reset()
done = False
while not done:
    next_state, reward, done, _ = env.step(env.action_space.sample())
    env.render()



In [6]:
# Scratch check: the literal 5e5 is a float and prints as 500000.0.
print(5e5)

500000.0


In [1]:
# Scratch arithmetic check of 10 * 1e-4.
10 * 1e-4

0.001

In [1]:
# Scratch check: display the float value of 5e5.
5e5

500000.0