In [2]:
import gym
import numpy as np
import torch
from PIL import Image
import optuna
import imageio

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import tqdm
import os
import gym
from PIL import Image
import numpy as np
import numpy.random as nr
import argparse
import torch

class CarRacing_rollouts():
    def __init__(self, seed_num=0):
        self.env = gym.make('CarRacing-v2', render_mode='rgb_array', domain_randomize=False)
        self.env.reset(seed=seed_num)
        self.file_dir = './data/CarRacing/'

    def get_rollouts(self, num_rollouts=10000, reflesh_rate=5, max_episode=300):
        start_idx = 0
        if os.path.exists(self.file_dir):
            start_idx = len(os.listdir(self.file_dir)) 
        for i in tqdm.tqdm(range(start_idx, num_rollouts+1)):
            state_sequence = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []
            state = self.env.reset()
            done = False
            iter = 0
            while iter < max_episode:
                if iter < 20:
                    action = np.array([-0.1, 1, 0])
                elif iter % reflesh_rate == 0:
                    steering, acceleration, brake = self.env.action_space.sample()
                    action = np.array([steering, acceleration, brake])
                    rn = nr.randint(0, 9)
                    if rn in [0]:
                        action = np.array([0, 0, 0])
                    elif rn in [1, 2, 3, 4]:
                        action = np.array([0, nr.uniform(0.0, 1.0), 0])
                    elif rn in [5, 6]:
                        action = np.array([nr.uniform(-1.0, 0.0), 0, 0])
                    elif rn in [7, 8]:
                        action = np.array([nr.uniform(0.0, 1.0), 0, 0])
                    elif rn in [9]:
                        action = np.array([0, 0, nr.uniform(0.0, 1.0)])
                    else:
                        pass

                state, reward, done, _, _ = self.env.step(action)
                state = self.reshape_state(state)
                state_sequence.append(state)
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)
                iter += 1
            np.savez_compressed(os.path.join(self.file_dir, 'rollout_{}.npz'.format(i)), state=state_sequence, action=action_sequence, reward=reward_sequence, done=done_sequence)
            # np.savez(os.path.join(self.file_dir, 'rollout_{}.npz'.format(i)), state=state_sequence, action=action_sequence, reward=reward_sequence, done=done_sequence)

    def load_rollout(self, idx_rolloout):
        data = np.load(os.path.join(self.file_dir, 'rollout_{}.npz'.format(idx_rolloout)))
        return data['state'], data['action'], data['reward'], data['done']
    
    def load_rollouts(self, idx_rolloouts):
        states = []
        actions = []
        rewards = []
        dones = []
        for idx_rolloout in idx_rolloouts:
            data = np.load(os.path.join(self.file_dir, 'rollout_{}.npz'.format(idx_rolloout)))
            states.append(data['state'])
            actions.append(data['action'])
            rewards.append(data['reward'])
            dones.append(data['done'])
        return states, actions, rewards, dones

    def reshape_state(self, state):
        # state（画像）をリサイズして64x64にする．値を0-1に正規化する処理は行っていない．
        HEIGHT = 64
        WIDTH = 64
        state = state[0:84, :, :]
        state = Image.fromarray(state).resize((HEIGHT, WIDTH))
        return state
    
    def make_gif(self, idx_rolloout):
        state, _, _, _ = self.load_rollout(idx_rolloout)
        images = []
        for i in range(len(state)):
            pil_image = Image.fromarray(state[i].astype("uint8"))
            images.append(pil_image)
        imageio.mimsave('./rollout.gif'.format(idx_rolloout), images, duration=10)
        

In [3]:
import tqdm
import os
import gym
from PIL import Image
import numpy as np
import numpy.random as nr
import argparse
import torch

class CarRacing_rollouts():
    def __init__(self, seed_num=0):
        self.env = gym.make('CarRacing-v2', render_mode='rgb_array', domain_randomize=False)
        self.env.reset(seed=seed_num)
        self.file_dir = './data/CarRacing/'

    def get_rollouts(self, num_rollouts=10000, reflesh_rate=5, max_episode=300):
        start_idx = 8600
        # if os.path.exists(self.file_dir):
        #     start_idx = len(os.listdir(self.file_dir)) 
        for i in tqdm.tqdm(range(start_idx, num_rollouts+1)):
            if len(self.load_rollout(i)) == 300:
                continue
            state_sequence = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []
            state = self.env.reset()
            done = False
            iter = 0
            while iter < max_episode:
                if iter < 20:
                    action = np.array([-0.1, 1, 0])
                elif iter % reflesh_rate == 0:
                    steering, acceleration, brake = self.env.action_space.sample()
                    action = np.array([steering, acceleration, brake])
                    rn = nr.randint(0, 9)
                    if rn in [0]:
                        action = np.array([0, 0, 0])
                    elif rn in [1, 2, 3, 4]:
                        action = np.array([0, nr.uniform(0.0, 1.0), 0])
                    elif rn in [5, 6]:
                        action = np.array([nr.uniform(-1.0, 0.0), 0, 0])
                    elif rn in [7, 8]:
                        action = np.array([nr.uniform(0.0, 1.0), 0, 0])
                    elif rn in [9]:
                        action = np.array([0, 0, nr.uniform(0.0, 1.0)])
                    else:
                        pass

                state, reward, done, _, _ = self.env.step(action)
                state = self.reshape_state(state)
                state_sequence.append(state)
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)
                iter += 1
            np.savez_compressed(os.path.join(self.file_dir, 'rollout_{}.npz'.format(i)), state=state_sequence, action=action_sequence, reward=reward_sequence, done=done_sequence)
            # np.savez(os.path.join(self.file_dir, 'rollout_{}.npz'.format(i)), state=state_sequence, action=action_sequence, reward=reward_sequence, done=done_sequence)

    def load_rollout(self, idx_rolloout):
        data = np.load(os.path.join(self.file_dir, 'rollout_{}.npz'.format(idx_rolloout)))
        return data['state'], data['action'], data['reward'], data['done']
    
    def load_rollouts(self, idx_rolloouts):
        states = []
        actions = []
        rewards = []
        dones = []
        for idx_rolloout in idx_rolloouts:
            data = np.load(os.path.join(self.file_dir, 'rollout_{}.npz'.format(idx_rolloout)))
            states.append(data['state'])
            actions.append(data['action'])
            rewards.append(data['reward'])
            dones.append(data['done'])
        return states, actions, rewards, dones

    def reshape_state(self, state):
        # state（画像）をリサイズして64x64にする．値を0-1に正規化する処理は行っていない．
        HEIGHT = 64
        WIDTH = 64
        state = state[0:84, :, :]
        state = Image.fromarray(state).resize((HEIGHT, WIDTH))
        return state
    
    def make_gif(self, idx_rolloout):
        state, _, _, _ = self.load_rollout(idx_rolloout)
        images = []
        for i in range(len(state)):
            pil_image = Image.fromarray(state[i].astype("uint8"))
            images.append(pil_image)
        imageio.mimsave('./rollout.gif'.format(idx_rolloout), images, duration=10)
        

In [4]:
env = CarRacing_rollouts()
env.get_rollouts(10000, 5, 300)

  if not isinstance(terminated, (bool, np.bool8)):
100%|██████████| 1401/1401 [1:21:43<00:00,  3.50s/it]


In [6]:
for i in range(10000):
    state, _, _, _ = env.load_rollout(i)
    if len(state) != 300:
        print(i)

In [5]:
env.make_gif(0)

In [6]:
env.make_gif(55)

In [41]:
import numpy as np
import h5py
import glob, cv2, os
f = h5py.File('data.hdf5', mode='w')

In [42]:
group = f.create_group("/CarRacing")

In [43]:
file_list = [p.replace("\\", "/") for p in glob.glob("./data/CarRacing/*.npz", recursive=True)]
file_list = sorted(file_list, key=lambda x: int(x.split("/")[-1].split(".")[0].split("_")[-1]))

In [44]:
A = np.load(file_list[0])
for a in A.keys():
    print(a)
    print(type(a))

state
<class 'str'>
action
<class 'str'>
reward
<class 'str'>
done
<class 'str'>


In [46]:
A

NpzFile './data/CarRacing/rollout_0.npz' with keys: state, action, reward, done

In [45]:
for file in file_list:
    # key: state, action, reward, doneを持つnpzファイルをdatasetに追加
    data = np.load(file)
    dataset = group.create_dataset(os.path.basename(file), data=data)
f.close()


TypeError: No conversion path for dtype: dtype('<U6')