In [2]:
import gymnasium as gym
import numpy as np
import pickle
# Collect observations from the discrete-action CarRacing environment driven by
# uniformly sampled random actions, grouped per episode, and pickle them.
env = gym.make("CarRacing-v3", continuous=False)

num_samples = 100000  # remaining frame budget; counted down to 0 below
total_obs = []        # one array per (possibly partial) episode

try:
    while num_samples > 0:
        done = False
        rollout_list = []
        obs_info = env.reset()  # (obs, info) tuple; the initial frame is not stored
        while not done and num_samples > 0:
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            rollout_list.append(obs)
            num_samples -= 1

        # Stack the episode's frames along a new leading axis, then move the
        # last axis to position 1 (presumably (T, H, W, C) -> (T, C, H, W);
        # confirm against the consumer of this file).
        rollout_obs = np.stack(rollout_list, axis=0)
        rollout_obs = np.moveaxis(rollout_obs, -1, 1)
        total_obs.append(rollout_obs)
finally:
    # Release the environment even if collection is interrupted or fails.
    env.close()

print(len(total_obs))

with open("../data/rollouts.pkl", "wb") as f:
    pickle.dump(total_obs, f)
    

100


In [3]:
import gymnasium as gym
import numpy as np
import random
from tqdm.notebook import tqdm

# Number of CarRacing sub-environments stepped in parallel by the vector env.
NUM_ENVS = 5
NUM_SAMPLES = 2000  # total episodes across all envs
# Leading steps of every episode that are driven with a fixed full-gas action;
# frames produced during these steps are not stored.
FORWARD_STEPS = 20
# Per-episode step cap: passed to the TimeLimit wrapper and used as the
# collection loop's step budget.
MAX_STEPS = 320

COLLECT_STEPS = MAX_STEPS - FORWARD_STEPS  # number of steps to store per episode

def make_env(seed_offset):
    """Build a zero-argument factory for one time-limited CarRacing env.

    The returned callable creates a continuous-action ``CarRacing-v3``
    environment capped at ``MAX_STEPS`` steps and resets it once with a
    randomly drawn seed shifted by ``seed_offset``.

    Args:
        seed_offset: Integer added to the random base seed.

    Returns:
        A callable taking no arguments and returning the wrapped environment.
    """
    def _init():
        racing_env = gym.wrappers.TimeLimit(
            gym.make("CarRacing-v3", continuous=True, render_mode=None),
            max_episode_steps=MAX_STEPS,
        )
        # The base seed is drawn when the factory runs, not when it is built.
        base_seed = random.randint(0, 1_000_000)
        racing_env.reset(seed=base_seed + seed_offset)
        return racing_env

    return _init

# Collect NUM_SAMPLES episodes of COLLECT_STEPS frames each from NUM_ENVS
# CarRacing environments stepped in parallel, then save them as one .npy file.
envs = gym.vector.AsyncVectorEnv([make_env(i) for i in range(NUM_ENVS)])

total_episodes = []
episodes_collected = 0
pbar = tqdm(total=NUM_SAMPLES, desc="Collecting episodes")

try:
    while episodes_collected < NUM_SAMPLES:
        obs, infos = envs.reset()
        done = [False] * NUM_ENVS          # True once env i's episode is finalized
        step_count = [0] * NUM_ENVS        # steps taken in env i's current episode
        episode_buffers = [[] for _ in range(NUM_ENVS)]

        while not all(done):
            actions = []
            for i in range(NUM_ENVS):
                # First FORWARD_STEPS: accelerate
                if step_count[i] < FORWARD_STEPS:
                    action = np.array([0.0, 1.0, 0.0])
                else:
                    # After forward steps: random actions
                    # (10% idle, 40% gas, 20% left, 20% right, 10% brake)
                    rn = random.randint(0, 9)
                    if rn == 0:
                        action = np.array([0, 0, 0])
                    elif rn in [1, 2, 3, 4]:
                        action = np.array([0, random.random(), 0])
                    elif rn in [5, 6]:
                        action = np.array([-random.random(), 0.1, 0])
                    elif rn in [7, 8]:
                        action = np.array([random.random(), 0.1, 0])
                    elif rn == 9:
                        action = np.array([0, 0, random.random()])
                actions.append(action)

            obs, rewards, terminated, truncated, infos = envs.step(np.array(actions))

            for i in range(NUM_ENVS):
                if done[i]:
                    # This sub-env's episode already ended. The vector env keeps
                    # stepping (and auto-resetting) it, but those frames belong
                    # to an unrelated episode and must not be recorded.
                    continue

                # Only start recording after FORWARD_STEPS
                if step_count[i] >= FORWARD_STEPS:
                    # NOTE(review): assumes the default next-step autoreset, where
                    # the observation returned together with terminated/truncated
                    # is still the last frame of the finished episode - confirm
                    # for the installed Gymnasium version.
                    episode_buffers[i].append(obs[i])
                step_count[i] += 1

                episode_over = (
                    bool(terminated[i])
                    or bool(truncated[i])
                    or step_count[i] >= MAX_STEPS
                )
                if not episode_over:
                    continue
                done[i] = True

                frames = episode_buffers[i]
                if not frames:
                    # Ended during the forward phase: nothing to pad from, skip it.
                    continue
                if episodes_collected >= NUM_SAMPLES:
                    # Target already reached within this batch; do not overshoot.
                    continue

                # Ensure each episode has exactly COLLECT_STEPS frames:
                # pad with the last frame if the episode ended early.
                frames.extend([frames[-1]] * (COLLECT_STEPS - len(frames)))
                total_episodes.append(np.stack(frames[:COLLECT_STEPS]))
                episodes_collected += 1
                pbar.update(1)
finally:
    # Always shut down the progress bar and the worker processes, even if
    # collection is interrupted or fails.
    pbar.close()
    envs.close()

# Save rollouts
np.save('../data/600k_rollouts.npy', total_episodes, allow_pickle=True)
print("Saved rollouts to '../data/600k_rollouts.npy'")


Collecting episodes:   0%|          | 0/2000 [00:00<?, ?it/s]

Saved rollouts to '../data/600k_rollouts.npy'


In [None]:
import gymnasium as gym
import pygame
import numpy as np
import pickle
import time

# =========================
# SETTINGS
# =========================
# Upper bound on steps in a single episode; also passed to the TimeLimit wrapper.
MAX_STEPS_PER_EPISODE = 1000
# Total number of recorded {'obs', 'action'} frames across all episodes.
TOTAL_FRAMES = 10000
# Output pickle path, relative to the notebook's working directory.
SAVE_PATH = "car_racing_human_rollouts.pkl"
# NOTE(review): DEVICE is not referenced anywhere in the visible cell.
DEVICE = "cpu"  # env is CPU only

# =========================
# HELPER: map keyboard to action
# =========================
def get_action_from_keys(keys):
    """
    Translate the pressed arrow keys into a CarRacing control vector.

    Args:
        keys: Sequence indexable by pygame key constants whose entries are
            truthy while the corresponding key is held down.

    Returns:
        float32 numpy array ``[steering, gas, brake]``: steering is -1.0 for
        LEFT and 1.0 for RIGHT (RIGHT wins if both are held), gas is 1.0 for
        UP, brake is 1.0 for DOWN, and every unpressed control is 0.0.
    """
    steering = 0.0
    if keys[pygame.K_LEFT]:
        steering = -1.0
    if keys[pygame.K_RIGHT]:
        # Checked last so that RIGHT overrides LEFT when both are pressed.
        steering = 1.0

    gas = 1.0 if keys[pygame.K_UP] else 0.0
    brake = 1.0 if keys[pygame.K_DOWN] else 0.0

    return np.array([steering, gas, brake], dtype=np.float32)

# =========================
# SETUP ENVIRONMENT
# =========================
# Record frames of human keyboard driving in CarRacing and pickle them as a
# flat list of {'obs', 'action'} dicts.
env = gym.make("CarRacing-v3", continuous=True, render_mode="human")
env = gym.wrappers.TimeLimit(env, max_episode_steps=MAX_STEPS_PER_EPISODE)

total_rollouts = []
frames_collected = 0
quit_requested = False  # set when the user closes the window

try:
    # Initialize pygame
    pygame.init()
    screen = pygame.display.set_mode((1,1))  # small dummy screen for key capture

    # =========================
    # MAIN LOOP
    # =========================
    while frames_collected < TOTAL_FRAMES and not quit_requested:
        obs, info = env.reset()
        done = False
        step_count = 0

        print("New episode started! Use arrow keys to drive.")

        while not done and frames_collected < TOTAL_FRAMES and step_count < MAX_STEPS_PER_EPISODE:
            # Capture pygame events to keep it responsive
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    quit_requested = True
                    break
            if quit_requested:
                # Leave immediately: stepping again would overwrite `done` and
                # silently ignore the quit request.
                break

            keys = pygame.key.get_pressed()
            action = get_action_from_keys(keys)

            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            # 'obs' is the observation returned by the step that applied 'action'.
            total_rollouts.append({'obs': obs, 'action': action})
            frames_collected += 1
            step_count += 1
finally:
    # =========================
    # CLEANUP
    # =========================
    # Runs on normal completion, on quit, and on errors/interrupts.
    env.close()
    pygame.quit()

# =========================
# SAVE TO PICKLE
# =========================
with open(SAVE_PATH, "wb") as f:
    pickle.dump(total_rollouts, f)

print(f"Saved {len(total_rollouts)} frames to {SAVE_PATH}")
