In [5]:
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback, CallbackList
from stable_baselines3.common.logger import configure
from tqdm import trange

# === Setup environment ===
def make_env():
    """Create one continuous-action CarRacing-v3 environment wrapped in Monitor.

    Intended as the zero-argument factory handed to a vectorized env wrapper.
    """
    # Monitor wraps the raw env so episode rewards, lengths, etc. are recorded.
    return Monitor(
        gym.make("CarRacing-v3", render_mode="rgb_array", continuous=True)
    )

# Single-env vectorized wrapper; VecTransposeImage converts HxWxC frames to
# the channel-first CxHxW layout expected by the CNN policy.
env = DummyVecEnv([make_env])
env = VecTransposeImage(env)

# === TensorBoard Logger ===
log_dir = "./sac_logs/"
new_logger = configure(log_dir, ["stdout", "tensorboard"])

# === Checkpoint Callback ===
checkpoint_callback = CheckpointCallback(
    save_freq=100_000,
    save_path="./checkpoints/",
    name_prefix="sac_carracing",
)

# === SAC Model ===
# Memory: each stored frame is 1 x 3 x 96 x 96 uint8 bytes, so 100_000
# transitions need ~2.57 GiB for observations. By default the replay buffer
# allocates a second array of the same size for next observations (~5.1 GiB
# total), which triggers a MemoryError on machines without that much free RAM.
# `optimize_memory_usage=True` stores next observations implicitly in the same
# array, halving the footprint. SB3 requires `handle_timeout_termination` to
# be disabled in that mode, so time-limit truncations are treated like real
# terminations when bootstrapping.
# If the single ~2.57 GiB allocation still fails, lower `buffer_size`.
model = SAC(
    policy="CnnPolicy",
    env=env,
    verbose=0,
    buffer_size=100_000,
    optimize_memory_usage=True,
    replay_buffer_kwargs={"handle_timeout_termination": False},
    learning_rate=3e-4,
    batch_size=64,
    train_freq=1,
    gradient_steps=1,
    # NOTE(review): a fixed ent_coef of 0 removes SAC's entropy bonus;
    # SB3's default is "auto" (learned coefficient). Confirm this is intended.
    ent_coef=0,
    # NOTE: the custom logger installed via set_logger() below takes
    # precedence over this setting.
    tensorboard_log=log_dir,
    device="cuda",
)

model.set_logger(new_logger)

# === Training ===
total_timesteps = 1_000_000
steps_per_iter = 10_000  # tqdm updates every X steps

print("Starting training with tqdm and TensorBoard logging...")

# Train in fixed-size chunks so tqdm can advance once per chunk;
# reset_num_timesteps=False keeps the global step counter (and therefore the
# logs and checkpoint schedule) continuous across the repeated learn() calls.
for _ in trange(total_timesteps // steps_per_iter, desc="Training Progress"):
    model.learn(
        total_timesteps=steps_per_iter,
        reset_num_timesteps=False,
        callback=checkpoint_callback,
    )

# === Save final model ===
model.save("sac_carracing_sb3")
print("Training complete. Model saved as 'sac_carracing_sb3'.")


Logging to ./sac_logs/


MemoryError: Unable to allocate 2.57 GiB for an array with shape (100000, 1, 3, 96, 96) and data type uint8