In [9]:
!pip uninstall -y torch torchvision torchaudio

Found existing installation: torch 2.6.0
Uninstalling torch-2.6.0:
  Successfully uninstalled torch-2.6.0
Found existing installation: torchvision 0.21.0
Uninstalling torchvision-0.21.0:
  Successfully uninstalled torchvision-0.21.0
Found existing installation: torchaudio 2.6.0
Uninstalling torchaudio-2.6.0:
  Successfully uninstalled torchaudio-2.6.0


You can safely remove it manually.


In [10]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting torch
  Downloading https://download.pytorch.org/whl/cu126/torch-2.6.0%2Bcu126-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu126/torchvision-0.21.0%2Bcu126-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu126/torchaudio-2.6.0%2Bcu126-cp312-cp312-win_amd64.whl.metadata (6.8 kB)
Downloading https://download.pytorch.org/whl/cu126/torch-2.6.0%2Bcu126-cp312-cp312-win_amd64.whl (2496.1 MB)
   ---------------------------------------- 0.0/2.5 GB ? eta -:--:--
   ---------------------------------------- 0.0/2.5 GB 33.6 MB/s eta 0:01:15
   ---------------------------------------- 0.0/2.5 GB 36.2 MB/s eta 0:01:09
   ---------------------------------------- 0.0/2.5 GB 37.3 MB/s eta 0:01:07
    --------------------------------------- 0.0/2.5 GB 38.5 MB/s eta 0:01:04
    ---------------------------

In [4]:
import torch

# Report whether PyTorch can reach a CUDA device, and name device 0 if it can.
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
if cuda_ready:
    print("GPU:", torch.cuda.get_device_name(0))


CUDA available: True
GPU: NVIDIA GeForce GTX 1080


In [8]:
import gymnasium as gym
import numpy as np
import cv2
import os
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage, VecFrameStack

# One seed reused for the environment reset, the action-space sampler and SAC.
SEED = 42

# Directory handed to SAC as tensorboard_log; created up front and reused if it
# already exists.
log_dir = "./sac_carracing_tensorboard/"
os.makedirs(name=log_dir, exist_ok=True)

class GrayScaleObservationWrapper(gym.ObservationWrapper):
    """Convert RGB image observations into single-channel grayscale frames.

    The advertised observation space is a uint8 ``Box`` of shape
    ``(height, width, 1)``. Height and width are read from the wrapped
    environment rather than hard-coded, so the wrapper also works for image
    sizes other than 96x96.
    """

    def __init__(self, env):
        """Wrap ``env`` and declare the grayscale observation space.

        Args:
            env: Environment whose observation space has shape
                ``(height, width, channels)``; frames are assumed to be
                uint8 RGB images.
        """
        super().__init__(env)
        # Use the wrapped env's spatial size so the declared space always
        # matches what observation() actually returns.
        height, width = env.observation_space.shape[:2]
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(height, width, 1), dtype=np.uint8
        )

    def observation(self, obs):
        """Return ``obs`` converted from RGB to grayscale, shaped ``(H, W, 1)``."""
        gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
        # cvtColor returns a 2-D image; re-add the trailing channel axis so the
        # result matches the declared observation space.
        return np.expand_dims(gray, axis=-1)

def make_env():
    """Build the seeded, grayscale CarRacing-v3 environment used for training.

    Returns:
        The CarRacing-v3 environment (continuous actions, ``rgb_array``
        rendering) wrapped in ``GrayScaleObservationWrapper``, already reset
        with ``SEED`` and with its action space seeded with ``SEED``.
    """
    base_env = gym.make("CarRacing-v3", render_mode="rgb_array", continuous=True)
    gray_env = GrayScaleObservationWrapper(base_env)

    # Seed both the environment's RNG (through reset) and the action sampler.
    gray_env.reset(seed=SEED)
    gray_env.action_space.seed(SEED)
    return gray_env

# Observation pipeline: one vectorized env -> channels-first images -> stack of
# the last 4 frames. The playback cell wraps its environment with exactly this
# VecFrameStack before calling SAC.load(..., env=env); training without it
# would save a model whose observation space has 1 channel instead of 4, and
# that model could not be loaded against the playback environment.
env = DummyVecEnv([make_env])
env = VecTransposeImage(env)
env = VecFrameStack(env, n_stack=4, channels_order="first")

# SAC with the CNN policy; the entropy coefficient is tuned automatically and
# training metrics are written under log_dir for TensorBoard.
model = SAC(
    "CnnPolicy",
    env,
    seed=SEED,
    ent_coef="auto",
    buffer_size=50000,
    tensorboard_log=log_dir,
    verbose=1,
    device="cuda",
)

# Train, then persist the weights under the name the playback cell loads.
model.learn(total_timesteps=100_000, tb_log_name="SAC_CarRacing")
model.save("sac_carracing_cuda")

Using cuda device
Logging to ./sac_carracing_tensorboard/SAC_CarRacing_1
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 18       |
|    time_elapsed    | 212      |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | -17.9    |
|    critic_loss     | 0.231    |
|    ent_coef        | 0.311    |
|    ent_coef_loss   | -5.9     |
|    learning_rate   | 0.0003   |
|    n_updates       | 3899     |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 18       |
|    time_elapsed    | 425      |
|    total_timesteps | 8000     |
| train/             |          |
|    actor_loss      | -19.6    |
|    critic_loss     | 0.144    |
|    ent_coef        | 0.0935   |
|    ent_coef_loss   | -12      |
|    learning_rate   | 0.0003   |
|    n_updates       | 7899     |
---------

In [1]:
import gymnasium as gym
import numpy as np
import cv2
import time
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage, VecFrameStack

class GrayScaleObservationWrapper(gym.ObservationWrapper):
    """Convert RGB image observations into single-channel grayscale frames.

    The advertised observation space is a uint8 ``Box`` of shape
    ``(height, width, 1)``. Height and width are read from the wrapped
    environment rather than hard-coded, so the wrapper also works for image
    sizes other than 96x96.
    """

    def __init__(self, env):
        """Wrap ``env`` and declare the grayscale observation space.

        Args:
            env: Environment whose observation space has shape
                ``(height, width, channels)``; frames are assumed to be
                uint8 RGB images.
        """
        super().__init__(env)
        # Use the wrapped env's spatial size so the declared space always
        # matches what observation() actually returns.
        height, width = env.observation_space.shape[:2]
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(height, width, 1), dtype=np.uint8
        )

    def observation(self, obs):
        """Return ``obs`` converted from RGB to grayscale, shaped ``(H, W, 1)``."""
        gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
        # cvtColor returns a 2-D image; re-add the trailing channel axis so the
        # result matches the declared observation space.
        return np.expand_dims(gray, axis=-1)

def make_env():
    """Build the CarRacing-v3 environment used for on-screen playback.

    Returns:
        The CarRacing-v3 environment (continuous actions, ``human`` rendering)
        wrapped in ``GrayScaleObservationWrapper``.
    """
    rendered_env = gym.make("CarRacing-v3", render_mode="human", continuous=True)
    return GrayScaleObservationWrapper(rendered_env)

# Playback environment: one vectorized env -> channels-first images -> stack of
# the last 4 frames. The saved model must have been trained on this same stack.
env = DummyVecEnv([make_env])
env = VecTransposeImage(env)
env = VecFrameStack(env, n_stack=4, channels_order='first')

n_episodes = 5
try:
    model = SAC.load("sac_carracing_cuda", env=env, device="cuda")

    for ep in range(n_episodes):
        obs = env.reset()
        done = False
        total_reward = 0.0

        while not done:
            action, _ = model.predict(obs, deterministic=True)
            # The vectorized env returns one entry per sub-environment; there
            # is exactly one here, so read index 0 explicitly instead of
            # relying on the truthiness of a length-1 array.
            obs, rewards, dones, _ = env.step(action)
            total_reward += rewards[0]
            done = bool(dones[0])

        print(f"Episode {ep + 1} reward: {total_reward:.2f}")
finally:
    # Always release the render window, including when loading fails or the
    # run is interrupted from the keyboard.
    env.close()


Episode 1 reward: 6.01
Episode 2 reward: -29.29



KeyboardInterrupt

