In [2]:
!amd-smi

Permission needed to access required GPU device node(s):
  - /dev/dri/card0: Permission denied; owner=root(0):video(44);
  - /dev/dri/card1: Permission denied; owner=root(0):video(44);
  - /dev/dri/card2: Permission denied; owner=root(0):video(44);
  - /dev/dri/card3: Permission denied; owner=root(0):video(44);
  - /dev/dri/card4: Permission denied; owner=root(0):video(44);
  - /dev/dri/card5: Permission denied; owner=root(0):video(44);
  - /dev/dri/card6: Permission denied; owner=root(0):video(44);
  - /dev/dri/card7: Permission denied; owner=root(0):video(44);
  - /dev/dri/card8: Permission denied; owner=root(0):video(44);

You can try:
  • Add your user to the group that owns these devices:
      sudo usermod -aG <group> "$USER"

+------------------------------------------------------------------------------+
| AMD-SMI 26.2.0+021c61fc      amdgpu version: 6.16.6   ROCm version: 7.1.1    |
| VBIOS version: 00162356                                                      |
| Platform: Li

In [3]:
import torch
if torch.cuda.is_available():
    print("Nvidia GPU detected!")
    gpu_count = torch.cuda.device_count()
    print(f"Total visible GPUs: {gpu_count}")

    for i in range(gpu_count):
        props = torch.cuda.get_device_properties(i)
        print(f"--- GPU {i} ---")
        print(f"Name: {props.name}")
        print(f"Total memory: {props.total_memory / (1024**2):.0f} MB")
        # Add more properties if needed
else:
    print("No Nvidia GPU visible to PyTorch, or using CPU only.")

Nvidia GPU detected!
Total visible GPUs: 1
--- GPU 0 ---
Name: AMD Radeon PRO W7900D
Total memory: 49136 MB


In [5]:
import torch.nn as nn 

In [6]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out


class ResNet18(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        
        self.layer1 = self._make_layer(BasicBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 2, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ResNet18(num_classes = 10)
model = model.to(device)

In [9]:
sample = torch.rand(16,3,64,64)

In [11]:
!amd-smi

Permission needed to access required GPU device node(s):
  - /dev/dri/card0: Permission denied; owner=root(0):video(44);
  - /dev/dri/card1: Permission denied; owner=root(0):video(44);
  - /dev/dri/card2: Permission denied; owner=root(0):video(44);
  - /dev/dri/card3: Permission denied; owner=root(0):video(44);
  - /dev/dri/card4: Permission denied; owner=root(0):video(44);
  - /dev/dri/card5: Permission denied; owner=root(0):video(44);
  - /dev/dri/card6: Permission denied; owner=root(0):video(44);
  - /dev/dri/card7: Permission denied; owner=root(0):video(44);
  - /dev/dri/card8: Permission denied; owner=root(0):video(44);

You can try:
  • Add your user to the group that owns these devices:
      sudo usermod -aG <group> "$USER"

+------------------------------------------------------------------------------+
| AMD-SMI 26.2.0+021c61fc      amdgpu version: 6.16.6   ROCm version: 7.1.1    |
| VBIOS version: 00162356                                                      |
| Platform: Li

In [12]:
!rocm-smi




Device  Node  IDs              Temp    Power  Partitions          SCLK  MCLK   Fan    Perf  PwrCap  VRAM%  GPU%  
[3m              (DID,     GUID)  (Edge)  (Avg)  (Mem, Compute, ID)                                                 [0m
0       2     0x744b,   6853   23.0°C  15.0W  N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
1       3     0x744b,   49884  24.0°C  14.0W  N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
2       4     0x744b,   60148  23.0°C  14.0W  N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
3       5     0x744b,   13037  24.0°C  12.0W  N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
4       6     0x744b,   47780  22.0°C  12.0W  N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
5       7     0x744b,   25277  22.0°C  9.0W   N/A, N/A, 0         0Mhz  96Mhz  20.0%  auto  241.0W  0%     0%    
6       8     0x744b,   19093  23.0°C  9.0W   N/A, N/A, 0         0Mhz  96Mhz

In [14]:
sample = sample.to(device)
output = model(sample)

In [None]:
sample

In [15]:
!amd-smi
!rocm-smi

Permission needed to access required GPU device node(s):
  - /dev/dri/card0: Permission denied; owner=root(0):video(44);
  - /dev/dri/card1: Permission denied; owner=root(0):video(44);
  - /dev/dri/card2: Permission denied; owner=root(0):video(44);
  - /dev/dri/card3: Permission denied; owner=root(0):video(44);
  - /dev/dri/card4: Permission denied; owner=root(0):video(44);
  - /dev/dri/card5: Permission denied; owner=root(0):video(44);
  - /dev/dri/card6: Permission denied; owner=root(0):video(44);
  - /dev/dri/card7: Permission denied; owner=root(0):video(44);
  - /dev/dri/card8: Permission denied; owner=root(0):video(44);

You can try:
  • Add your user to the group that owns these devices:
      sudo usermod -aG <group> "$USER"

+------------------------------------------------------------------------------+
| AMD-SMI 26.2.0+021c61fc      amdgpu version: 6.16.6   ROCm version: 7.1.1    |
| VBIOS version: 00162356                                                      |
| Platform: Li

In [20]:
import torch

def bytes_to_mb(x):
    return x / (1024 ** 2)

if torch.cuda.is_available():
    print("✅ Nvidia GPU detected!")
    print(f"PyTorch CUDA version: {torch.version.cuda}")
    print(f"CUDA runtime available: {torch.cuda.is_initialized()}")
    print("=" * 50)

    gpu_count = torch.cuda.device_count()
    print(f"Total visible GPUs: {gpu_count}\n")

    for i in range(gpu_count):
        props = torch.cuda.get_device_properties(i)

        print(f"--- GPU {i} ---")
        print(f"Name: {props.name}")
        print(f"Compute capability: {props.major}.{props.minor}")
        print(f"Total memory: {bytes_to_mb(props.total_memory):.0f} MB")

        # Architecture details
        print(f"Multiprocessors (SMs): {props.multi_processor_count}")
        print(f"Max threads per SM: {props.max_threads_per_multi_processor}")
        # print(f"Max threads per block: {props.max_threads_per_block}")
        print(f"Warp size: {props.warp_size}")

        # Clock rates
        # print(f"GPU clock rate: {props.clock_rate / 1000:.1f} MHz")
        # print(f"Memory clock rate: {props.memory_clock_rate / 1000:.1f} MHz")

        # Memory system
        # print(f"Memory bus width: {props.memory_bus_width} bits")
        # print(f"L2 cache size: {props.l2_cache_size / 1024:.0f} KB")

        # Current memory usage (PyTorch-side)
        torch.cuda.set_device(i)
        print(f"Memory allocated: {bytes_to_mb(torch.cuda.memory_allocated(i)):.1f} MB")
        print(f"Memory reserved : {bytes_to_mb(torch.cuda.memory_reserved(i)):.1f} MB")

        print()

else:
    print("❌ No Nvidia GPU visible to PyTorch (CPU-only mode).")


✅ Nvidia GPU detected!
PyTorch CUDA version: None
CUDA runtime available: True
Total visible GPUs: 1

--- GPU 0 ---
Name: AMD Radeon PRO W7900D
Compute capability: 11.0
Total memory: 49136 MB
Multiprocessors (SMs): 48
Max threads per SM: 2048
Warp size: 32
Memory allocated: 406.5 MB
Memory reserved : 414.0 MB



In [21]:
import torch

def bytes_to_mb(x):
    return x / (1024 ** 2)

if torch.cuda.is_available():
    print("✅ Nvidia GPU detected!")
    print(f"PyTorch CUDA version: {torch.version.cuda}")
    print(f"CUDA runtime available: {torch.cuda.is_initialized()}")
    print("=" * 50)

    gpu_count = torch.cuda.device_count()
    print(f"Total visible GPUs: {gpu_count}\n")

    for i in range(gpu_count):
        props = torch.cuda.get_device_properties(i)

        print(f"--- GPU {i} ---")
        print(f"Name: {props.name}")
        print(f"Compute capability: {props.major}.{props.minor}")
        print(f"Total memory: {bytes_to_mb(props.total_memory):.0f} MB")

        # Architecture details
        print(f"Multiprocessors (SMs): {props.multi_processor_count}")
        print(f"Max threads per SM: {props.max_threads_per_multi_processor}")
        # print(f"Max threads per block: {props.max_threads_per_block}")
        print(f"Warp size: {props.warp_size}")

        # Clock rates
        # print(f"GPU clock rate: {props.clock_rate / 1000:.1f} MHz")
        # print(f"Memory clock rate: {props.memory_clock_rate / 1000:.1f} MHz")

        # Memory system
        # print(f"Memory bus width: {props.memory_bus_width} bits")
        # print(f"L2 cache size: {props.l2_cache_size / 1024:.0f} KB")

        # Current memory usage (PyTorch-side)
        torch.cuda.set_device(i)
        print(f"Memory allocated: {bytes_to_mb(torch.cuda.memory_allocated(i)):.1f} MB")
        print(f"Memory reserved : {bytes_to_mb(torch.cuda.memory_reserved(i)):.1f} MB")

        print()

else:
    print("❌ No Nvidia GPU visible to PyTorch (CPU-only mode).")


✅ Nvidia GPU detected!
PyTorch CUDA version: None
CUDA runtime available: True
Total visible GPUs: 1

--- GPU 0 ---
Name: AMD Radeon PRO W7900D
Compute capability: 11.0
Total memory: 49136 MB
Multiprocessors (SMs): 48
Max threads per SM: 2048
Warp size: 32
Memory allocated: 406.5 MB
Memory reserved : 414.0 MB



In [1]:
!amd-smi

Permission needed to access required GPU device node(s):
  - /dev/dri/card0: Permission denied; owner=root(0):video(44);
  - /dev/dri/card1: Permission denied; owner=root(0):video(44);
  - /dev/dri/card2: Permission denied; owner=root(0):video(44);
  - /dev/dri/card3: Permission denied; owner=root(0):video(44);
  - /dev/dri/card4: Permission denied; owner=root(0):video(44);
  - /dev/dri/card5: Permission denied; owner=root(0):video(44);
  - /dev/dri/card6: Permission denied; owner=root(0):video(44);
  - /dev/dri/card7: Permission denied; owner=root(0):video(44);
  - /dev/dri/card8: Permission denied; owner=root(0):video(44);

You can try:
  • Add your user to the group that owns these devices:
      sudo usermod -aG <group> "$USER"

+------------------------------------------------------------------------------+
| AMD-SMI 26.2.0+021c61fc      amdgpu version: 6.16.6   ROCm version: 7.1.1    |
| VBIOS version: 00162356                                                      |
| Platform: Li

In [2]:
!python --version

Python 3.10.18


In [10]:
!python unifolm-world-model-action/scripts/evaluation/world_model_interaction.py \
        --seed $123 \
        --ckpt_path "./ASC26-Embodied-World-Model-Optimization/ckpts/unifolm_wma_dual.ckpt" \
        --config "./unifolm-world-model-action/configs/inference/world_model_interaction.yaml" \
        --savedir "./results/run/testing/unitree_z1_stackbox" \
        --bs 1 --height 320 --width 512 \
        --unconditional_guidance_scale 1.0 \
        --ddim_steps 50 \
        --ddim_eta 1.0 \
        --prompt_dir "./unifolm-world-model-action/examples/world_model_interaction_prompts" \
        --dataset "unitree_z1_stackbox" \
        --video_length 16 \
        --frame_stride 4 \
        --n_action_steps 16 \
        --exe_steps 16 \
        --n_iter 12 \
        --timestep_spacing 'uniform_trailing' \
        --guidance_rescale 0.7 \
        --perframe_ae

2026-02-08 15:02:51.832581: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-08 15:02:51.866187: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-02-08 15:02:51.866219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-02-08 15:02:51.867870: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2026-02-08 15:02:51.875924: I tensorflow/core/platform/cpu_feature_guar