In [3]:
# how to print nvidia-smi
import subprocess
def print_nvidia_smi():
    try:
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode == 0:
            print(result.stdout)
        else:
            print("Error executing nvidia-smi:", result.stderr)
    except FileNotFoundError:
        print("nvidia-smi command not found. Make sure NVIDIA drivers are installed.")
if __name__ == "__main__":
    print_nvidia_smi()
    

Sat Nov 22 03:23:32 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.80                 Driver Version: 581.80         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5070 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   35C    P5             21W /   33W |       0MiB /  12227MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [13]:
import torch

print("Torch:", torch.__version__, "| CUDA runtime:", torch.version.cuda)
print("CUDA available:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))


Torch: 2.9.1+cu130 | CUDA runtime: 13.0
CUDA available: True
Device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 5070 Ti Laptop GPU


In [14]:
import torch
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = nn.Sequential(
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Linear(4096, 4096),
).to(device)

x = torch.randn(64, 4096, device=device)
y = torch.randn(64, 4096, device=device)

opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for step in range(5):
    opt.zero_grad()
    out = model(x)
    loss = loss_fn(out, y)
    loss.backward()
    opt.step()
    print(f"step {step} | loss {loss.item():.4f}")


import torch
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = nn.Sequential(
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Linear(4096, 4096),
).to(device)

x = torch.randn(64, 4096, device=device)
y = torch.randn(64, 4096, device=device)

opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for step in range(5):
    opt.zero_grad()
    out = model(x)
    loss = loss_fn(out, y)
    loss.backward()
    opt.step()
    print(f"step {step} | loss {loss.item():.4f}")


Using device: cuda
step 0 | loss 1.0507
step 1 | loss 0.8406
step 2 | loss 0.5866
step 3 | loss 0.3403
step 4 | loss 0.1685
Using device: cuda
step 0 | loss 1.0527
step 1 | loss 0.8470
step 2 | loss 0.5899
step 3 | loss 0.3480
step 4 | loss 0.1731


In [12]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

transform = transforms.Compose([
    transforms.ToTensor(),
])

train_ds = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

images, labels = next(iter(train_loader))
print("Batch:", images.shape, images.dtype, "device:", images.device)

images = images.to(device)
print("Moved to:", images.device)

Using device: cuda


100.0%


Batch: torch.Size([64, 3, 32, 32]) torch.float32 device: cpu
Moved to: cuda:0


In [15]:
import os
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

print("CWD:", os.getcwd())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

transform = transforms.Compose([
    transforms.ToTensor(),
])

train_ds = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

images, labels = next(iter(train_loader))
print("Batch:", images.shape, images.dtype, "device:", images.device)

images = images.to(device)
print("Moved to:", images.device)

CWD: c:\Users\ASUS\Documents\Deep Learning
Using device: cuda
Batch: torch.Size([64, 3, 32, 32]) torch.float32 device: cpu
Moved to: cuda:0
