# GPU的使用

In [None]:
!nvidia-smi

In [2]:
import torch
from torch import nn
# 使用CPU、使用第一个GPU、使用第二个GPU
torch.device('cpu'),torch.cuda.device('cuda'),torch.cuda.device('cuda:1')

(device(type='cpu'),
 <torch.cuda.device at 0x27d6c95fa00>,
 <torch.cuda.device at 0x27d6c2be2e0>)

In [3]:
# 看GPU数量
torch.cuda.device_count()

1

In [11]:
def try_gpu(i=0):
    '''
    取出第i个GPU
    :param i:需要使用的GPU
    :return:如果有GPU则返回gpu(i)，否则返回cpu()
    '''
    # GPU从0开始
    if torch.cuda.device_count() >= i+1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():  #@save
    """返回所有可用的GPU，如果没有GPU，则返回[cpu(),]"""
    devices = [torch.device(f'cuda:{i}')
             for i in range(torch.cuda.device_count())]
    return devices if devices else [torch.device('cpu')]

try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

In [5]:
x = torch.tensor([1,2,3])
# 看创建的张量在哪里（默认在CPU）
x.device

device(type='cpu')

In [13]:
# 怎么在GPU创建tensor
X = torch.ones(2,3,device=try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [15]:
Y = torch.rand(2,3,device=try_gpu())
Y

tensor([[0.4527, 0.9779, 0.1488],
        [0.8313, 0.7587, 0.8547]], device='cuda:0')

In [17]:
# z把X移动到第二个GPU，但是X还在第1个GPU
# z = X.cuda(1)
# print(X)
# print(z)

RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [18]:
Y+X

tensor([[1.4527, 1.9779, 1.1488],
        [1.8313, 1.7587, 1.8547]], device='cuda:0')

In [19]:
net = nn.Sequential(nn.Linear(3,1))
# 把net移动到0号GPU
net = net.to(device=try_gpu())

net(X)

tensor([[0.7471],
        [0.7471]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [20]:
net[0].weight.data.device

device(type='cuda', index=0)