### 数据迁移

In [18]:
import torch
from torch import nn
import numpy as np

![](./gpu.jpg)


**使用方法**

- data.to('cpu')

- module.to('cuda')

    to函数：转换数据类型/设备
    
- 区别

    张量的to函数不执行in_place        module执行in_place操作

In [2]:
x = torch.ones((3, 3))
x.to(torch.float64)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

In [3]:
x = torch.ones((3, 3))
x.to('cuda')

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [5]:
linear=nn.Linear(2,2)
linear.to(torch.float64)

Linear(in_features=2, out_features=2, bias=True)

In [6]:
gpu = torch.device('cuda')
linear.to(gpu)

Linear(in_features=2, out_features=2, bias=True)

In [8]:
# ========================== tensor to cuda

x_cpu = torch.ones((3, 3))
print("x_cpu:\ndevice: {} is_cuda: {} id: {}".format(x_cpu.device, x_cpu.is_cuda, id(x_cpu)))

x_gpu = x_cpu.to('cuda')
print("x_gpu:\ndevice: {} is_cuda: {} id: {}".format(x_gpu.device, x_gpu.is_cuda, id(x_gpu)))

x_cpu:
device: cpu is_cuda: False id: 2890607536888
x_gpu:
device: cuda:0 is_cuda: True id: 2890607536024


In [10]:
# ========================== module to cuda
net = nn.Sequential(nn.Linear(3, 3))

print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

net.to('cuda')
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:2890607416712 is_cuda: False

id:2890607416712 is_cuda: True


## torch.cuda的常用方法

    torch.cuda.device_count()
    
- 查看可用的GPU数量

    torch.cuda.get_device_name()
    
- 获取GPU名称

In [12]:
# ========================== 查看当前gpu 序号，尝试修改可见gpu，以及主gpu
current_device = torch.cuda.current_device()
print("current_device: ", current_device)

torch.cuda.set_device(0)
current_device = torch.cuda.current_device()
print("current_device: ", current_device)

#
cap = torch.cuda.get_device_capability(device=None)
print(cap)
#
name = torch.cuda.get_device_name()
print(name)

is_available = torch.cuda.is_available()
print(is_available)

current_device:  0
current_device:  0
(6, 1)
GeForce GTX 1050
True


In [13]:
# ===================== seed
seed = 2
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

current_seed = torch.cuda.initial_seed()
print(current_seed)

s = torch.cuda.seed()
s_all = torch.cuda.seed_all()

2


In [14]:
# ========================== 查看 gpu数量/名称
device_count = torch.cuda.device_count()
print("\ndevice_count: {}".format(device_count))

device_name = torch.cuda.get_device_name(0)
print("\ndevice_name: {}".format(device_name))


device_count: 1

device_name: GeForce GTX 1050


In [16]:
# ============================ 手动选择gpu
import os
gpu_list = [0]
gpu_list_str = ','.join(map(str, gpu_list))
os.environ.setdefault("CUDA_VISIBLE_DEVICES", gpu_list_str) 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
# ============================ 依内存情况自动选择主gpu
def get_gpu_memory():
    import platform
    if 'Windows' != platform.system():
        import os
        os.system('nvidia-smi -q -d Memory | grep -A4 GPU | grep Free > tmp.txt')
        memory_gpu = [int(x.split()[2]) for x in open('tmp.txt', 'r').readlines()]
        os.system('rm tmp.txt')
    else:
        memory_gpu = False
        print("显存计算功能暂不支持windows操作系统")
    return memory_gpu


gpu_memory = get_gpu_memory()
if not gpu_memory:
    print("\ngpu free memory: {}".format(gpu_memory))
    gpu_list = np.argsort(gpu_memory)[::-1]

    gpu_list_str = ','.join(map(str, gpu_list))
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", gpu_list_str)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

显存计算功能暂不支持windows操作系统

gpu free memory: False


## 多GPU的分发并行机制

    torch.nn.DataParallel(module,device_ids,output_device,dim)

- module

    需要包装分发的模型

- device_ids

    可分发的GPU

- output_device

    结果输出设备

In [20]:
class FooNet(nn.Module):
    def __init__(self, neural_num, layers=3):
        super(FooNet, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(neural_num, neural_num, bias=False) for i in range(layers)])

    def forward(self, x):

        print("\nbatch size in forward: {}".format(x.size()[0]))

        for (i, linear) in enumerate(self.linears):
            x = linear(x)
            x = torch.relu(x)
        return x

In [21]:
batch_size = 16

# data
inputs = torch.randn(batch_size, 3)
labels = torch.randn(batch_size, 3)

inputs, labels = inputs.to(device), labels.to(device)

# model
net = FooNet(neural_num=3, layers=3)  # 使用了并行机制
net = nn.DataParallel(net)
net.to(device)

# training
for epoch in range(1):

    outputs = net(inputs)

    print("model outputs.size: {}".format(outputs.size()))

print("CUDA_VISIBLE_DEVICES :{}".format(os.environ["CUDA_VISIBLE_DEVICES"]))
print("device_count :{}".format(torch.cuda.device_count()))


batch size in forward: 16
model outputs.size: torch.Size([16, 3])
CUDA_VISIBLE_DEVICES :0
device_count :1
