## data parallelism

如何使用多个 GPU

+ put  the model on a GPU
device = torch.device('cuda:0')
model.to(device)
+ copy your tensors to the GPU
mytensor = my_tensor.to(device)

+ use multiple GPUs by wrapping the model in DataParallel
model = nn.DataParallel(model)


In [2]:
# imports and parameters

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Feature widths of the toy problem: model input and model output
input_size, output_size = 5, 2

# Loader batch size and total number of samples in the dummy dataset
batch_size, data_size = 30, 100

# Use the first CUDA device when CUDA is available; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Dummy DataSet

class RandomDataset(Dataset):
    """Dummy dataset of `length` random samples with `size` features each."""

    def __init__(self, size, length):
        # All samples are drawn once, up front, as a (length, size) tensor.
        samples = torch.randn(length, size)
        self.data = samples
        self.len = length

    def __len__(self):
        """Return the number of samples given at construction time."""
        return self.len

    def __getitem__(self, index):
        """Return the sample (row of `data`) stored at `index`."""
        return self.data[index]
    
# Shuffled loader over the dummy dataset, yielding batches of up to `batch_size` samples
rand_loader = DataLoader(
    RandomDataset(input_size, data_size),
    batch_size=batch_size,
    shuffle=True,
)


In [5]:
# Simple model

# 作为演示，我们的模型只接收一个输入，做一次线性运算，然后给出输出。
# 不过，DataParallel 可以用在任何模型上
# （CNN、RNN、Capsule Net 等）。
# 我们在模型里放了一条 print 语句，用来观察输入和输出张量（tensor）的大小。

class Model(nn.Module):
    """Single linear layer that prints the tensor sizes it sees on each forward pass."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        """Apply the linear layer to `input`, print both sizes, and return the result."""
        # `input` keeps its name because it is part of the method's signature.
        result = self.fc(input)
        # Printed from inside the module so the size this module actually received is visible.
        in_shape, out_shape = input.size(), result.size()
        print('\t In Model: input size', in_shape, 'output size', out_shape)
        return result
    

In [6]:
# Create the model and wrap it in DataParallel when more than one GPU is visible
model = Model(input_size, output_size)
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    # Double-quoted so the apostrophe is kept: 'Let''s use' is two adjacent
    # string literals, which concatenate to "Lets use".
    print("Let's use", gpu_count, 'GPUS')
    # Batches are split along dim 0, e.g. [30, ...] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

# Left as a bare expression so the notebook cell still displays the module
model.to(device)

Model(
  (fc): Linear(in_features=5, out_features=2, bias=True)
)

In [7]:
# Run the model on every batch from the loader
for data in rand_loader:
    # Named `inputs`/`outputs` so the builtin input() is not shadowed at module level.
    inputs = data.to(device)
    outputs = model(inputs)
    # Sizes as seen by the caller, for comparison with the sizes printed inside the model
    print('Outside: input size', inputs.size(),
          'output_size', outputs.size())

	 In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	 In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	 In Model: input size torch.Size([30, 5]) output size torch.Size([30, 2])
Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
	 In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])


#### 总结
DataParallel 会自动切分你的数据，并把任务分发给多个 GPU；每个 GPU 完成各自的任务后，DataParallel 会收集并合并结果，再返回给你。