## GPU的使用
这一节将介绍如何在训练过程中使用GPU进行加速，并说明训练时需要注意的一些事项。

### 1. 第一步：导入必要的库和数据，并对数据进行预处理。

In [1]:
import torch
from torchvision import datasets
from torch import nn
import torchvision
from torchvision.transforms import transforms
import torch.optim as optim
import torch.nn.functional as F

# Per-channel statistics handed to Normalize by both pipelines below.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: Resize(227) -> tensor -> per-channel normalisation.
train_steps = [
    transforms.Resize(227),
    # transforms.RandomHorizontalFlip(),  (augmentation currently disabled)
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform = transforms.Compose(train_steps)

# Test pipeline: the same steps as training.
test_steps = [
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_test = transforms.Compose(test_steps)

# CIFAR-10 training split, read from ./data (download=False: nothing is
# fetched here), served in shuffled batches of 32.
trainset = datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=32, shuffle=True
)

# CIFAR-10 test split, served in fixed order in batches of 32.
testset = datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform_test
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=32, shuffle=False
)

In [3]:
class AlexNet(nn.Module):
    """AlexNet-style classifier: five convolutions followed by three linear layers.

    The flatten step in ``forward`` requires the feature map to be 256x6x6
    after the last pooling stage, which is what a 3x227x227 input produces
    (227 -> 55 -> 27 -> 13 -> 6).

    Args:
        num_classes: Size of the output layer (number of logits per sample).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 96, 11, 4)
        # conv2, conv4 and conv5 are grouped convolutions (groups=2).
        self.conv2 = nn.Conv2d(96, 256, 5, padding=2, groups=2)
        self.conv3 = nn.Conv2d(256, 384, 3, padding=1)
        self.conv4 = nn.Conv2d(384, 384, 3, padding=1, groups=2)
        self.conv5 = nn.Conv2d(384, 256, 3, padding=1, groups=2)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return the class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(F.relu(self.conv5(x)), (2, 2))
        x = x.view(x.size(0), 256 * 6 * 6)
        # F.dropout defaults to training=True, so the module's mode has to be
        # passed explicitly; otherwise dropout stays active after .eval().
        x = F.dropout(F.relu(self.fc1(x)), p=0.5, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=0.5, training=self.training)
        x = self.fc3(x)
        return x
 
 
 
 
# Train on the GPU when one is available; otherwise fall back to the CPU so
# the script still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# The model is moved to the same device the batches are moved to below.
net = AlexNet()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

LOG_EVERY = 200  # number of batches averaged into each progress line

for epoch in range(20):
    running_loss = 0.0
    train_acc = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Each batch has to be moved to the model's device.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Batch accuracy = correct predictions / batch size.
        _, pred = outputs.max(1)
        num_correct = (pred == labels).sum().item()
        train_acc += num_correct / inputs.size(0)

        if i % LOG_EVERY == LOG_EVERY - 1:
            print('Epochs:%d, Batch: %d ,loss: %.3f, Avg_acc %.3f' % (epoch + 1, i + 1, running_loss / LOG_EVERY, train_acc / LOG_EVERY))
            # Restart the running window after each report.
            running_loss = 0.0
            train_acc = 0.0


cuda
Epochs:1, Batch: 200 ,loss: 2.302, Avg_acc 0.101
Epochs:1, Batch: 400 ,loss: 2.302, Avg_acc 0.109
Epochs:1, Batch: 600 ,loss: 2.300, Avg_acc 0.112
Epochs:1, Batch: 800 ,loss: 2.292, Avg_acc 0.140
Epochs:1, Batch: 1000 ,loss: 2.242, Avg_acc 0.162
Epochs:1, Batch: 1200 ,loss: 2.128, Avg_acc 0.237
Epochs:1, Batch: 1400 ,loss: 2.041, Avg_acc 0.255
Epochs:2, Batch: 200 ,loss: 1.951, Avg_acc 0.280
Epochs:2, Batch: 400 ,loss: 1.895, Avg_acc 0.307
Epochs:2, Batch: 600 ,loss: 1.795, Avg_acc 0.348
Epochs:2, Batch: 800 ,loss: 1.737, Avg_acc 0.362


KeyboardInterrupt: 

In [1]:
import torch
from torchvision import datasets
from torch import nn
import torchvision
from torchvision.transforms import transforms
import torch.optim as optim
import torch.nn.functional as F

# Per-channel statistics handed to Normalize by both pipelines below.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: Resize(227) -> tensor -> per-channel normalisation.
train_steps = [
    transforms.Resize(227),
    # transforms.RandomHorizontalFlip(),  (augmentation currently disabled)
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform = transforms.Compose(train_steps)

# Test pipeline: the same steps as training.
test_steps = [
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_test = transforms.Compose(test_steps)

# CIFAR-10 training split, read from ./data (download=False: nothing is
# fetched here), served in shuffled batches of 32.
trainset = datasets.CIFAR10(
    root='./data', train=True, download=False, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=32, shuffle=True
)

# CIFAR-10 test split, served in fixed order in batches of 32.
testset = datasets.CIFAR10(
    root='./data', train=False, download=False, transform=transform_test
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=32, shuffle=False
)

In [2]:
# Inputs need to be resized to 227x227 for this network.
class AlexNet(nn.Module):
    """AlexNet-style classifier: five convolutions followed by three linear layers.

    The flatten step in ``forward`` requires the feature map to be 256x6x6
    after the last pooling stage, which is what a 3x227x227 input produces
    (227 -> 55 -> 27 -> 13 -> 6).

    Args:
        num_classes: Size of the output layer (number of logits per sample).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 96, 11, 4)
        # conv2, conv4 and conv5 are grouped convolutions (groups=2).
        self.conv2 = nn.Conv2d(96, 256, 5, padding=2, groups=2)
        self.conv3 = nn.Conv2d(256, 384, 3, padding=1)
        self.conv4 = nn.Conv2d(384, 384, 3, padding=1, groups=2)
        self.conv5 = nn.Conv2d(384, 256, 3, padding=1, groups=2)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return the class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(F.relu(self.conv5(x)), (2, 2))
        x = x.view(x.size(0), 256 * 6 * 6)
        # F.dropout defaults to training=True, so the module's mode has to be
        # passed explicitly; otherwise dropout stays active after .eval().
        x = F.dropout(F.relu(self.fc1(x)), p=0.5, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=0.5, training=self.training)
        x = self.fc3(x)
        return x

# Train on the GPU when one is available; otherwise fall back to the CPU so
# the script still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# The model is moved to the same device the batches are moved to below.
net = AlexNet()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

LOG_EVERY = 200  # number of batches averaged into each progress line

for epoch in range(20):
    running_loss = 0.0
    train_acc = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Each batch has to be moved to the model's device.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Batch accuracy = correct predictions / batch size.
        _, pred = outputs.max(1)
        num_correct = (pred == labels).sum().item()
        train_acc += num_correct / inputs.size(0)

        if i % LOG_EVERY == LOG_EVERY - 1:
            # The epoch label printed here is zero-based.
            print('Epochs:%d, Batch: %d ,loss: %.3f, Avg_acc %.3f' % (epoch , i + 1, running_loss / LOG_EVERY, train_acc / LOG_EVERY))
            # Restart the running window after each report.
            running_loss = 0.0
            train_acc = 0.0

cuda


KeyboardInterrupt: 

In [3]:
# Train with AlexNet.
# The default AlexNet has a lot of parameters; if you do not want to wait as
# long, build the model with the line below instead of
# torchvision.models.alexnet(num_classes=10):
#   model = AlexNet()
# (another commented-out alternative was: model = lenet)
# The network is moved to the GPU as soon as it is built.
model = torchvision.models.alexnet(num_classes=10).to("cuda")

criterion = nn.CrossEntropyLoss()

# Adam is the active optimizer; the commented-out SGD alternative was
#   optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(model)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [8]:
# Usually the whole test set is evaluated after each training epoch to obtain
# the test loss and accuracy.
# NOTE(review): these four lists are presumably meant to hold loss/accuracy
# history; nothing in the visible code appends to them.
losses, acces = [], []
eval_losses, eval_acces = [], []
# Best test accuracy seen so far; read and updated by test().
best_acc = 0

def train(epochs):
    """Run one optimisation pass over ``trainloader`` with the global ``model``.

    Every 200th batch (starting with batch 0) a progress line is printed with
    the running mean of the per-batch accuracy and loss.

    Args:
        epochs: Value printed as the epoch label in the progress lines.
    """
    loss_sum = 0
    acc_sum = 0
    model.train()
    for step, (data, target) in enumerate(trainloader):
        data, target = data.to("cuda"), target.to("cuda")

        # Standard update: clear gradients, forward, backward, step.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Running statistics.
        loss_sum += loss.item()
        predicted = output.max(1)[1]
        # Batch accuracy = correct predictions / batch size.
        hits = (predicted == target).sum().item()
        acc_sum += hits / data.shape[0]

        if not step % 200:
            seen = step + 1
            print("Epochs:{0:}  |  Batch:{1:}  |  Avg_acc:{2:.3f}  |  Avg_Loss:{3:.3f}".format(epochs, step, acc_sum / seen, loss_sum / seen))
    
def test(epochs):
    """Evaluate the global ``model`` on ``testloader``; save it on a new best accuracy.

    Runs one pass over ``testloader`` in eval mode with gradients disabled and
    prints the mean per-batch accuracy and loss. When that accuracy beats the
    module-level ``best_acc``, the model is saved to ``./model/model.pth`` and
    ``best_acc`` is updated.

    Args:
        epochs: Value printed as the epoch label in the summary line.
    """
    import os  # local import: only needed to create the checkpoint directory

    global best_acc
    eval_loss = 0
    eval_acc = 0
    num_batches = 0
    model.eval()  # switch the model to evaluation mode
    with torch.no_grad():
        for im, label in testloader:
            im = im.to("cuda")
            label = label.to("cuda")
            out = model(im)
            loss = criterion(out, label)
            # Accumulate the loss.
            eval_loss += loss.item()
            # Accumulate the batch accuracy.
            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            eval_acc += num_correct / im.shape[0]
            num_batches += 1

    if num_batches == 0:
        # Nothing to average over; skip reporting and checkpointing.
        print("Test:Epochs:{0:}  |  no test batches".format(epochs))
        return

    avg_acc = eval_acc / num_batches
    avg_loss = eval_loss / num_batches
    # The Batch field shows the index of the last batch, as before.
    print("Test:Epochs:{0:}  |  Batch:{1:}  |  Avg_acc:{2:.3f}  |  Avg_Loss:{3:.3f}".format(epochs, num_batches - 1, avg_acc, avg_loss))
    if avg_acc > best_acc:
        # Format the already-divided value: `"..." % a / b` would apply % first
        # and then try to divide the resulting string.
        print("New Acc: %.3f,Saving Model..." % avg_acc)
        # torch.save does not create missing parent directories.
        os.makedirs('./model', exist_ok=True)
        torch.save(model, './model/model.pth')
        best_acc = avg_acc
        
# Ten rounds, each one training pass followed by one evaluation pass.
for epoch_idx in range(10):
    train(epoch_idx)
    test(epoch_idx)

Epochs:0  |  Batch:0  |  Avg_acc:0.531  |  Avg_Loss:1.528
Epochs:0  |  Batch:200  |  Avg_acc:0.534  |  Avg_Loss:1.308
Epochs:0  |  Batch:400  |  Avg_acc:0.538  |  Avg_Loss:1.299
Epochs:0  |  Batch:600  |  Avg_acc:0.538  |  Avg_Loss:1.296
Epochs:0  |  Batch:800  |  Avg_acc:0.535  |  Avg_Loss:1.304
Epochs:0  |  Batch:1000  |  Avg_acc:0.536  |  Avg_Loss:1.301
Epochs:0  |  Batch:1200  |  Avg_acc:0.537  |  Avg_Loss:1.295
Epochs:0  |  Batch:1400  |  Avg_acc:0.539  |  Avg_Loss:1.291
New Acc:0.5691892971246006,Saving Model...
Test:Epochs:0  |  Batch:312  |  Avg_acc:0.569  |  Avg_Loss:1.209
Epochs:1  |  Batch:0  |  Avg_acc:0.562  |  Avg_Loss:1.115
Epochs:1  |  Batch:200  |  Avg_acc:0.557  |  Avg_Loss:1.232
Epochs:1  |  Batch:400  |  Avg_acc:0.551  |  Avg_Loss:1.259
Epochs:1  |  Batch:600  |  Avg_acc:0.552  |  Avg_Loss:1.262
Epochs:1  |  Batch:800  |  Avg_acc:0.550  |  Avg_Loss:1.265


KeyboardInterrupt: 