In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
# cifar-10官方提供的数据集是用numpy array存储的
# 下面这个transform会把numpy array变成torch tensor，然后把rgb值归一到[0, 1]这个区间
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# 在构建数据集的时候指定transform，就会应用我们定义好的transform
# root是存储数据的文件夹，download=True指定如果数据不存在先下载数据
cifar_train = torchvision.datasets.CIFAR10(root='./data', train=True,
                                           download=True, transform=transform)
cifar_test = torchvision.datasets.CIFAR10(root='./data', train=False,
                                          transform=transform)

Files already downloaded and verified


In [2]:
print(cifar_train)
print(cifar_test)

Dataset CIFAR10
    Number of datapoints: 50000
    Split: train
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None
Dataset CIFAR10
    Number of datapoints: 10000
    Split: test
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None


In [3]:
trainloader = torch.utils.data.DataLoader(cifar_train, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(cifar_test, batch_size=32, shuffle=True)

In [4]:
class LeNet(nn.Module):
    # 一般在__init__中定义网络需要的操作算子，比如卷积、全连接算子等等
    def __init__(self):
        super(LeNet, self).__init__()
        # Conv2d的第一个参数是输入的channel数量，第二个是输出的channel数量，第三个是kernel size
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 由于上一层有16个channel输出，每个feature map大小为5*5，所以全连接层的输入是16*5*5
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        # 最终有10类，所以最后一个全连接层输出数量是10
        self.fc3 = nn.Linear(84, 10)
        self.pool = nn.MaxPool2d(2, 2)
    # forward这个函数定义了前向传播的运算，只需要像写普通的python算数运算那样就可以了
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # 下面这步把二维特征图变为一维，这样全连接层才能处理
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## start GPU

In [5]:
device = torch.device("cuda:0")
net = LeNet().to(device)

### Loss function SGD

In [6]:
# optim中定义了各种各样的优化方法，包括SGD
import torch.optim as optim

# CrossEntropyLoss就是我们需要的损失函数
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [7]:
print(net)

LeNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [9]:
print("Start Training...")
for epoch in range(30):
    # 我们用一个变量来记录每100个batch的平均loss
    loss100 = 0.0
    # 我们的dataloader派上了用场
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) # 注意需要复制到GPU
        optimizer.zero_grad()
        
        outputs = net(inputs)
        print(outputs.shape)
        # print(labels)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss100 += loss.item()
        break
        if i % 100 == 99:
            print('[Epoch %d, Batch %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss100 / 100))
            loss100 = 0.0

print("Done Training!")

Start Training...
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
torch.Size([32, 10])
Done Training!


In [9]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        # conv layers: (in_channel size, out_channels size, kernel_size, stride, padding)
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        # max pooling (kernel_size, stride)
        self.pool = nn.MaxPool2d(2, 2)

        # fully conected layers:
        self.fc6 = nn.Linear(16384, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 1000)
        self.fc9 = nn.Linear(1000, 100)
        self.fc10 = nn.Linear(100, 10)

        
        
    def forward(self, x):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.pool(x)
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool(x)
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = self.pool(x)
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = self.pool(x)
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = self.pool(x)
        x = x.view(-1, 16384)
        x = F.relu(self.fc6(x))
        x = F.dropout(x, 0.5)
        x = F.relu(self.fc7(x))
        x = F.dropout(x, 0.5)
        x = self.fc8(x)
        x = F.dropout(x, 0.5)
        x = self.fc9(x)
        x = F.dropout(x, 0.5)
        x = self.fc10(x)

        
        return x


In [10]:
trainloader = torch.utils.data.DataLoader(cifar_train, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(cifar_test, batch_size=32, shuffle=True)

In [11]:
device = torch.device("cuda:0")
net = VGG16().to(device)

In [12]:
# CrossEntropyLoss就是我们需要的损失函数
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [17]:
print("Start Training...")
for epoch in range(30):
    # 我们用一个变量来记录每100个batch的平均loss
    loss100 = 0.0
    # 我们的dataloader派上了用场
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) # 注意需要复制到GPU
        optimizer.zero_grad()
        
        outputs = net(inputs)
        # print(outputs.shape)
        # print(labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss100 += loss.item()
        if i % 100 == 99:
            print('[Epoch %d, Batch %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss100 / 100))
            loss100 = 0.0

print("Done Training!")

Start Training...
[Epoch 1, Batch   100] loss: 0.294
[Epoch 1, Batch   200] loss: 0.259
[Epoch 1, Batch   300] loss: 0.283
[Epoch 1, Batch   400] loss: 0.299
[Epoch 1, Batch   500] loss: 0.304
[Epoch 1, Batch   600] loss: 0.304
[Epoch 1, Batch   700] loss: 0.313
[Epoch 1, Batch   800] loss: 0.328
[Epoch 1, Batch   900] loss: 0.331
[Epoch 1, Batch  1000] loss: 0.361
[Epoch 1, Batch  1100] loss: 0.355
[Epoch 1, Batch  1200] loss: 0.363
[Epoch 1, Batch  1300] loss: 0.352
[Epoch 1, Batch  1400] loss: 0.369
[Epoch 1, Batch  1500] loss: 0.384
[Epoch 2, Batch   100] loss: 0.285
[Epoch 2, Batch   200] loss: 0.292
[Epoch 2, Batch   300] loss: 0.271
[Epoch 2, Batch   400] loss: 0.299
[Epoch 2, Batch   500] loss: 0.320
[Epoch 2, Batch   600] loss: 0.316
[Epoch 2, Batch   700] loss: 0.308
[Epoch 2, Batch   800] loss: 0.302
[Epoch 2, Batch   900] loss: 0.330
[Epoch 2, Batch  1000] loss: 0.347
[Epoch 2, Batch  1100] loss: 0.341
[Epoch 2, Batch  1200] loss: 0.362
[Epoch 2, Batch  1300] loss: 0.349
[E

[Epoch 16, Batch   700] loss: 0.224
[Epoch 16, Batch   800] loss: 0.238
[Epoch 16, Batch   900] loss: 0.269
[Epoch 16, Batch  1000] loss: 0.266
[Epoch 16, Batch  1100] loss: 0.264
[Epoch 16, Batch  1200] loss: 0.262
[Epoch 16, Batch  1300] loss: 0.316
[Epoch 16, Batch  1400] loss: 0.307
[Epoch 16, Batch  1500] loss: 0.300
[Epoch 17, Batch   100] loss: 0.199
[Epoch 17, Batch   200] loss: 0.200
[Epoch 17, Batch   300] loss: 0.228
[Epoch 17, Batch   400] loss: 0.229
[Epoch 17, Batch   500] loss: 0.217
[Epoch 17, Batch   600] loss: 0.216
[Epoch 17, Batch   700] loss: 0.252
[Epoch 17, Batch   800] loss: 0.256
[Epoch 17, Batch   900] loss: 0.273
[Epoch 17, Batch  1000] loss: 0.292
[Epoch 17, Batch  1100] loss: 0.259
[Epoch 17, Batch  1200] loss: 0.263
[Epoch 17, Batch  1300] loss: 0.302
[Epoch 17, Batch  1400] loss: 0.275
[Epoch 17, Batch  1500] loss: 0.305
[Epoch 18, Batch   100] loss: 0.198
[Epoch 18, Batch   200] loss: 0.192
[Epoch 18, Batch   300] loss: 0.190
[Epoch 18, Batch   400] loss

In [19]:
# 构造测试的dataloader
dataiter = iter(testloader)
# 预测正确的数量和总数量
correct = 0
total = 0
# 使用torch.no_grad的话在前向传播中不记录梯度，节省内存
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        # 预测
        outputs = net(images)
        # 我们的网络输出的实际上是个概率分布，去最大概率的哪一项作为预测分类
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 59 %
