In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
# cifar-10官方提供的数据集是用numpy array存储的
# 下面这个transform会把numpy array变成torch tensor，然后把rgb值归一到[0, 1]这个区间
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# 在构建数据集的时候指定transform，就会应用我们定义好的transform
# root是存储数据的文件夹，download=True指定如果数据不存在先下载数据
cifar_train = torchvision.datasets.CIFAR10(root='./data', train=True,
                                           download=True, transform=transform)
cifar_test = torchvision.datasets.CIFAR10(root='./data', train=False,
                                          transform=transform)

Files already downloaded and verified


In [2]:
print(cifar_train)
print(cifar_test)

Dataset CIFAR10
    Number of datapoints: 50000
    Split: train
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None
Dataset CIFAR10
    Number of datapoints: 10000
    Split: test
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None


In [3]:
trainloader = torch.utils.data.DataLoader(cifar_train, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(cifar_test, batch_size=32, shuffle=True)

In [4]:
class LeNet(nn.Module):
    # 一般在__init__中定义网络需要的操作算子，比如卷积、全连接算子等等
    def __init__(self):
        super(LeNet, self).__init__()
        # Conv2d的第一个参数是输入的channel数量，第二个是输出的channel数量，第三个是kernel size
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 由于上一层有16个channel输出，每个feature map大小为5*5，所以全连接层的输入是16*5*5
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        # 最终有10类，所以最后一个全连接层输出数量是10
        self.fc3 = nn.Linear(84, 10)
        self.pool = nn.MaxPool2d(2, 2)
    # forward这个函数定义了前向传播的运算，只需要像写普通的python算数运算那样就可以了
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # 下面这步把二维特征图变为一维，这样全连接层才能处理
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## start GPU

In [5]:
device = torch.device("cuda:0")
net = LeNet().to(device)

### Loss function SGD

In [6]:
# optim中定义了各种各样的优化方法，包括SGD
import torch.optim as optim

# CrossEntropyLoss就是我们需要的损失函数
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [7]:
print(net)

LeNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [8]:
print("Start Training...")
for epoch in range(30):
    # 我们用一个变量来记录每100个batch的平均loss
    loss100 = 0.0
    # 我们的dataloader派上了用场
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) # 注意需要复制到GPU
        optimizer.zero_grad()
        
        outputs = net(inputs)
        # print(outputs)
        # print(labels)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss100 += loss.item()
        break
        if i % 100 == 99:
            print('[Epoch %d, Batch %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss100 / 100))
            loss100 = 0.0

print("Done Training!")

Start Training...
tensor([[ 1.2970e-02,  4.2581e-02,  6.0349e-02, -9.1367e-02,  6.9938e-02,
         -4.7231e-02, -5.2742e-02, -8.4100e-03,  8.1142e-02,  3.2391e-02],
        [ 1.5045e-02,  5.8664e-02,  6.3746e-02, -8.0074e-02,  8.1771e-02,
         -5.2293e-02, -5.0420e-02, -1.1572e-02,  7.2548e-02,  4.9074e-02],
        [ 1.1461e-02,  4.9707e-02,  1.0265e-01, -7.5324e-02,  6.4649e-02,
         -6.4808e-02, -2.5439e-02,  1.1416e-02,  8.8860e-02,  8.4718e-02],
        [ 1.9762e-02,  6.0521e-02,  7.1186e-02, -9.7548e-02,  7.7393e-02,
         -4.2783e-02, -5.0003e-02,  1.8992e-03,  9.2109e-02,  6.2539e-02],
        [ 1.2060e-02,  5.4011e-02,  5.3474e-02, -8.1873e-02,  8.1943e-02,
         -5.2684e-02, -4.6411e-02, -1.6033e-02,  7.2278e-02,  4.3279e-02],
        [ 6.5697e-03,  5.1755e-02,  7.5524e-02, -8.4480e-02,  6.9963e-02,
         -5.7012e-02, -5.1456e-02, -5.1101e-03,  8.5472e-02,  5.1669e-02],
        [ 4.7075e-03,  5.3253e-02,  6.3276e-02, -9.9612e-02,  7.3402e-02,
         -4.42

          0.0804,  0.0705]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([3, 9, 1, 4, 8, 9, 0, 9, 2, 5, 6, 2, 4, 3, 5, 8, 1, 0, 3, 9, 6, 0, 7, 3,
        8, 6, 7, 2, 8, 3, 0, 3], device='cuda:0')
tensor([[ 0.0155,  0.0425,  0.1038, -0.0939,  0.0536, -0.0522, -0.0396,  0.0231,
          0.0979,  0.0681],
        [ 0.0153,  0.0612,  0.0763, -0.1026,  0.0776, -0.0479, -0.0456, -0.0049,
          0.0947,  0.0657],
        [ 0.0175,  0.0636,  0.0733, -0.0946,  0.0764, -0.0555, -0.0438, -0.0068,
          0.0781,  0.0598],
        [ 0.0173,  0.0511,  0.0577, -0.1035,  0.0802, -0.0485, -0.0492, -0.0130,
          0.0804,  0.0507],
        [ 0.0062,  0.0540,  0.0846, -0.0858,  0.0710, -0.0484, -0.0447,  0.0005,
          0.0813,  0.0696],
        [ 0.0066,  0.0600,  0.0459, -0.1101,  0.0912, -0.0519, -0.0619, -0.0123,
          0.0880,  0.0448],
        [ 0.0091,  0.0517,  0.0471, -0.0824,  0.0827, -0.0473, -0.0406, -0.0192,
          0.0756,  0.0357],
        [ 0.0097,  0.0568,  0.0570, 

        9, 8, 1, 1, 1, 5, 8, 9], device='cuda:0')
tensor([[ 0.0183,  0.0529,  0.0883, -0.0891,  0.0632, -0.0479, -0.0490, -0.0077,
          0.0791,  0.0569],
        [ 0.0118,  0.0503,  0.0373, -0.1069,  0.0773, -0.0428, -0.0512, -0.0103,
          0.0890,  0.0323],
        [ 0.0133,  0.0599,  0.1034, -0.0865,  0.0672, -0.0641, -0.0358,  0.0009,
          0.0795,  0.0709],
        [ 0.0093,  0.0507,  0.0553, -0.0814,  0.0820, -0.0528, -0.0424, -0.0201,
          0.0722,  0.0432],
        [ 0.0179,  0.0554,  0.0459, -0.0950,  0.0903, -0.0589, -0.0436, -0.0150,
          0.0892,  0.0474],
        [ 0.0160,  0.0460,  0.1001, -0.0859,  0.0560, -0.0546, -0.0384,  0.0122,
          0.0911,  0.0730],
        [ 0.0116,  0.0629,  0.0835, -0.1033,  0.0750, -0.0409, -0.0562, -0.0169,
          0.0884,  0.0538],
        [ 0.0254,  0.0659,  0.0873, -0.0930,  0.0667, -0.0585, -0.0376, -0.0005,
          0.0906,  0.0629],
        [ 0.0103,  0.0498,  0.0491, -0.0944,  0.0790, -0.0501, -0.0516, -0.011

tensor([[ 0.0183,  0.0566,  0.0684, -0.0843,  0.0832, -0.0474, -0.0455, -0.0197,
          0.0724,  0.0472],
        [ 0.0157,  0.0598,  0.0794, -0.0843,  0.0715, -0.0531, -0.0436, -0.0101,
          0.0769,  0.0609],
        [ 0.0106,  0.0503,  0.0633, -0.0819,  0.0856, -0.0489, -0.0559, -0.0181,
          0.0833,  0.0348],
        [ 0.0157,  0.0500,  0.0612, -0.1072,  0.0755, -0.0529, -0.0567, -0.0080,
          0.1006,  0.0688],
        [ 0.0087,  0.0524,  0.0654, -0.0781,  0.0650, -0.0507, -0.0488, -0.0039,
          0.0781,  0.0536],
        [ 0.0142,  0.0503,  0.0822, -0.0831,  0.0560, -0.0552, -0.0455,  0.0025,
          0.0770,  0.0568],
        [ 0.0150,  0.0498,  0.0642, -0.0880,  0.0810, -0.0513, -0.0493, -0.0200,
          0.0759,  0.0477],
        [ 0.0169,  0.0576,  0.0511, -0.0977,  0.0874, -0.0486, -0.0513, -0.0216,
          0.0855,  0.0408],
        [ 0.0197,  0.0576,  0.0614, -0.0830,  0.0752, -0.0500, -0.0470, -0.0179,
          0.0751,  0.0458],
        [ 0.0125,  

          0.0767,  0.0453]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([9, 5, 1, 2, 5, 9, 8, 4, 1, 0, 8, 9, 6, 3, 0, 6, 6, 5, 1, 1, 8, 4, 7, 1,
        1, 7, 6, 7, 6, 5, 6, 9], device='cuda:0')
tensor([[ 1.9649e-02,  5.9597e-02,  7.4386e-02, -8.3572e-02,  7.7210e-02,
         -5.2872e-02, -4.1338e-02, -1.5684e-02,  7.1373e-02,  5.7241e-02],
        [ 6.7806e-03,  5.8008e-02,  7.5226e-02, -9.2206e-02,  6.8201e-02,
         -5.3330e-02, -5.1832e-02, -1.2144e-02,  7.9188e-02,  6.1747e-02],
        [ 2.0925e-02,  5.1388e-02,  9.1255e-02, -9.1274e-02,  6.0799e-02,
         -5.4162e-02, -3.7878e-02, -1.2993e-03,  8.5125e-02,  6.0442e-02],
        [ 5.4541e-03,  5.6860e-02,  5.7714e-02, -8.7907e-02,  7.7853e-02,
         -4.9775e-02, -4.9267e-02, -1.5229e-02,  8.4520e-02,  4.2744e-02],
        [ 1.4515e-02,  5.8651e-02,  8.6851e-02, -8.1887e-02,  7.1909e-02,
         -6.0509e-02, -4.1029e-02, -9.3628e-03,  7.8310e-02,  6.0993e-02],
        [ 1.3834e-02,  6.0232e-02,  6.9989e-02, -9.962

          0.0800,  0.0681]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([0, 5, 4, 7, 3, 9, 5, 7, 8, 4, 5, 9, 4, 5, 3, 1, 4, 7, 7, 3, 1, 9, 6, 1,
        1, 0, 3, 6, 1, 0, 0, 3], device='cuda:0')
tensor([[ 0.0123,  0.0542,  0.0539, -0.0876,  0.0965, -0.0462, -0.0472, -0.0190,
          0.0735,  0.0434],
        [ 0.0151,  0.0544,  0.0713, -0.0817,  0.0826, -0.0450, -0.0434, -0.0127,
          0.0722,  0.0502],
        [ 0.0278,  0.0619,  0.0673, -0.0874,  0.0784, -0.0509, -0.0399, -0.0135,
          0.0737,  0.0502],
        [ 0.0247,  0.0555,  0.0761, -0.0861,  0.0766, -0.0517, -0.0487, -0.0080,
          0.0759,  0.0517],
        [ 0.0065,  0.0502,  0.0716, -0.1061,  0.0756, -0.0459, -0.0726, -0.0173,
          0.0853,  0.0429],
        [ 0.0174,  0.0560,  0.0577, -0.0819,  0.0824, -0.0485, -0.0477, -0.0190,
          0.0765,  0.0470],
        [ 0.0110,  0.0547,  0.0806, -0.0963,  0.0724, -0.0464, -0.0564, -0.0132,
          0.0914,  0.0600],
        [ 0.0179,  0.0604,  0.0652, 

tensor([[ 0.0124,  0.0551,  0.0587, -0.0753,  0.0794, -0.0541, -0.0427, -0.0138,
          0.0686,  0.0339],
        [ 0.0121,  0.0601,  0.0760, -0.0827,  0.0713, -0.0466, -0.0496, -0.0122,
          0.0822,  0.0480],
        [ 0.0236,  0.0611,  0.0949, -0.0882,  0.0634, -0.0559, -0.0399, -0.0085,
          0.0787,  0.0633],
        [ 0.0217,  0.0528,  0.0836, -0.0822,  0.0603, -0.0486, -0.0354,  0.0006,
          0.0813,  0.0601],
        [ 0.0211,  0.0616,  0.0686, -0.0841,  0.0775, -0.0503, -0.0446, -0.0154,
          0.0805,  0.0534],
        [ 0.0212,  0.0577,  0.0778, -0.0885,  0.0710, -0.0563, -0.0435, -0.0188,
          0.0803,  0.0512],
        [ 0.0177,  0.0547,  0.0944, -0.0897,  0.0686, -0.0500, -0.0479, -0.0018,
          0.0808,  0.0701],
        [ 0.0194,  0.0477,  0.0592, -0.0821,  0.0771, -0.0433, -0.0402, -0.0102,
          0.0778,  0.0409],
        [ 0.0138,  0.0501,  0.0876, -0.0855,  0.0496, -0.0550, -0.0436,  0.0060,
          0.0830,  0.0698],
        [ 0.0098,  

          0.0809,  0.0587]], device='cuda:0', grad_fn=<AddmmBackward>)
tensor([1, 8, 1, 6, 1, 1, 0, 5, 3, 5, 4, 7, 5, 5, 3, 3, 2, 7, 0, 6, 5, 3, 3, 7,
        7, 5, 2, 3, 7, 2, 4, 2], device='cuda:0')
tensor([[ 0.0034,  0.0480,  0.0651, -0.0859,  0.0873, -0.0497, -0.0447, -0.0178,
          0.0787,  0.0489],
        [ 0.0085,  0.0545,  0.0490, -0.0802,  0.0777, -0.0564, -0.0482, -0.0154,
          0.0787,  0.0319],
        [ 0.0053,  0.0565,  0.0846, -0.0764,  0.0684, -0.0492, -0.0410, -0.0025,
          0.0861,  0.0545],
        [ 0.0106,  0.0506,  0.0559, -0.0785,  0.0824, -0.0517, -0.0454, -0.0242,
          0.0795,  0.0391],
        [ 0.0104,  0.0573,  0.0688, -0.0840,  0.0664, -0.0402, -0.0526, -0.0167,
          0.0814,  0.0481],
        [ 0.0200,  0.0584,  0.0686, -0.0845,  0.0680, -0.0489, -0.0463, -0.0179,
          0.0798,  0.0425],
        [ 0.0203,  0.0541,  0.1025, -0.0870,  0.0489, -0.0458, -0.0418, -0.0052,
          0.0890,  0.0632],
        [ 0.0144,  0.0522,  0.0613, 

In [4]:
class VGG16(nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        # conv layers: (in_channel size, out_channels size, kernel_size, stride, padding)
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        # max pooling (kernel_size, stride)
        self.pool = nn.MaxPool2d(2, 2)

        # fully conected layers:
        self.fc6 = nn.Linear(16384, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 1000)
        self.fc9 = nn.Linear(1000, 100)
        self.fc10 = nn.Linear(100, 10)

        
        
    def forward(self, x, training=True):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.pool(x)
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool(x)
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = self.pool(x)
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = self.pool(x)
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = self.pool(x)
        x = x.view(-1, 16384)
        x = F.relu(self.fc6(x))
        x = F.dropout(x, 0.5, training=training)
        x = F.relu(self.fc7(x))
        x = F.dropout(x, 0.5, training=training)
        x = self.fc8(x)
        x = F.dropout(x, 0.5, training=training)
        x = self.fc9(x)
        x = F.dropout(x, 0.5, training=training)
        x = self.fc10(x)

        
        return x


In [5]:
trainloader = torch.utils.data.DataLoader(cifar_train, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(cifar_test, batch_size=32, shuffle=True)

In [6]:
device = torch.device("cuda:0")
net = VGG16(True).to(device)

In [7]:
# CrossEntropyLoss就是我们需要的损失函数
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [10]:
print("Start Training...")
for epoch in range(30):
    # 我们用一个变量来记录每100个batch的平均loss
    loss100 = 0.0
    # 我们的dataloader派上了用场
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device) # 注意需要复制到GPU
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss100 += loss.item()
        if i % 100 == 99:
            print('[Epoch %d, Batch %5d] loss: %.3f' %
                  (epoch + 1, i + 1, loss100 / 100))
            loss100 = 0.0

print("Done Training!")

Start Training...


ValueError: Expected input batch_size (1) to match target batch_size (32).