In [15]:
import torch
import torchvision
import torchvision.transforms as transforms


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

import matplotlib.pyplot as plt
import numpy as np

# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

import torch.nn as nn
import torch.nn.functional as F

Files already downloaded and verified
Files already downloaded and verified


# 2-2 change the current kernel size

### kernel = 5

In [16]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter

cpu
start traning.
[1,  2000] loss: 2.162
[1,  4000] loss: 1.761
[1,  6000] loss: 1.605
[1,  8000] loss: 1.523
[1, 10000] loss: 1.460
[1, 12000] loss: 1.385
[2,  2000] loss: 1.318
[2,  4000] loss: 1.259
[2,  6000] loss: 1.232
[2,  8000] loss: 1.229
[2, 10000] loss: 1.199
[2, 12000] loss: 1.163
Finished Training
Accuracy of the network on the 10000 test images: 57 %
Accuracy of plane : 65 %
Accuracy of   car : 78 %
Accuracy of  bird : 51 %
Accuracy of   cat : 61 %
Accuracy of  deer : 21 %
Accuracy of   dog : 52 %
Accuracy of  frog : 53 %
Accuracy of horse : 57 %
Accuracy of  ship : 61 %
Accuracy of truck : 67 %


### kernel = 4

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 16, 4)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 2.191
[1,  4000] loss: 1.797
[1,  6000] loss: 1.601
[1,  8000] loss: 1.522
[1, 10000] loss: 1.456
[1, 12000] loss: 1.430
[2,  2000] loss: 1.345
[2,  4000] loss: 1.326
[2,  6000] loss: 1.324
[2,  8000] loss: 1.284
[2, 10000] loss: 1.254
[2, 12000] loss: 1.231
Finished Training
Accuracy of the network on the 10000 test images: 57 %
Accuracy of plane : 66 %
Accuracy of   car : 75 %
Accuracy of  bird : 43 %
Accuracy of   cat : 20 %
Accuracy of  deer : 44 %
Accuracy of   dog : 60 %
Accuracy of  frog : 64 %
Accuracy of horse : 69 %
Accuracy of  ship : 66 %
Accuracy of truck : 61 %


### kernel = 7

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 2.103
[1,  4000] loss: 1.738
[1,  6000] loss: 1.593
[1,  8000] loss: 1.468
[1, 10000] loss: 1.378
[1, 12000] loss: 1.312
[2,  2000] loss: 1.198
[2,  4000] loss: 1.162
[2,  6000] loss: 1.149
[2,  8000] loss: 1.115
[2, 10000] loss: 1.074
[2, 12000] loss: 1.045
Finished Training
Accuracy of the network on the 10000 test images: 62 %
Accuracy of plane : 64 %
Accuracy of   car : 56 %
Accuracy of  bird : 36 %
Accuracy of   cat : 37 %
Accuracy of  deer : 53 %
Accuracy of   dog : 69 %
Accuracy of  frog : 74 %
Accuracy of horse : 70 %
Accuracy of  ship : 84 %
Accuracy of truck : 82 %


# 커널의 크기가 커지면 소요 시간이 커지며 결과가 향상되었습니다.

# 2-2 remove pooling layer

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        #self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(3072, 120)# 3072 = 2048*3(rgb)/2(pooling kernel)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        #x = self.pool(F.relu(self.conv1(x)))
        #x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.945
[1,  4000] loss: 1.729
[1,  6000] loss: 1.663
[1,  8000] loss: 1.619
[1, 10000] loss: 1.575
[1, 12000] loss: 1.561
[2,  2000] loss: 1.476
[2,  4000] loss: 1.487
[2,  6000] loss: 1.470
[2,  8000] loss: 1.479
[2, 10000] loss: 1.451
[2, 12000] loss: 1.424
Finished Training
Accuracy of the network on the 10000 test images: 49 %
Accuracy of plane : 62 %
Accuracy of   car : 61 %
Accuracy of  bird : 25 %
Accuracy of   cat : 27 %
Accuracy of  deer : 38 %
Accuracy of   dog : 38 %
Accuracy of  frog : 63 %
Accuracy of horse : 63 %
Accuracy of  ship : 56 %
Accuracy of truck : 56 %


# 2-4 change the current activation function to other activation function sigmoid

In [14]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.sigmoid(self.conv1(x)))
        x = self.pool(F.sigmoid(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.sigmoid(self.fc1(x))
        x = F.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.




[1,  2000] loss: 2.317
[1,  4000] loss: 2.315
[1,  6000] loss: 2.313
[1,  8000] loss: 2.312
[1, 10000] loss: 2.311
[1, 12000] loss: 2.310
[2,  2000] loss: 2.308
[2,  4000] loss: 2.309
[2,  6000] loss: 2.308
[2,  8000] loss: 2.307
[2, 10000] loss: 2.307
[2, 12000] loss: 2.306
Finished Training
Accuracy of the network on the 10000 test images: 10 %
Accuracy of plane :  0 %
Accuracy of   car :  0 %
Accuracy of  bird :  0 %
Accuracy of   cat :  0 %
Accuracy of  deer :  0 %
Accuracy of   dog :  0 %
Accuracy of  frog :  0 %
Accuracy of horse : 100 %
Accuracy of  ship :  0 %
Accuracy of truck :  0 %


# 2-4 Tanh

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.tanh(self.conv1(x)))
        x = self.pool(F.tanh(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.tanh(self.fc1(x))
        x = F.tanh(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.




[1,  2000] loss: 1.941
[1,  4000] loss: 1.600
[1,  6000] loss: 1.492
[1,  8000] loss: 1.407
[1, 10000] loss: 1.349
[1, 12000] loss: 1.297
[2,  2000] loss: 1.186
[2,  4000] loss: 1.201
[2,  6000] loss: 1.157
[2,  8000] loss: 1.137
[2, 10000] loss: 1.103
[2, 12000] loss: 1.095
Finished Training
Accuracy of the network on the 10000 test images: 62 %
Accuracy of plane : 67 %
Accuracy of   car : 69 %
Accuracy of  bird : 47 %
Accuracy of   cat : 27 %
Accuracy of  deer : 43 %
Accuracy of   dog : 58 %
Accuracy of  frog : 86 %
Accuracy of horse : 71 %
Accuracy of  ship : 78 %
Accuracy of truck : 69 %


# 2-4 ReLU

In [7]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 2.066
[1,  4000] loss: 1.687
[1,  6000] loss: 1.561
[1,  8000] loss: 1.455
[1, 10000] loss: 1.379
[1, 12000] loss: 1.316
[2,  2000] loss: 1.225
[2,  4000] loss: 1.188
[2,  6000] loss: 1.174
[2,  8000] loss: 1.132
[2, 10000] loss: 1.114
[2, 12000] loss: 1.075
Finished Training
Accuracy of the network on the 10000 test images: 60 %
Accuracy of plane : 64 %
Accuracy of   car : 62 %
Accuracy of  bird : 36 %
Accuracy of   cat : 25 %
Accuracy of  deer : 64 %
Accuracy of   dog : 47 %
Accuracy of  frog : 75 %
Accuracy of horse : 81 %
Accuracy of  ship : 65 %
Accuracy of truck : 80 %


# 2-5 change the current optimization method AdaDelta

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(net.parameters(), lr=0.001)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 2.302
[1,  4000] loss: 2.297
[1,  6000] loss: 2.288
[1,  8000] loss: 2.270
[1, 10000] loss: 2.241
[1, 12000] loss: 2.191
[2,  2000] loss: 2.139
[2,  4000] loss: 2.104
[2,  6000] loss: 2.068
[2,  8000] loss: 2.025
[2, 10000] loss: 2.015
[2, 12000] loss: 1.982
Finished Training
Accuracy of the network on the 10000 test images: 30 %
Accuracy of plane : 46 %
Accuracy of   car : 25 %
Accuracy of  bird :  0 %
Accuracy of   cat :  2 %
Accuracy of  deer : 29 %
Accuracy of   dog : 39 %
Accuracy of  frog : 46 %
Accuracy of horse : 27 %
Accuracy of  ship : 41 %
Accuracy of truck : 49 %


# 2-5 AdaGrad

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(net.parameters(), lr=0.001)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.983
[1,  4000] loss: 1.841
[1,  6000] loss: 1.778
[1,  8000] loss: 1.746
[1, 10000] loss: 1.721
[1, 12000] loss: 1.695
[2,  2000] loss: 1.662
[2,  4000] loss: 1.650
[2,  6000] loss: 1.662
[2,  8000] loss: 1.631
[2, 10000] loss: 1.622
[2, 12000] loss: 1.623
Finished Training
Accuracy of the network on the 10000 test images: 42 %
Accuracy of plane : 46 %
Accuracy of   car : 58 %
Accuracy of  bird : 18 %
Accuracy of   cat : 31 %
Accuracy of  deer : 24 %
Accuracy of   dog : 30 %
Accuracy of  frog : 53 %
Accuracy of horse : 55 %
Accuracy of  ship : 49 %
Accuracy of truck : 51 %


# 2-5 RMS Prop

In [10]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(net.parameters(), lr=0.001)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.850
[1,  4000] loss: 1.585
[1,  6000] loss: 1.498
[1,  8000] loss: 1.428
[1, 10000] loss: 1.367
[1, 12000] loss: 1.343
[2,  2000] loss: 1.255
[2,  4000] loss: 1.294
[2,  6000] loss: 1.275
[2,  8000] loss: 1.254
[2, 10000] loss: 1.251
[2, 12000] loss: 1.230
Finished Training
Accuracy of the network on the 10000 test images: 54 %
Accuracy of plane : 62 %
Accuracy of   car : 48 %
Accuracy of  bird : 57 %
Accuracy of   cat : 55 %
Accuracy of  deer : 53 %
Accuracy of   dog : 25 %
Accuracy of  frog : 68 %
Accuracy of horse : 40 %
Accuracy of  ship : 73 %
Accuracy of truck : 62 %


# 2-6 choose Adam optimization method and use L2 (ridge) regularization method

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.901
[1,  4000] loss: 1.645
[1,  6000] loss: 1.506
[1,  8000] loss: 1.449
[1, 10000] loss: 1.389
[1, 12000] loss: 1.344
[2,  2000] loss: 1.275
[2,  4000] loss: 1.254
[2,  6000] loss: 1.227
[2,  8000] loss: 1.225
[2, 10000] loss: 1.219
[2, 12000] loss: 1.213
Finished Training
Accuracy of the network on the 10000 test images: 58 %
Accuracy of plane : 62 %
Accuracy of   car : 79 %
Accuracy of  bird : 29 %
Accuracy of   cat : 31 %
Accuracy of  deer : 54 %
Accuracy of   dog : 65 %
Accuracy of  frog : 71 %
Accuracy of horse : 59 %
Accuracy of  ship : 73 %
Accuracy of truck : 61 %


# 2-7 add the Xavier weight initialization method

In [12]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        torch.nn.init.xavier_uniform_(self.fc3.weight)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.896
[1,  4000] loss: 1.669
[1,  6000] loss: 1.549
[1,  8000] loss: 1.463
[1, 10000] loss: 1.434
[1, 12000] loss: 1.360
[2,  2000] loss: 1.276
[2,  4000] loss: 1.274
[2,  6000] loss: 1.235
[2,  8000] loss: 1.217
[2, 10000] loss: 1.229
[2, 12000] loss: 1.215
Finished Training
Accuracy of the network on the 10000 test images: 56 %
Accuracy of plane : 60 %
Accuracy of   car : 57 %
Accuracy of  bird : 44 %
Accuracy of   cat : 30 %
Accuracy of  deer : 59 %
Accuracy of   dog : 29 %
Accuracy of  frog : 66 %
Accuracy of horse : 67 %
Accuracy of  ship : 82 %
Accuracy of truck : 68 %


# 2-8 choose ONE other parameters of CNN program (dropout)

In [13]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
                in_channels=3,              # input height
                out_channels=16,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
                in_channels=16,              # input height
                out_channels=32,            # n_filters
                kernel_size=7,              # filter size
                stride=1,                   # filter movement/step
                padding=3,                  # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
            )
        self.fc1 = nn.Linear(2048, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        torch.nn.init.xavier_uniform_(self.fc3.weight)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = nn.functional.dropout(x) #Drop out
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)

print('start traning.')
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# del dataiter


cpu
start traning.
[1,  2000] loss: 1.947
[1,  4000] loss: 1.739
[1,  6000] loss: 1.645
[1,  8000] loss: 1.605
[1, 10000] loss: 1.574
[1, 12000] loss: 1.567
[2,  2000] loss: 1.508
[2,  4000] loss: 1.499
[2,  6000] loss: 1.466
[2,  8000] loss: 1.458
[2, 10000] loss: 1.417
[2, 12000] loss: 1.390
Finished Training
Accuracy of the network on the 10000 test images: 51 %
Accuracy of plane : 62 %
Accuracy of   car : 71 %
Accuracy of  bird : 37 %
Accuracy of   cat : 39 %
Accuracy of  deer : 43 %
Accuracy of   dog : 46 %
Accuracy of  frog : 47 %
Accuracy of horse : 64 %
Accuracy of  ship : 55 %
Accuracy of truck : 51 %
