In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [None]:
# torchvision datasets yield PIL images; ToTensor() converts them to tensors
# with values in [0, 1], and Normalize(mean=0.5, std=0.5) on each of the three
# channels then rescales those values to [-1, 1].
batch_size = 4

classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data if missing, reshuffled every epoch.
# NOTE(review): the captured output suggests this ran on Windows; if the
# loaders fail there with a BrokenPipeError, try num_workers=0 -- confirm.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

In [None]:
import matplotlib.pyplot as plt
import numpy as np
#functions to show image
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: tensor laid out as (channels, height, width) whose values were
            normalized to [-1, 1] when the data was loaded.
    """
    # Undo the normalization: [-1, 1] / 2 + 0.5 maps the values back to [0, 1].
    unnormalized = img / 2 + 0.5
    # plt.imshow expects (height, width, channels) while the tensor is
    # (channels, height, width), so move the channel axis to the end first.
    channels_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()
# Get one random batch of training images.
# Use the builtin next(): DataLoader iterators implement __next__, and the
# legacy `.next()` method is not available on current PyTorch iterators.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Show the images: make_grid tiles the batch into a single image.
imshow(torchvision.utils.make_grid(images))
# Print the class name of every image in the batch.
print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))

In [None]:
import torch.nn as nn
import torch.nn.functional as F
#Define Model
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    fc1 expects 16 * 5 * 5 features, which is what the conv stack produces for
    3-channel 32x32 inputs (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    The final layer emits 10 raw scores (logits); no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as their attribute names are
        # listed, so parameter initialisation and state_dict keys are stable.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        # Feature extractor: conv -> ReLU -> 2x2 max-pool, applied twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten every dimension except the batch dimension.
        x = x.flatten(start_dim=1)
        # Classifier head: two hidden ReLU layers, then the output layer.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)


net = Net()
        

In [None]:
import torch.optim as optim
# Define the loss function and the optimizer.
# The loss class is spelled nn.CrossEntropyLoss; it is applied to the raw
# scores returned by the network together with the integer class labels.
criterion = nn.CrossEntropyLoss()
# Plain SGD with momentum over all trainable parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)



In [None]:
# Train for two passes over the training set.
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Get the inputs; data is a list of [inputs, labels].
        inputs, labels = data

        # Zero the parameter gradients so they do not accumulate across steps.
        optimizer.zero_grad()

        # Forward + backward + optimize.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics: average loss over every 2000 mini-batches.
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d]loss:%.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0


print("Finished Training")
            

In [None]:
# Save only the learned parameters (the state_dict) to a checkpoint file.
PATH = './cifar_net.pth'
torch.save(obj=net.state_dict(), f=PATH)


In [None]:
# Look at the first batch of the test set.
# Use the builtin next(): DataLoader iterators implement __next__, and the
# legacy `.next()` method is not available on current PyTorch iterators.
dataiter = iter(testloader)
images, labels = next(dataiter)

# Show the images together with their ground-truth class names.
imshow(torchvision.utils.make_grid(images))
# Iterate over the actual batch length instead of a hard-coded 4 so the
# preview keeps working if the loader's batch size changes.
print('GroundTruth:', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))


In [None]:
# Overall accuracy on the test set.
correct = 0
total = 0
# Since we are not training, we do not need gradients for the outputs.
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # Calculate outputs by running the images through the network.
        outputs = net(images)
        # The class with the highest score is the prediction: torch.max over
        # dim 1 returns (max values, indices); only the indices are needed.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d%%' % (100 * correct / total))

In [None]:
# Count correct and total predictions for each class.
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# Again, no gradients are needed for evaluation.
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # torch.max(outputs, 1) reduces over dim 1 (the per-class scores of
        # each sample) and returns a (max values, indices) pair; the values
        # are discarded with `_` and the indices are the predicted classes.
        _, predictions = torch.max(outputs, 1)
        # Collect the correct predictions for each class.
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

# Print the accuracy for each class.
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f}% ".format(classname, accuracy))
    