Training a classifier

在处理图像、视频、音频、文本文档时，可以使用其他Python工具将原始数据读取转换成numpy.array，然后再把numpy.array 转换成torch.Tensor

* 图像文件可以使用pillow
* 音频文件使用scipy和librosa
* 文本文件可以直接使用Python或Cython代码或者用NLTK和SpaCy

在torchvision模块中包含了ImageNet、CIFAR10、MNIST等数据集的加载器，还有图像数据转换器，可以通过 torchvision.datasets 和 torch.utils.data.DataLoader 调用。

本教程中，我们将使用CIFAR10数据集，这个数据集很适合用于简单的分类训练，所有图片的尺寸都是32×32×3（即32×32像素、3个颜色通道）。

加载数据集并进行归一化

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Preprocessing applied to every image: convert it to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR10 train/test splits, downloaded into ./data when not already present.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 4 images loaded by 2 worker processes; only the training
# loader shuffles.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, indexed by the integer label of a sample.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


绘图

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Draw a normalized image tensor with matplotlib.

    Args:
        img: Image tensor whose first axis is expected to be the channel
            axis, with values normalized by mean 0.5 and std 0.5 (as done
            by the ``transform`` used for the datasets in this notebook).
    """
    # Invert the (x - 0.5) / 0.5 normalization.
    restored = img / 2 + 0.5
    # Move axis 0 to the end so the array is laid out the way plt.imshow
    # is given it in the original code: axes (1, 2, 0).
    pixels = np.transpose(restored.numpy(), axes=(1, 2, 0))
    plt.imshow(pixels)


# Fetch one batch of training images and labels. The built-in next() is used
# because DataLoader iterators implement __next__; the Python 2 style
# .next() method is not available on current PyTorch releases.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Tile the batch into a single grid image and display it.
imshow(torchvision.utils.make_grid(images))
# Print the class name of every image in the batch, in batch order.
# len(labels) is used instead of a hard-coded 4 so this keeps working if
# the loader's batch_size changes.
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

建立网络

In [None]:
import torch.nn as nn 
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Two conv + max-pool stages are followed by three fully connected
    layers; the last layer produces 10 raw class scores (no softmax).

    Shape walk-through for a 3x32x32 input:
        conv1 (5x5) -> 6x28x28, pool -> 6x14x14,
        conv2 (5x5) -> 16x10x10, pool -> 16x5x5, flattened to 400.
    """

    def __init__(self):
        """Create the convolution, pooling and fully connected layers."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # A single 2x2 / stride-2 pooling layer is reused after both convs.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # The class is nn.Linear (capital L); nn.linear does not exist and
        # raised AttributeError as soon as Net() was instantiated.
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Batch of images; the layer sizes above assume 3x32x32 per
                image.

        Returns:
            Tensor with 10 scores per flattened row of features.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the 16x5x5 feature maps into one 400-wide row per image.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Create the model instance that the optimizer and training loop below use.
net = Net()

loss function and optimizer

In [None]:
import torch.optim as optim
# Classification loss applied to the network's output scores and the labels.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all parameters of `net`.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)

training

In [None]:
# Train for two passes over the training set.
for epoch in range(2):
    # Loss accumulated over the current window of 2000 mini-batches.
    running_loss = 0.0
    # `step` is the 1-based mini-batch number within the epoch.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Every 2000 mini-batches, report the average loss of that window
        # and start a new window.
        if step % 2000 == 0:
            print('[%d,%5d]loss:%.3f' % (epoch + 1, step, running_loss / 2000))
            running_loss = 0.0

testing

In [None]:
# Take the first batch from the test loader. The built-in next() is used
# because DataLoader iterators implement __next__; the Python 2 style
# .next() method is not available on current PyTorch releases.
dataiter = iter(testloader)
images, labels = next(dataiter)
