CNN图像分类
=
* Stanford CS231n
* AlexNet
* VGG
* ResNet
* DenseNet

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
print("Pytorch Version: ", torch.__version__)

Pytorch Version:  1.0.1.post2


首先定义一个基于convnet的简单的神经网络
-

In [2]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by two linear layers.

    The shape comments in ``forward`` assume a single-channel 28x28 input
    (the MNIST image size shown earlier in this notebook).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: two 5x5 convolutions with stride 1 and no padding.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        # Classifier head: flattened 50x4x4 feature map -> 500 hidden units -> 10 scores.
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for a batch ``x``."""
        # conv1 + ReLU -> [20, 24, 24]; 2x2 max-pool -> [20, 12, 12]
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        # conv2 + ReLU -> [50, 8, 8]; 2x2 max-pool -> [50, 4, 4]
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2, stride=2)
        # Flatten each sample to the vector length expected by fc1 (4*4*50).
        x = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(x))
        scores = self.fc2(hidden)
        return F.log_softmax(scores, dim=1)

In [3]:
# Training split of MNIST, downloaded into ./mnist_data when missing.
# The transform converts each image into a tensor.
mnist_data = datasets.MNIST(
    root="./mnist_data",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
mnist_data

Dataset MNIST
    Number of datapoints: 60000
    Split: train
    Root Location: ./mnist_data
    Transforms (if any): Compose(
                             ToTensor()
                         )
    Target Transforms (if any): None

In [4]:
# Shape of the image tensor of the first sample ([0] picks the sample, [0] its image).
mnist_data[0][0].shape

torch.Size([1, 28, 28])

In [5]:
# mnist_data[0][0]

In [7]:
# Collect every training image as a numpy array (labels are dropped); this list
# is used further down to compute normalisation statistics.
data = [image.data.cpu().numpy() for image, _ in mnist_data]

In [8]:
# data[0]

In [10]:
#

In [12]:
# Use the GPU when one is present, otherwise the CPU. `is_available` has to be
# *called*: the bare function object is always truthy, which would select
# "cuda" even on CPU-only machines and fail once tensors are moved to the device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64

# Mean / std over all training pixels, computed from `data` (the training images
# converted to numpy in an earlier cell). The same statistics are applied to both
# splits so the model sees identically scaled inputs during training and evaluation.
MNIST_MEAN = float(np.mean(data))
MNIST_STD = float(np.std(data))
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((MNIST_MEAN,), (MNIST_STD,)),
])

# Pinned host memory only speeds up host-to-GPU copies, so request it only on CUDA.
PIN_MEMORY = device.type == "cuda"

train_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=True, download=True,
                   transform=mnist_transform),
    batch_size=BATCH_SIZE, shuffle=True, num_workers=1, pin_memory=PIN_MEMORY,
)

# Evaluation aggregates over the whole test split, so shuffling is unnecessary.
test_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=False, download=True,
                   transform=mnist_transform),
    batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=PIN_MEMORY,
)

# SGD-with-momentum hyperparameters.
lr = 0.01
momentum = 0.5
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)



In [14]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: module called on each input batch; its output is passed to
            ``F.nll_loss`` together with the targets.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Prints the current batch loss every 100 iterations. Returns ``None``.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients from the previous step before computing new ones.
        optimizer.zero_grad()
        log_probs = model(inputs)  # for Net in this notebook: one row of 10 values per sample
        loss = F.nll_loss(log_probs, labels)
        loss.backward()
        optimizer.step()

        # Only report every 100th iteration.
        if step % 100 != 0:
            continue
        print("Train Epoch: {}, iteration: {} , Loss: {}".format(epoch, step, loss.item()))

In [19]:
def test(model, device, test_loader, epoch):
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += F.nll_loss(output, target, reduction = "sum").item()
            pred = output.argmax(dim = 1)
            correct += pred.eq(target.view_as(pred)).sum().item()
            

    total_loss /= len(test_loader.dataset)
    acc = correct/len(test_loader.dataset) * 100
    print("Test loss: {}, Accuracy: {}".format(total_loss, acc))

    model.train()

In [20]:
NUM_EPOCHS = 5

# Each epoch: one pass over the training split, then one evaluation pass.
for epoch in range(NUM_EPOCHS):
    train(model=model, device=device, train_loader=train_dataloader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_dataloader, epoch=epoch)

# Persist only the learned parameters (the state dict), not the whole module.
torch.save(obj=model.state_dict(), f="mnist_cnn.pth")

Train Epoch: 0, iteration: 0 , Loss: 0.07222264260053635
Train Epoch: 0, iteration: 100 , Loss: 0.028401784598827362
Train Epoch: 0, iteration: 200 , Loss: 0.10096027702093124
Train Epoch: 0, iteration: 300 , Loss: 0.11841542273759842
Train Epoch: 0, iteration: 400 , Loss: 0.04357606917619705
Train Epoch: 0, iteration: 500 , Loss: 0.03476938605308533
Train Epoch: 0, iteration: 600 , Loss: 0.04522170126438141
Train Epoch: 0, iteration: 700 , Loss: 0.04935184866189957
Train Epoch: 0, iteration: 800 , Loss: 0.14072354137897491
Train Epoch: 0, iteration: 900 , Loss: 0.039729125797748566
Test loss: 0.055588755607604984, Accuracy: 98.11999999999999
Train Epoch: 1, iteration: 0 , Loss: 0.04523603990674019
Train Epoch: 1, iteration: 100 , Loss: 0.17083124816417694
Train Epoch: 1, iteration: 200 , Loss: 0.06908587366342545
Train Epoch: 1, iteration: 300 , Loss: 0.1428205817937851
Train Epoch: 1, iteration: 400 , Loss: 0.03196604549884796
Train Epoch: 1, iteration: 500 , Loss: 0.1547815650701522

NLL Loss 的定义
-
$$ \ell(x, y) = L = \{l_1, \ldots, l_N\}^\top, \quad l_n = -w_{y_n} x_{n, y_n} $$
-