# 多分类问题

使用交叉熵loss（cross entropy loss）作为损失函数进行优化

<img src="./assets/多分类问题_网络结构.png" alt="nn" width=600 height=400/>

In [1]:
import  torch
import  torch.nn as nn
import  torch.nn.functional as F
import  torch.optim as optim
from    torchvision import datasets, transforms

batch_size=200
learning_rate=1e-2  # 0.01
epochs=10

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([        
        transforms.ToTensor(),        
        transforms.Normalize((0.1307,), (0.3081,))    
    ])),    
    batch_size=batch_size, shuffle=True)

# 一般第一个维度是该层的输出，第二个维度是该层的输入
w1 = torch.randn(200, 784, requires_grad=True)
b1 = torch.zeros(200, requires_grad=True)
w2 = torch.randn(200, 200, requires_grad=True)
b2 = torch.zeros(200, requires_grad=True)
w3 = torch.randn(10, 200, requires_grad=True)
b3 = torch.zeros(10, requires_grad=True)

# 初始化权重参数
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)

# 前向传播
def forward(x):
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x) # 最后结果是一个 logits
    # 假设有神经网络进行分类，类别数目为m，输出为y1,做一个映射使得y1与类别的维度相同，y1 --> y2,再把 y2 作为softmax输入值 得到各个类别的概率，这里的y2被叫做logits
    # logits: 未归一化的概率， 一般也就是 softmax层的输入
    return x

optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()  # 效果跟 F.cross_entropy 功能是一样的 

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):

        data = data.view(-1, 28*28)
        logits = forward(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(                
                epoch, batch_idx * len(data), len(train_loader.dataset),                       
                    100. * batch_idx / len(train_loader), loss.item()))

    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()

        pred = logits.data.max(1)[1]
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(        
        test_loss, correct, len(test_loader.dataset),        
        100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0019, Accuracy: 8950/10000 (90%)


Test set: Average loss: 0.0014, Accuracy: 9206/10000 (92%)


Test set: Average loss: 0.0012, Accuracy: 9297/10000 (93%)


Test set: Average loss: 0.0011, Accuracy: 9352/10000 (94%)


Test set: Average loss: 0.0010, Accuracy: 9410/10000 (94%)


Test set: Average loss: 0.0009, Accuracy: 9448/10000 (94%)


Test set: Average loss: 0.0009, Accuracy: 9500/10000 (95%)


Test set: Average loss: 0.0008, Accuracy: 9518/10000 (95%)


Test set: Average loss: 0.0008, Accuracy: 9545/10000 (95%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)



### nn.Linear 全连接层

In [6]:
from torch import nn
# x 是输入数据
x = torch.ones(1, 784)
print(x.shape)

# 创建全连接层
# 第 1 个参数是输入，第 2 个参数是输出
layer1 = nn.Linear(784, 200)
layer2 = nn.Linear(200, 200)
layer3 = nn.Linear(200, 10)

# 得到第一层的输出
x = layer1(x)
x = F.relu(x, inplace=True)
print(x.shape)

# 得到第二层的输出
x = layer2(x)
x = F.relu(x, inplace=True)
print(x.shape)

# 得到第三层的输出
x = layer3(x)
x = F.relu(x, inplace=True)
print(x.shape)

torch.Size([1, 784])
torch.Size([1, 200])
torch.Size([1, 200])
torch.Size([1, 10])


### 使用 pytorch 提供的 API 重写多分类问题（多层感知机MLP）

In [11]:
import  torch
import  torch.nn as nn
import  torch.nn.functional as F
import  torch.optim as optim
from    torchvision import datasets, transforms

batch_size=200
learning_rate=1e-2  # 0.01
epochs=10

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([        
        transforms.ToTensor(),        
        transforms.Normalize((0.1307,), (0.3081,))    
    ])),    
    batch_size=batch_size, shuffle=True)

class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)
        return x

device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)
        data, target = data.to(device), target.cuda()

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(                
                epoch, batch_idx * len(data), len(train_loader.dataset),                       
                    100. * batch_idx / len(train_loader), loss.item()))
    
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()

        pred = logits.data.max(1)[1]
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(        
        test_loss, correct, len(test_loader.dataset),        
        100. * correct / len(test_loader.dataset)))
        


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9557/10000 (96%)

