## 1、交叉熵损失实现

### 1.1 Cross Entropy in Numpy

In [None]:
import numpy as np

def softmax_cross_entropy(logits, one_hot):
    """Cross-entropy between softmax(logits) and a one-hot target vector.

    The log-softmax is computed with the log-sum-exp trick (the maximum logit
    is subtracted first), so very large or very small logits neither overflow
    ``exp`` nor produce ``log(0)``.

    Args:
        logits: 1-D array-like of raw class scores.
        one_hot: 1-D array-like of the same length, 1 at the true class and 0
            elsewhere.

    Returns:
        The scalar loss ``-sum(one_hot * log_softmax(logits))``.
    """
    logits = np.asarray(logits, dtype=float)
    # Shifting by the maximum leaves softmax unchanged but keeps exp() <= 1.
    shifted = logits - logits.max()
    log_probs = shifted - np.log(np.exp(shifted).sum())
    return -(np.asarray(one_hot) * log_probs).sum()


# One-hot encoded label: the true class is index 0.
y = np.array([1,0,0])
# Raw class scores (logits).
z = np.array([0.2,0.1,-0.1])
# Softmax probabilities, computed on max-shifted logits for numerical stability.
y_pred = np.exp(z - z.max()) / np.exp(z - z.max()).sum()
loss = softmax_cross_entropy(z, y)

print(loss)

0.9729189131256584


### 1.2 Cross Entropy in Pytorch

**PyTorch 的 CrossEntropyLoss 直接以类别索引作为标签 y，无需手动转换为 one hot 编码!!!**

CrossEntropyLoss == LogSoftmax + NLLLoss

In [4]:
import torch 

# CrossEntropyLoss takes the integer class index as the target, so the label
# does not have to be one-hot encoded by hand.

# Logits for one sample over three classes, shape (1, 3).
z = torch.tensor([[0.2, 0.1, -0.1]], dtype=torch.float)
# Index of the true class for that sample, shape (1,).
y = torch.tensor([0], dtype=torch.int64)

criterion = torch.nn.CrossEntropyLoss()
loss = criterion(input=z, target=y)
print(loss)

tensor(0.9729)


In [7]:
# Mini-Batch
import torch

criterion = torch.nn.CrossEntropyLoss()
# True class index of each of the three samples in the batch.
Y = torch.tensor([2, 0, 1], dtype=torch.int64)

# Float literals are inferred as torch.float32 (the default floating dtype).
# Row-wise argmax is 2, 0, 1 -- every sample agrees with Y.
Y_pred1 = torch.tensor([
    [0.1, 0.2, 0.9],
    [0.7, 0.1, 0.2],
    [0.2, 0.5, 0.3],
])
# Row-wise argmax is 0, 1, 1 -- only the last sample agrees with Y.
Y_pred2 = torch.tensor([
    [0.8, 0.1, 0.1],
    [0.2, 0.8, 0.1],
    [0.3, 0.4, 0.3],
])

batch_loss1 = criterion(Y_pred1, Y)
batch_loss2 = criterion(Y_pred2, Y)
print(f'loss1: {batch_loss1}, loss2:{batch_loss2}')

loss1: 0.7911708354949951, loss2:1.2461291551589966


## 2、In MNIST Dataset

In [8]:
# 0、导包
import torch 
from torchvision import datasets,transforms
from torch.utils.data import DataLoader,Dataset
import torch.optim as optim


In [11]:
# 1. Prepare the dataset

# ToTensor converts each image from the H x W x C layout used by image
# libraries into a C x H x W float tensor scaled to [0, 1] (see the
# torchvision docs); channel-first is the layout PyTorch layers such as
# convolutions expect.

batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    # Normalisation: the first argument is the per-channel mean, the second the
    # per-channel std. Both are 1-tuples because MNIST images have one channel.
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='./data/dataset/MNIST', train=True, transform=transform, download=True)

test_dataset = datasets.MNIST(root='./data/dataset/MNIST', train=False, transform=transform, download=True)

# Shuffle only the training data; evaluation does not depend on sample order,
# so the test loader keeps a fixed, reproducible order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# 2. Define the network
class MNIST_Model(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10.

    The linear layers are exposed as ``l1`` ... ``l5`` and share a single ReLU
    module, ``activate``, which is applied after every layer except the last.
    """

    def __init__(self):
        # The parent constructor has to run before sub-modules are assigned.
        super().__init__()

        # Consecutive width pairs become l1 (784->512) through l5 (64->10).
        widths = (784, 512, 256, 128, 64, 10)
        for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'l{index}', torch.nn.Linear(fan_in, fan_out))
        self.activate = torch.nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` into rows of 784 values and return 10 raw scores per row."""
        # One image is 28 * 28 = 784 values; -1 lets the batch size be inferred.
        hidden = x.view(-1, 784)
        for layer in (self.l1, self.l2, self.l3, self.l4):
            hidden = self.activate(layer(hidden))
        # No activation on the output layer: raw scores are returned.
        return self.l5(hidden)


model = MNIST_Model()

In [17]:
# 3. Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
# The dataset is fairly large, so momentum is used to speed up training.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)

In [19]:
# 4、训练和测试
def train(epoch, log_interval=300):
    """Run one pass over ``train_dataloader`` and update ``model`` in place.

    Relies on the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``train_dataloader``.

    Args:
        epoch: Zero-based epoch index; only used (as ``epoch + 1``) in the
            progress line.
        log_interval: Number of batches whose mean loss is reported per
            progress line. Losses of a trailing, incomplete window are not
            printed.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError(f'log_interval must be >= 1, got {log_interval}')

    # Put the model in training mode in case an evaluation left it in eval mode.
    model.train()
    running_loss = 0.0
    for batch_id, (inputs, targets) in enumerate(train_dataloader, 0):
        # Forward pass and loss.
        y_pred = model(inputs)
        loss = criterion(y_pred, targets)
        # Clear gradients left over from the previous step, then backpropagate
        # and apply the parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the loss over the current reporting window.
        running_loss += loss.item()
        if batch_id % log_interval == log_interval - 1:
            print(f'[epoch: {epoch +1},batch_id:{batch_id + 1}] loss: {running_loss / log_interval : .4f}')
            # Start a fresh window.
            running_loss = 0.0


def test():
    correct = 0
    total = 0
    # 不计算梯度

    with torch.no_grad():
        for data in test_dataloader:
            # 获取数据
            inputs, targets = data
            # 前向传播
            outputs = model(inputs)
            # 不同类别概率的最大值
            _,predicted = torch.max(outputs.data,dim=1)
            # 样本数量
            total += targets.size(0) 
            # 计算预测正确的数量
            correct += (predicted == targets).sum().item()
    print(f'Accuracy on test set:{100 * correct / total}%')


In [20]:
# Alternate one training pass with one evaluation pass, 100 times.
num_epochs = 100
for epoch in range(num_epochs):
    train(epoch=epoch)
    test()

[epoch: 1,batch_id:300] loss:  0.0912
[epoch: 1,batch_id:600] loss:  0.0921
[epoch: 1,batch_id:900] loss:  0.0891
Accuracy on test set:96.79%
[epoch: 2,batch_id:300] loss:  0.0753
[epoch: 2,batch_id:600] loss:  0.0705
[epoch: 2,batch_id:900] loss:  0.0765
Accuracy on test set:97.32%
[epoch: 3,batch_id:300] loss:  0.0542
[epoch: 3,batch_id:600] loss:  0.0629
[epoch: 3,batch_id:900] loss:  0.0628
Accuracy on test set:97.31%
[epoch: 4,batch_id:300] loss:  0.0494
[epoch: 4,batch_id:600] loss:  0.0489
[epoch: 4,batch_id:900] loss:  0.0484
Accuracy on test set:97.54%
[epoch: 5,batch_id:300] loss:  0.0384
[epoch: 5,batch_id:600] loss:  0.0404
[epoch: 5,batch_id:900] loss:  0.0416
Accuracy on test set:97.55%
[epoch: 6,batch_id:300] loss:  0.0309
[epoch: 6,batch_id:600] loss:  0.0348
[epoch: 6,batch_id:900] loss:  0.0312
Accuracy on test set:97.79%
[epoch: 7,batch_id:300] loss:  0.0227
[epoch: 7,batch_id:600] loss:  0.0289
[epoch: 7,batch_id:900] loss:  0.0264
Accuracy on test set:97.44%
[epoch