卷积神经网络与交叉熵损失函数的mnist数据集分类，对比不同激活函数的效果

In [28]:
import torch
import torch.nn as nn
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time

In [29]:
#读取数据集。
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize(0.5,0.5)])
tra=datasets.MNIST(root="data",train=True, download=True,transform=transform)
tes=datasets.MNIST(root="data",train=False, download=True,transform=transform)
tra_dloader = DataLoader(tra, batch_size=64, shuffle=True)
tes_dloader = DataLoader(tes, batch_size=64, shuffle=True)

In [30]:
#损失函数
loss_fun=nn.CrossEntropyLoss()

In [31]:
#整个数据集的读取器，用于计算整体loss与准确率。
eva_loader_tra=DataLoader(tra, batch_size=len(tra), shuffle=True)
eva_loader_tes=DataLoader(tes, batch_size=len(tes), shuffle=True)

In [32]:
#建立网络，acti_fun是可变参数激活函数。
class Net(nn.Module):
    def __init__(self,acti_fun):
        super(Net,self).__init__()
        self.layer1=nn.Sequential(nn.Conv2d(1,3,5,padding=2),nn.BatchNorm2d(3),
                                  acti_fun(),nn.AvgPool2d(2))
        self.layer2=nn.Sequential(nn.Conv2d(3,6,5,padding=2),nn.BatchNorm2d(6),
                                  acti_fun(),nn.AvgPool2d(2))
        self.flatten=nn.Flatten(1,-1)
        self.layer3 = nn.Sequential(nn.Linear(6*7*7,90),acti_fun())
        self.layer4 = nn.Sequential(nn.Linear(90,30),acti_fun())
        self.out=nn.Linear(30,10)
    def forward(self,x):
        x=self.layer1(x)
        x=self.layer2(x)
        x=self.flatten(x)
        x=self.layer3(x)
        x=self.layer4(x)
        return self.out(x)

In [33]:
#训练网络，记录训练时间。
def training(acti_fun):
    net=Net(acti_fun)
    optimizer=torch.optim.Adam(net.parameters(),lr=0.01)
    epoch=5
    start_time=time.time()
    for i in range(0,epoch):
        for idx,data in enumerate(tra_dloader):
            optimizer.zero_grad()
            x=data[0]
            target=data[1]
            y=net(x)
            loss=loss_fun(y,target)
            loss.backward()
            optimizer.step()
        #每个epoch计算整个数据集的loss
        for idx,data in enumerate(eva_loader_tra):
            x=data[0]
            target=data[1]
            y=net(x)
            total_loss=loss_fun(y,target)
            #print('损失值:',total_loss)
    end_time=time.time()
    return net,end_time-start_time

In [34]:
#计算预测准确率。
def get_accuracy(net):
    for idx,data in enumerate(eva_loader_tra):
        x=data[0]
        target=data[1]
        y=net(x)
        pre_y=torch.argmax(y,dim=1)
        accur_tra=torch.sum(torch.eq(pre_y,target))/len(tra)
        #print('测试集正确率:',accur_tra)
    for idx,data in enumerate(eva_loader_tes):
        x=data[0]
        target=data[1]
        y=net(x)
        pre_y=torch.argmax(y,dim=1)
        accur_tes=torch.sum(torch.eq(pre_y,target))/len(tes)
        #print('测试集正确率:',accur_tes)
    return accur_tra,accur_tes

In [35]:
act_fun_list=[nn.ReLU,nn.ELU,nn.LeakyReLU,nn.Sigmoid,nn.Tanh,nn.Hardtanh]
act_fun_name=['ReLU','ELU','LeakyReLU','Sigmoid','Tanh','HardTanh']
torch.manual_seed(0)
for i in range(0,6):
    recorder=training(act_fun_list[i])
    Accuracy=get_accuracy(recorder[0])
    print(act_fun_name[i],'训练集正确率:',Accuracy[0]
          ,'测试集正确率:',Accuracy[1],'训练时间:',recorder[1])

ReLU 训练集正确率: tensor(0.9888) 测试集正确率: tensor(0.9854) 训练时间: 111.37960910797119
ELU 训练集正确率: tensor(0.9819) 测试集正确率: tensor(0.9765) 训练时间: 119.52012538909912
LeakyReLU 训练集正确率: tensor(0.9897) 测试集正确率: tensor(0.9870) 训练时间: 112.20423364639282
Sigmoid 训练集正确率: tensor(0.9816) 测试集正确率: tensor(0.9798) 训练时间: 113.3835723400116
Tanh 训练集正确率: tensor(0.9681) 测试集正确率: tensor(0.9713) 训练时间: 109.88344073295593
HardTanh 训练集正确率: tensor(0.9595) 测试集正确率: tensor(0.9607) 训练时间: 111.55760645866394
