# 多层全连接前向网络

## 多层全连接神经网络实现MNIST手写数字分类

In [2]:
from torch import nn
# 定义三层全连接神经网络
class simpleNet(nn.Module):
    """Three-layer fully connected network without activation functions.

    The three ``nn.Linear`` layers are applied one after another, so the
    forward pass is a plain composition of affine maps.

    Args:
        in_dim: Number of input features.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, n_hidden1, n_hidden2, out_dim):
        super().__init__()
        self.layer1 = nn.Linear(in_features=in_dim, out_features=n_hidden1)
        self.layer2 = nn.Linear(in_features=n_hidden1, out_features=n_hidden2)
        self.layer3 = nn.Linear(in_features=n_hidden2, out_features=out_dim)

    def forward(self, x):
        """Pass ``x`` through layer1, layer2 and layer3 in order and return the result."""
        hidden1 = self.layer1(x)
        hidden2 = self.layer2(hidden1)
        return self.layer3(hidden2)

In [3]:
# 添加激活函数
class Activation_Net(nn.Module):
    """Three-layer fully connected network with ReLU after the two hidden layers.

    Args:
        in_dim: Number of input features.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, n_hidden1, n_hidden2, out_dim):
        super().__init__()
        # nn.Sequential groups several sub-modules into one stage that is
        # called like a single layer.
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden1),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden1, n_hidden2),
            nn.ReLU(inplace=True),
        )
        # The output stage is a bare Linear layer: no activation is applied
        # to the final scores.
        self.layer3 = nn.Sequential(
            nn.Linear(n_hidden2, out_dim),
        )

    def forward(self, x):
        """Run ``x`` through the three stages in order and return the output."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return x
        

In [6]:
# Add batch normalization (a BatchNorm1d between each hidden Linear layer and its ReLU)
class Batch_Net(nn.Module):
    """Three-layer fully connected network with batch normalization and ReLU.

    Each hidden stage is ``Linear -> BatchNorm1d -> ReLU``; the output stage
    is a bare ``Linear`` layer.

    Args:
        in_dim: Number of input features.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        out_dim: Number of output features.
    """

    def __init__(self, in_dim, n_hidden1, n_hidden2, out_dim):
        super(Batch_Net, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden1),
            # Batch normalization goes after the fully connected layer and
            # before the activation. Its feature count must equal the width
            # of the Linear layer's output.
            nn.BatchNorm1d(n_hidden1),
            nn.ReLU(True)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden1, n_hidden2),
            nn.BatchNorm1d(n_hidden2),
            nn.ReLU(True)
        )
        # The output stage has no normalization and no activation.
        self.layer3 = nn.Sequential(nn.Linear(n_hidden2, out_dim))

    def forward(self, x):
        """Run ``x`` through layer1, layer2 and layer3 in order and return the output."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

In [1]:
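# Train the network on MNIST: handwritten digits 0-9, 28*28 grayscale images. torchvision's MNIST provides 60000 training images and 10000 test images (no separate validation split is used here).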
import torch
from torch import nn,optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets,transforms

import net # 这个是之前定义的simpleNet网络



In [2]:
# Hyperparameters for the training run.
batch_size = 64        # samples per mini-batch
learning_rate = 0.01   # step size handed to the optimizer
num_epoches = 20       # number of full passes over the training set

In [3]:
# Pre-processing applied to every image.
# - transforms.ToTensor() converts the image into a PyTorch tensor; per the
#   torchvision docs it also rescales pixel values to the range [0, 1].
# - transforms.Normalize(mean, std) takes a mean and a standard deviation
#   per channel and computes (x - mean) / std.
# - transforms.Compose chains the individual steps into one callable.
data_tf = transforms.Compose([
    transforms.ToTensor(),
    # MNIST is grayscale, so there is a single channel; with mean 0.5 and
    # std 0.5 the [0, 1] range is mapped to [-1, 1].
    transforms.Normalize(mean=[0.5], std=[0.5]),
])


In [4]:
# Load MNIST from ./data (downloading it if necessary) and wrap both splits
# in DataLoaders. Only the training data is shuffled.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=data_tf,
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=data_tf,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [6]:
# Build the loss function, the network and the optimizer.
criterion = nn.CrossEntropyLoss()

# The input size is 28*28 (one flattened image); the output size is 10
# because this is a 10-way classification problem over the digits 0-9.
model = net.simpleNet(28 * 28, 300, 100, 10)
if torch.cuda.is_available():
    # Move the parameters to the GPU before handing them to the optimizer.
    model = model.cuda()

optimizer = optim.SGD(model.parameters(), lr=learning_rate)


In [22]:
# Training: for every epoch, iterate over the mini-batches, run the forward
# pass, accumulate loss/accuracy statistics, then back-propagate and update.
# losses = []
# acces = []
use_cuda = torch.cuda.is_available()  # loop-invariant, so query it only once
for epoch in range(num_epoches):
    # Explicitly enter training mode. Without this, re-running the cell after
    # model.eval() has been called would train with BatchNorm/Dropout layers
    # still in inference mode.
    model.train()
    train_loss = 0
    train_acc = 0
    for img, label in train_loader:
        # Flatten every image into one row: the first dimension stays
        # img.size(0) (the batch size) and -1 lets view() infer the rest.
        img = img.view(img.size(0), -1)
        # Tensors are used directly; wrapping them in the deprecated
        # torch.autograd.Variable is a no-op in PyTorch >= 0.4.
        if use_cuda:
            img = img.cuda()
            label = label.cuda()

        # Forward pass: compute the outputs and the loss.
        out = model(img)
        loss = criterion(out, label)
        # Weight the batch loss by the batch size so that dividing by
        # len(train_dataset) below gives a per-sample average.
        train_loss += loss.item() * label.size(0)

        # torch.max(out, 1) returns (values, indices) along dimension 1.
        # Only the indices are needed: they are the predicted classes that
        # get compared with the labels.
        _, pred = torch.max(out, 1)
        correct = (pred == label).sum()
        train_acc += correct.item()

        # Backward pass: clear old gradients, back-propagate, take one step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

#     losses.append(train_loss / len(train_dataset))
#     acces.append(train_acc / len(train_dataset))
    # Report the per-sample average loss and the accuracy for this epoch.
    print('epoch is {},Train Loss : {:.6f},Train Acc is {:.6f}'.format((epoch+1),train_loss/len(train_dataset),train_acc/len(train_dataset)))


epoch is 1,Train Loss : 0.065797,Acc is 0.978833
epoch is 2,Train Loss : 0.059826,Acc is 0.981133
epoch is 3,Train Loss : 0.056155,Acc is 0.982100
epoch is 4,Train Loss : 0.052137,Acc is 0.983850
epoch is 5,Train Loss : 0.048715,Acc is 0.984450
epoch is 6,Train Loss : 0.047643,Acc is 0.984567
epoch is 7,Train Loss : 0.044675,Acc is 0.985750
epoch is 8,Train Loss : 0.043586,Acc is 0.985750
epoch is 9,Train Loss : 0.040834,Acc is 0.986800
epoch is 10,Train Loss : 0.036825,Acc is 0.988517
epoch is 11,Train Loss : 0.036058,Acc is 0.988133
epoch is 12,Train Loss : 0.033897,Acc is 0.988983
epoch is 13,Train Loss : 0.032842,Acc is 0.989550
epoch is 14,Train Loss : 0.031238,Acc is 0.989683
epoch is 15,Train Loss : 0.028138,Acc is 0.990833
epoch is 16,Train Loss : 0.027085,Acc is 0.991033
epoch is 17,Train Loss : 0.026385,Acc is 0.991583
epoch is 18,Train Loss : 0.025478,Acc is 0.992000
epoch is 19,Train Loss : 0.023766,Acc is 0.991883
epoch is 20,Train Loss : 0.021904,Acc is 0.992900


In [23]:
# Evaluation on the test set.
model.eval()  # put the model into evaluation (inference) mode
eval_loss = 0
eval_acc = 0
use_cuda = torch.cuda.is_available()  # loop-invariant, so query it only once
# No gradients are needed for evaluation; torch.no_grad() avoids building the
# autograd graph and so saves memory and time.
with torch.no_grad():
    for img, label in test_loader:
        # Flatten every image into one row: the first dimension stays
        # img.size(0) (the batch size) and -1 lets view() infer the rest.
        img = img.view(img.size(0), -1)
        # Tensors are used directly; wrapping them in the deprecated
        # torch.autograd.Variable is a no-op in PyTorch >= 0.4.
        if use_cuda:
            img = img.cuda()
            label = label.cuda()

        # Forward pass: compute the outputs and the loss.
        out = model(img)
        loss = criterion(out, label)
        # Weight the batch loss by the batch size so that dividing by
        # len(test_dataset) below gives a per-sample average.
        eval_loss += loss.item() * label.size(0)

        # torch.max(out, 1) returns (values, indices) along dimension 1.
        # Only the indices are needed: they are the predicted classes that
        # get compared with the labels.
        _, pred = torch.max(out, 1)
        correct = (pred == label).sum()
        eval_acc += correct.item()

# Report the per-sample average loss and the accuracy over the test set.
print('Test Loss : {:.6f},Test Acc is {:.6f}'.format(eval_loss/len(test_dataset),eval_acc/len(test_dataset)))


Test Loss : 0.093930,Acc is 0.975100
