In [40]:
import torch
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l


In [41]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)


# 从0实现多层感知机

In [55]:
# 定义模型参数
input_dim = 784
hidden_dim = 256
output_dim = 10

w1 = torch.tensor(np.random.normal(0, 0.01, (input_dim, hidden_dim)), dtype = torch.float).requires_grad_(requires_grad=True)
b1 = torch.zeros(hidden_dim, dtype = torch.float).requires_grad_(requires_grad=True)
w2 = torch.tensor(np.random.normal(0, 0.01, (hidden_dim, output_dim)), dtype = torch.float).requires_grad_(requires_grad=True)
b2 = torch.zeros(output_dim, dtype = torch.float).requires_grad_(requires_grad=True)

params = [w1,b1,w2,b2]

In [43]:
# 定义激活函数

def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))


def sigmoid(X):
    X_exp = X.exp()
    return 1 / (1 + X_exp)

def tanh(X):
    X_exp = torch.exp(-2 * X)
    return (1 - X_exp) / (1 + X_exp)

In [44]:
# 定义模型
def MLP(X):
    X = X.view(-1, input_dim)
    H = relu(torch.matmul(X, w1) + b1)
    O = torch.matmul(H, w2) + b2
    return O

In [45]:
# 定义损失函数
# crossentropy 自己的有问题
def crossEntropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))
myloss = crossEntropy

# 注意，pytorch的交叉熵默认是平均的
loss = torch.nn.CrossEntropyLoss()



In [46]:
# 训练
num_epochs, lr = 5, 100.0

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
# 本函数已保存在d2lzh包中方便以后使用
def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到


            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train(MLP, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0031, train acc 0.708, test acc 0.755
epoch 2, loss 0.0019, train acc 0.824, test acc 0.820
epoch 3, loss 0.0016, train acc 0.846, test acc 0.784
epoch 4, loss 0.0016, train acc 0.855, test acc 0.832
epoch 5, loss 0.0015, train acc 0.863, test acc 0.831


In [56]:
num_epochs, lr = 5, 100.0
d2l.train_ch3(MLP, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0031, train acc 0.713, test acc 0.797
epoch 2, loss 0.0019, train acc 0.823, test acc 0.828
epoch 3, loss 0.0017, train acc 0.843, test acc 0.829
epoch 4, loss 0.0015, train acc 0.856, test acc 0.841
epoch 5, loss 0.0015, train acc 0.862, test acc 0.850


# 利用pytorch的简洁实现

In [66]:
# 定义一个falttern层
class FlatternLayer(torch.nn.Module):
    def __init__(self):
        super(FlatternLayer,self).__init__()
    def forward(self,X):
        return X.view(X.shape[0], -1)
    
model = torch.nn.Sequential(
    FlatternLayer(),
    torch.nn.Linear(input_dim, hidden_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_dim, output_dim)
)

for params in model.parameters():
    torch.nn.init.normal_(params, mean=0, std=0.01)
        

In [69]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 5
train(model, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)


epoch 1, loss 0.0031, train acc 0.700, test acc 0.776
epoch 2, loss 0.0019, train acc 0.819, test acc 0.817
epoch 3, loss 0.0017, train acc 0.842, test acc 0.853
epoch 4, loss 0.0015, train acc 0.857, test acc 0.828
epoch 5, loss 0.0014, train acc 0.862, test acc 0.849
