In [1]:
import torch
import numpy as np
from d2l import torch as d2l
from IPython import display

In [2]:
batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size)

In [3]:
num_inputs = 784
num_outputs = 10

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)

In [4]:
#softmax计算实现
def softmax(X):
    
    X_exp = torch.exp(X)
    
    partition = X_exp.sum(dim=1, keepdim=True)
    
    return X_exp / partition  # 这里应用了广播机制

#简单测试
X = torch.normal(0,1,(2,5))

X_prob = softmax(X)

X_prob,X_prob.sum(dim = 1)

(tensor([[0.0413, 0.4877, 0.1412, 0.2704, 0.0594],
         [0.7549, 0.0906, 0.0525, 0.0718, 0.0301]]),
 tensor([1.0000, 1.0000]))

In [5]:
#softmax回归模型
def net(X):
    return softmax(torch.matmul(X.reshape(-1,W.shape[0]),W)+b)

# def net(X):
#     return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)


In [6]:
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.tensor([0, 2])
y_hat[range(len(y_hat)),y]

tensor([0.1000, 0.5000])

In [7]:
##交叉熵实现
def cross_entropy(y_hat,y):
    return -torch.log(y_hat[range(len(y_hat)),y])

cross_entropy(y_hat,y)

tensor([2.3026, 0.6931])

In [8]:
##预测正确率的计算
def accuracy(y_hat,y):
    
    if len(y_hat.shape)>1 and y_hat.shape[1] >1:
        
        y_hat = y_hat.argmax(axis =1) #y_hat矩阵中每一行里最大值的下标被找出来，得到一个一维数组，再赋值给yhat
                                      #这里每行的最大值下标，代表的就正是每行最后的分类结果，即最终预测为第几类
            
    cmp = y_hat.type(y.dtype) == y    #两个矩阵里的每个元素相比较，对应位置相同的元素，则为TRUE，反之Flase，最后存在cmp里
     
    return float(cmp.type(y.dtype).sum())  #将cmp这个列向量里的所有1求和，得到的总和即为预测正确的个数

accuracy(y_hat,y)/len(y)              #预测正确的总个数，除以总共样本数，得到准确率

0.5

In [9]:
# 评价net在数据集上的准确率
# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进：它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter, net):
    
    acc_sum, n = 0.0, 0
    
    for X, y in data_iter:
        
        prd_X = net(X)
        
        prd_X = prd_X.argmax(axis=1)
        
        cmp = prd_X.type(y.dtype) == y
        
        acc_sum += float(cmp.type(y.dtype).sum())
        
        n += y.shape[0]
    
    return acc_sum / n

print(evaluate_accuracy(test_iter, net))

0.0825


In [10]:
num_epochs, lr = 5, 0.1

# 本函数已保存在d2lzh包中方便以后使用
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到


            train_l_sum += l
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)


epoch 1, loss 0.7848, train acc 0.748, test acc 0.789
epoch 2, loss 0.5694, train acc 0.814, test acc 0.802
epoch 3, loss 0.5252, train acc 0.826, test acc 0.820
epoch 4, loss 0.5016, train acc 0.831, test acc 0.826
epoch 5, loss 0.4862, train acc 0.836, test acc 0.827


In [11]:
def get_fashion_mnist_labels(labels):  # @save
    """返回Fashion-MNIST数据集的文本标签。"""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
X, y = iter(test_iter).next()
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
show_images(X[0:9], titles[0:9])

NameError: name 'show_images' is not defined