# softmax回归从零开始实现

In [38]:
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

## 初始化模型参数

In [39]:
num_inputs = 784
num_outputs = 10

W = torch.normal(0, 0.01, size = (num_inputs, num_outputs), requires_grad= True)
b = torch.zeros(num_inputs, requires_grad=True)

In [40]:
X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
X.sum(0, keepdim=True), X.sum(1, keepdim=True)

(tensor([[5., 7., 9.]]),
 tensor([[ 6.],
         [15.]]))

In [41]:
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # 这里应用了广播机制

In [42]:
X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(1)

(tensor([[0.0621, 0.0567, 0.0321, 0.8129, 0.0361],
         [0.1102, 0.1743, 0.0721, 0.1094, 0.5340]]),
 tensor([1., 1.]))

In [43]:
# def net(X):
#     return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)

## 切片

In [44]:
y = torch.tensor([0, 2])
y_hat = torch.tensor([ [0.1, 0.3, 0.6], [0.3, 0.2, 0.5] ])
y_hat[[0, 1], y]

tensor([0.1000, 0.5000])

In [45]:
def cross_entropy(y_hat, y):
    return -torch.log(y_hat[range(len(y_hat)), y]) # 这里省略了一个1

cross_entropy(y_hat, y)

tensor([2.3026, 0.6931])

In [46]:
def accuracy(y_hat, y):
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())

In [47]:
accuracy(y_hat, y) / len(y),y

(0.5, tensor([0, 2]))

In [48]:
class Accumulator:  #@save
    """在n个变量上累加"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [49]:
def evaluate_accuracy(net, data_iter):  #@save
    """计算在指定数据集上模型的精度"""
    if isinstance(net, torch.nn.Module):
        net.eval()  # 将模型设置为评估模式
    metric = Accumulator(2)  # 正确预测数、预测总数
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

In [50]:
evaluate_accuracy(net, test_iter)

RuntimeError: The size of tensor a (10) must match the size of tensor b (784) at non-singleton dimension 1