### Softmax

$$S(y_i)=\frac{e^{y_i}}{\sum_{j} e^{y_j}}$$

将数据转换为“概率”（所有输出之和为 1），数据值越大，概率越大

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Args:
        x: Array-like of scores. For a 1-D input the whole vector is
            normalized; for higher-rank input each slice along axis 0 is
            normalized independently.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along
        axis 0 are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; np.max would raise on an empty reduction.
        return np.exp(x)
    # Subtracting the per-slice maximum does not change the result
    # mathematically, but keeps np.exp from overflowing to inf (and the
    # quotient from becoming nan) when the scores are large.
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

# Demo: turn three raw scores into probabilities with the NumPy softmax.
x = np.asarray([2.0, 1.0, 0.1])
outputs = softmax(x)
print("softmax numpy:", outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [4]:
# Same scores as a 1-D tensor; dim=0 plays the role of NumPy's axis=0.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


### Cross-Entropy

$$ D(\hat{Y}, Y) = -\frac{1}{N} \cdot \sum_{i} Y_i \cdot \log(\hat{Y}_i)$$

$Y$ 必须是 one-hot encoded

$\hat{Y}$ 是概率（softmax）

In [5]:
def cross_entropy(actual, predicted):
    """Return the summed cross-entropy between targets and predictions.

    Computes ``-sum(actual * log(predicted))``. The sum is NOT divided by
    the number of samples; callers that want a mean must do so themselves.

    Args:
        actual: Array-like of target weights (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, broadcastable
            against ``actual``.

    Returns:
        The loss as a NumPy float. It is ``inf`` when a position with a
        non-zero target has predicted probability 0.
    """
    actual, predicted = np.broadcast_arrays(
        np.asarray(actual), np.asarray(predicted)
    )
    # Positions with a zero target contribute nothing to the sum. Skipping
    # them avoids evaluating 0 * log(0), which would be nan and poison the
    # whole loss whenever an off-target probability is exactly 0.
    contributing = actual != 0
    loss = -np.sum(actual[contributing] * np.log(predicted[contributing]))
    return loss

In [6]:
# Y (the ground truth) must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])

# The predictions hold probabilities: one close to Y, one far from it.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1, l2 = (cross_entropy(Y, y_hat) for y_hat in (Y_pred_good, Y_pred_bad))
print(f"Loss1 numpy: {l1:.4f}")
print(f"Loss2 numpy: {l2:.4f}")

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


### Practice with pytorch

注意：在 nn.CrossEntropyLoss 中已经应用了 Softmax，所以我们不用再手动添加

No Softmax in last layer

Y has class labels, not One-Hot

Y_pred has raw scores (logits), no Softmax

In [12]:
# The criterion is fed raw scores and integer class labels (not one-hot).
loss = nn.CrossEntropyLoss()

# Class label for each of the 3 samples.
Y = torch.tensor([2, 0, 1])
# Raw scores: one row per sample, one column per class (3 x 3).
Y_pred_good = torch.tensor(
    [
        [0.1, 1.0, 2.1],
        [2.0, 1.0, 0.1],
        [0.1, 3.0, 0.1],
    ]
)
Y_pred_bad = torch.tensor(
    [
        [0.5, 2.0, 0.3],
        [0.1, 1.0, 2.1],
        [2.0, 1.0, 0.1],
    ]
)

l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())

# The predicted class is the index of the largest score in each row.
_, predictions1 = Y_pred_good.max(dim=1)
_, predictions2 = Y_pred_bad.max(dim=1)
print(predictions1)
print(predictions2)

0.3018244206905365
1.947229027748108
tensor([2, 0, 1])
tensor([1, 2, 0])


### 分类神经网络中的应用

In [19]:
# Multiclass problem
class NeuralNet2(nn.Module):
    """Multiclass classifier: Linear -> ReLU -> Linear, emitting raw scores.

    No softmax is applied to the output; the final linear layer's values
    are returned unchanged.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
        """Build the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the second linear layer for input ``x``."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax at the end.
        return self.linear2(hidden)

In [20]:
# Multiclass setup: 784 inputs, 5 hidden units, 3 classes.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# This criterion applies softmax itself, so the model emits raw scores.
criterion = nn.CrossEntropyLoss()

### 二分类网络

In [26]:
# Binary classification
class NeuralNet1(nn.Module):
    """Binary classifier: Linear -> ReLU -> Linear(1) -> sigmoid."""

    def __init__(self, input_size: int, hidden_size: int) -> None:
        """Build the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score for input ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # Sigmoid at the end squashes the score into (0, 1).
        return torch.sigmoid(score)

In [27]:
# Binary setup: 784 inputs, 5 hidden units; the model already applies sigmoid.
model = NeuralNet1(
    input_size=28 * 28,
    hidden_size=5,
)
criterion = nn.BCELoss()