# PyTorch Tutorial 11 - Softmax and Cross Entropy

https://www.youtube.com/watch?v=7q7E91pHoW4&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=11

In [1]:
import torch
import torch.nn as nn
import numpy as np

# softmax

### handwritten softmax function

In [2]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    Args:
        x: Array-like of raw scores. For a 1-D input the result sums to 1;
            for higher-rank input each slice along axis 0 is normalised
            independently.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise (and np.max rejects empty input); return an
        # empty array of the same shape.
        return np.exp(x)
    # Softmax is unchanged by subtracting a constant from every score, so
    # shift by the maximum: the largest exponent becomes 0 and np.exp can no
    # longer overflow to inf (which would turn the result into nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

In [3]:
x = np.array([2., 1, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [4]:
sum(outputs)

1.0

### built-in softmax from torch

In [5]:
x = torch.tensor([2., 1, 0.1])
outputs = torch.softmax(x, dim=0)
print('softmax torch:', outputs)

softmax torch: tensor([0.6590, 0.2424, 0.0986])


In [6]:
sum(outputs)

tensor(1.0000)

# cross entropy

### handwritten cross entropy function

In [7]:
def cross_entropy(actual, predicted):
    """Return the cross-entropy ``-sum(actual * log(predicted))``.

    Args:
        actual: Array-like of target weights (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, broadcastable
            against ``actual``.

    Returns:
        The summed loss as a NumPy float scalar.
    """
    actual, predicted = np.broadcast_arrays(np.asarray(actual), np.asarray(predicted))
    # Only entries with a non-zero target contribute to the loss. Taking the
    # log just there applies the usual convention 0 * log(0) = 0, instead of
    # producing 0 * -inf = nan when a non-target class has probability 0.
    contributes = actual != 0
    log_pred = np.zeros(predicted.shape, dtype=float)
    np.log(predicted, out=log_pred, where=contributes)
    loss = -np.sum(actual * log_pred)
    return loss

In [8]:
# Y must be one hot encoded
# if class 0: [1, 0, 0]
# if class 1: [0, 1, 0]
# if class 2: [0, 0, 1]

Y = np.array([1, 0, 0])

In [9]:
# Y_pred probabilities

Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

In [10]:
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

In [11]:
print(f'loss 1 numpy: {l1:.4f}')
print(f'loss 2 numpy: {l2:.4f}')

loss 1 numpy: 0.3567
loss 2 numpy: 2.3026


### built-in cross entropy from torch, 1 sample

In [12]:
loss = nn.CrossEntropyLoss()

In [13]:
# Y must NOT be one-hot encoded; it holds class indices

# 1 sample
Y = torch.tensor([0])

In [14]:
# Y_pred holds raw scores (logits), not probabilities
# shape: n_samples * n_classes = 1 * 3

Y_pred_good = torch.tensor([[2., 1, 0.1]])  # 2-D (one row per sample); do not apply softmax here
Y_pred_bad = torch.tensor([[0.5, 2, 0.3]])

In [15]:
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

In [16]:
print(f'loss 1 torch: {l1.item()}')
print(f'loss 2 torch: {l2.item()}')

loss 1 torch: 0.4170299470424652
loss 2 torch: 1.840616226196289


##### predictions

In [17]:
torch.max(Y_pred_good, axis=1)

torch.return_types.max(
values=tensor([2.]),
indices=tensor([0]))

In [18]:
# The predicted class is the index of the largest score in each row.
# Read the `indices` field directly instead of unpacking into `_`, which
# would shadow the interactive shell's last-output variable.
predictions1 = torch.max(Y_pred_good, dim=1).indices
predictions2 = torch.max(Y_pred_bad, dim=1).indices

predictions1, predictions2

(tensor([0]), tensor([1]))

### built-in cross entropy from torch, 3 samples

In [19]:
loss = nn.CrossEntropyLoss()

In [20]:
# Y must NOT be one-hot encoded; it holds class indices

# 3 samples
Y = torch.tensor([2, 0, 1])

In [21]:
# Y_pred holds raw scores (logits), not probabilities
# shape: n_samples * n_classes = 3 * 3

Y_pred_good = torch.tensor([[1., 0.1, 2.1], [2., 1, 0.1], [1., 2, 0.1]])  # 2-D (one row per sample);
                                                                          # do not apply softmax here
Y_pred_bad = torch.tensor([[0.5, 2, 0.3], [1.5, 0.2, 0.3], [1.5, 0.2, 0.3]])

In [22]:
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

In [23]:
print(f'loss 1 torch: {l1.item()}')
print(f'loss 2 torch: {l2.item()}')

loss 1 torch: 0.40603378415107727
loss 2 torch: 1.415835976600647


## nn.CrossEntropyLoss applies nn.LogSoftmax + nn.NLLLoss (Negative Log Likelihood Loss), so do not add a Softmax in the last layer!

## Y has class labels, not One-Hot!
## Y_pred has raw scores (logits), no Softmax!

## NN example - binary classification

In [24]:
class NeuralNet1(nn.Module):
    """Small fully connected network for binary classification.

    Layers: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # A single output unit: one score per sample for the binary decision.
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the network on ``x`` and return the sigmoid of the final layer."""
        hidden = self.relu(self.linear1(x))
        logit = self.linear2(hidden)
        # Sigmoid squashes the single score into the range (0, 1).
        return torch.sigmoid(logit)

In [25]:
model = NeuralNet1(input_size=28*28, hidden_size=5)
# BCELoss takes probabilities as input, which is why NeuralNet1.forward ends with a sigmoid
criterion = nn.BCELoss()

## NN example - multi-class classification

In [26]:
class NeuralNet2(nn.Module):
    """Small fully connected network for multi-class classification.

    Layers: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). The output is left as raw scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # One output score per class.
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run the network on ``x`` and return the raw class scores."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax here: the raw scores are returned as-is.
        return self.linear2(hidden)

In [27]:
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # applies LogSoftmax + NLLLoss internally, so the model outputs raw logits