<a href="https://colab.research.google.com/github/AlexeyTri/PyTorchTutorials/blob/main/Tutorial9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SoftMax and CrossEntropy

```

        -> 2.0              -> 0.65  
 Linear -> 1.0  -> Softmax  -> 0.25   -> CrossEntropy(y, y_hat)
        -> 0.1              -> 0.1                   

     scores(logits)      probabilities
                           sum = 1.0

```



In [94]:
import torch
import torch.nn as nn
import numpy as np

# SoftMax
Softmax applies the exponential function to each element and normalizes by dividing by the sum of all these exponentials.

-> squashes each output to be between 0 and 1, so the outputs can be read as probabilities; the sum of all probabilities is 1

In [95]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    Each element is exponentiated and divided by the sum of the exponentials
    along axis 0, so every slice along that axis sums to 1.

    Args:
        x: Array-like of scores (logits). For 2-D input, each column is
            normalized independently.

    Returns:
        A NumPy array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalize; also avoids np.max raising on an empty axis.
        return np.exp(x)
    # Shifting by the max does not change the result mathematically
    # (the factor exp(-max) cancels), but keeps exp() from overflowing to inf,
    # which would otherwise yield inf / inf = nan for large scores.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

In [96]:
# Run the NumPy softmax on three example scores (logits)
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
# Last expression of the cell: displays the resulting probabilities
outputs

array([0.65900114, 0.24243297, 0.09856589])

In [99]:
# Same scores through torch.softmax; note that `x` and `outputs` are rebound
# from NumPy arrays to torch tensors here
x = torch.tensor([2.0, 1.0, 0.1])
# dim=0 normalizes along the only axis of this 1-D tensor
outputs = torch.softmax(x, dim=0)
outputs

tensor([0.6590, 0.2424, 0.0986])

# Cross entropy or log loss

Cross-entropy measures the performance of a classification model whose output is a probability value between 0 and 1.

-> loss increases as the predicted probability diverges from the actual label

In [101]:
def cross_entropy(actual, predicted):
    """Compute the (un-normalized) cross-entropy between targets and predictions.

    Args:
        actual: Target values, multiplied element-wise with the log of the
            predictions (the notebook passes a one-hot vector).
        predicted: Predicted probabilities.

    Returns:
        The negated sum of ``actual * log(predicted)``. The sum is not divided
        by the number of elements.
    """
    tiny = 1e-15
    # Keep probabilities strictly inside (0, 1) so the log stays finite.
    safe_probs = np.clip(predicted, tiny, 1 - tiny)
    weighted_log_probs = np.multiply(actual, np.log(safe_probs))
    return -weighted_log_probs.sum()

In [102]:
# Y must be one-hot encoded:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])
# Predicted probabilities: the "good" one puts most mass on class 0,
# the "bad" one puts most mass on class 2
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
# Displays both losses; the good prediction gives the smaller value
l1, l2

(0.35667494393873245, 2.3025850929940455)

# CrossEntropyLoss in PyTorch (applies Softmax)

* nn.LogSoftmax + nn.NLLLoss
* NLLLoss = negative log likelihood loss


loss(input, target)

Each target element is a class label: 0, 1, or 2

Y (=target) contains class labels, not one-hot

input is of size nSamples x nClasses = 1 x 3

y_pred (=input) must be raw, unnormalized scores (logits) for each class, not softmax outputs

In [103]:
# Loss module reused by the cells below; it is called as loss(input, target)
loss = nn.CrossEntropyLoss()

In [107]:
# Target for a single sample, given as a class index (class 0), not one-hot
Y = torch.tensor([0])
Y

tensor([0])

In [108]:
# Raw scores (logits) of size 1 x 3: one sample, three classes
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # largest score at class 0 (the target)
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])  # largest score at class 1
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
# Displays both losses for comparison
l1,l2

(tensor(0.4170), tensor(1.8406))

In [109]:
# get predictions: index of the largest score in each row (dim=1 is the class axis).
# torch.argmax returns the indices directly, so no throwaway `_` is bound
# (IPython also uses `_` for the last cell output).
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
predictions1, predictions2

(tensor([0]), tensor([1]))

*  allows batch loss for multiple samples

In [110]:
# target is of size nBatch = 3
# each element is a class label (0, 1, or 2), one per sample
# note: this rebinds Y from the single-sample target used earlier
Y = torch.tensor([2, 0, 1])

In [111]:
# input is of size nBatch x nClasses = 3 x 3
# Y_pred holds raw logits (not softmax outputs)
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9], # largest score at class 2
    [1.2, 0.1, 0.3], # largest score at class 0
    [0.3, 2.2, 0.2]]) # largest score at class 1

# Here every row's largest score sits at a class other than the targets [2, 0, 1]
Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1], # largest score at class 0
    [0.1, 0.3, 1.5], # largest score at class 2
    [1.2, 0.2, 0.5]]) # largest score at class 0

In [112]:
# One loss value per call for the whole batch of three samples
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
# Displays both losses for comparison
l1,l2

(tensor(0.2834), tensor(1.6418))

In [113]:
# get predictions: index of the largest score in each row (dim=1 is the class axis).
# torch.argmax returns the indices directly, so no throwaway `_` is bound
# (IPython also uses `_` for the last cell output).
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
predictions1, predictions2

(tensor([2, 0, 1]), tensor([0, 2, 0]))

## Binary classification

In [115]:
class NeuralNet1(nn.Module):
    """Two-layer network with a single sigmoid-activated output per sample."""

    def __init__(self, input_size, hidden_size):
        """Build the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # Single output unit
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return sigmoid(linear2(relu(linear1(x))))."""
        hidden = self.relu(self.linear1(x))
        logit = self.linear2(hidden)
        # Sigmoid is applied here, inside the model
        return torch.sigmoid(logit)

# Binary classifier over 28*28 input features with 5 hidden units
model = NeuralNet1(input_size=28*28, hidden_size=5)
# Paired with a model whose forward() already applies sigmoid
criterion = nn.BCELoss()

## Multiclass problem

In [116]:
class NeuralNet2(nn.Module):
    """Two-layer network that returns one raw score per class."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            num_classes: Number of output scores.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return linear2(relu(linear1(x))) with no activation on the output."""
        hidden = self.relu(self.linear1(x))
        # no softmax at the end: raw scores are returned
        return self.linear2(hidden)

# Three-class classifier over 28*28 input features with 5 hidden units
# (rebinds `model` and `criterion` from the binary example)
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # applies Softmax itself, so the model returns raw scores