## Softmax

$$\mathrm{softmax}(y_i) = \frac{e^{y_i}}{\sum_{j} e^{y_j}}$$

In [1]:
# softmax function
# It transforms a vector of raw scores (logits) into probabilities

import torch
import torch.nn as nn

# Raw scores (logits) for three classes.
# Named `logits` rather than `input` so the Python builtin `input()` is not
# shadowed for the rest of the kernel session.
logits = torch.tensor([2.0, 1.0, 0.1])

# Manual softmax: exponentiate, then normalise so the entries sum to 1.
# Subtracting the maximum first leaves the result unchanged (the common factor
# e^{-max} cancels in the ratio) but keeps exp() from overflowing on large logits.
shifted_exp = torch.exp(logits - torch.max(logits))
output = shifted_exp / torch.sum(shifted_exp)

print(output)

# Built-in module: dim=0 because `logits` is a 1-D tensor, so the
# normalisation runs over its only dimension.
softmax = nn.Softmax(dim=0)
output = softmax(logits)

print(output)


tensor([0.6590, 0.2424, 0.0986])
tensor([0.6590, 0.2424, 0.0986])


## Cross Entropy Loss

In [2]:

# CrossEntropyLoss is fed the raw scores (logits) directly; no softmax is
# applied to the predictions below before they are passed in.
loss = nn.CrossEntropyLoss()

# The target is a class index, NOT a one-hot vector:
# index 0 here corresponds to the one-hot encoding [1, 0, 0].
Y = torch.tensor([0])

# One sample, three class scores each (shape 1 x 3).
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # highest score on class 0 (correct)
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])   # highest score on class 1 (wrong)

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())

# Predicted class = index of the largest score along the class dimension.
# argmax returns just the indices, so no throwaway `_` is needed (assigning to
# `_` would also override IPython's "last output" variable).
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)

print(predictions1)
print(predictions2)

0.4170299470424652
1.840616226196289
tensor([0])
tensor([1])


## Neural net with softmax

A final softmax layer is not required! `nn.CrossEntropyLoss` applies the softmax internally, so the model should output raw scores (logits).

<img src="../images/image1.png" width="800"/>


In [3]:

# Multiclass problem

class NeuralNet2(nn.Module):
    """Two-layer feed-forward network for multiclass classification.

    The forward pass is Linear -> ReLU -> Linear and returns the raw class
    scores (logits). No softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: number of input features fed to the first linear layer.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores produced per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one raw score per class for each sample in ``x``."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax here: the logits are returned as-is.
        return self.linear2(hidden)

# Build the multiclass network: 28*28 input features, 5 hidden units, 3 classes.
model = NeuralNet2(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)

# Loss for the raw scores: CrossEntropyLoss applies the softmax itself,
# which is why the model does not end with a softmax layer.
criterion = nn.CrossEntropyLoss()

<img src="../images/image2.png" width="800"/>

In [5]:
# Binary classification

class NeuralNet1(nn.Module):
    """Two-layer feed-forward network for binary classification.

    The forward pass is Linear -> ReLU -> Linear(1 output) -> sigmoid, so the
    model returns one value in (0, 1) per sample.
    """

    def __init__(self, input_size, hidden_size):
        """Create the layers.

        Args:
            input_size: number of input features fed to the first linear layer.
            hidden_size: number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid-squashed single output for each sample in ``x``."""
        logit = self.linear2(self.relu(self.linear1(x)))
        # Unlike the multiclass model, the sigmoid IS applied inside the model:
        # the notebook pairs this network with nn.BCELoss, which takes
        # probabilities rather than raw scores.
        return torch.sigmoid(logit)

# Build the binary classifier: 28*28 input features, 5 hidden units, 1 output.
# Note: this rebinds the `model` name used by the multiclass cell above.
model = NeuralNet1(
    input_size=28 * 28,
    hidden_size=5,
)

# Binary cross-entropy on probabilities; it matches the sigmoid that
# NeuralNet1 applies at the end of its forward pass.
# Note: this rebinds the `criterion` name used by the multiclass cell above.
criterion = nn.BCELoss()