In [2]:
from torch import nn
import torch

## [torch.nn.Softmax](https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html#softmax)

Given a vector of real numbers $z = (z_{1}, z_{2}, ..., z_{n})$, the softmax function calculates the probability 
$p_{i}$ of the $i^{th}$ class as follows: 

$p_{i} = \frac{e^{z_{i}}}{\sum_{j=1}^{n}e^{z_{j}}}$

In [10]:
# Sample a random (2, 3, 4) tensor from the standard normal distribution
# to use as the softmax input.
t1 = torch.randn(2, 3, 4)
print("shape: ", t1.size())
print("t1: \n", t1)

shape:  torch.Size([2, 3, 4])
t1: 
 tensor([[[ 1.1281e-03,  5.2460e-01, -2.0523e+00,  5.4018e-01],
         [-6.2819e-01, -7.6226e-01,  1.6928e-01,  6.9731e-02],
         [-1.1660e+00, -9.2012e-01,  7.7639e-01, -1.2223e+00]],

        [[ 2.5709e-01,  1.8134e+00,  9.5059e-01,  1.4146e-01],
         [ 7.5287e-01, -5.3057e-01, -6.7884e-01,  1.0876e+00],
         [-1.0602e+00,  1.5446e-01, -1.2389e+00, -7.4936e-01]]])


In [11]:
# Build a Softmax module that normalizes along the last dimension.
softmax_obj = nn.Softmax(-1)
print("type: ", softmax_obj.__class__)
print(softmax_obj)

type:  <class 'torch.nn.modules.activation.Softmax'>
Softmax(dim=-1)


In [12]:
# Notice that every softmax output lies in [0, 1], and that the values along
# the softmax dimension (the last one, dim=-1) sum to 1, so each row of t2 is
# a probability distribution.
t2 = softmax_obj(t1)

# Sanity-check both properties so the claim above is verified on every run.
assert ((t2 >= 0) & (t2 <= 1)).all()
assert torch.allclose(t2.sum(dim=-1), torch.ones(t2.shape[:-1]))

print("shape: ", t2.shape)
print("t2: \n", t2)

shape:  torch.Size([2, 3, 4])
t2: 
 tensor([[[0.2207, 0.3726, 0.0283, 0.3784],
         [0.1638, 0.1433, 0.3637, 0.3292],
         [0.0980, 0.1254, 0.6839, 0.0927]],

        [[0.1158, 0.5492, 0.2318, 0.1032],
         [0.3432, 0.0951, 0.0820, 0.4797],
         [0.1522, 0.5128, 0.1273, 0.2077]]])


## [torch.nn.LogSoftmax](https://pytorch.org/docs/stable/generated/torch.nn.LogSoftmax.html#logsoftmax)


LogSoftmax is closely related to Softmax: it returns the logarithm of the Softmax probabilities instead of the probabilities themselves.

$\mathrm{LogSoftmax}(z)_{i} = \log(p_{i}) = z_{i} - \log\sum_{j=1}^{n}e^{z_{j}}$

In [14]:
# Sample a fresh random (2, 3, 4) tensor from the standard normal distribution
# to use as the LogSoftmax input.
t3 = torch.randn(2, 3, 4)
print("shape: ", t3.size())
print("t3: \n", t3)

shape:  torch.Size([2, 3, 4])
t3: 
 tensor([[[ 1.0472, -0.2998, -0.5796, -0.1466],
         [-0.6364,  0.0129, -1.3613,  1.4907],
         [ 0.3979, -1.8782,  0.1394, -0.0354]],

        [[ 0.9524, -0.0172,  0.1861,  0.3807],
         [-1.2351,  0.1988, -0.0174, -1.4718],
         [ 1.5263,  2.6209,  0.4102, -0.0599]]])


In [7]:
# Build a LogSoftmax module that operates along the last dimension.
log_softmax_obj = nn.LogSoftmax(-1)
print("type: ", log_softmax_obj.__class__)
print(log_softmax_obj)

type:  <class 'torch.nn.modules.activation.LogSoftmax'>
LogSoftmax(dim=-1)


In [15]:
# log(x) <= 0 whenever 0 < x <= 1 (with log(1) == 0), and softmax
# probabilities lie in that range. Hence all the entries in t4 are <= 0.
t4 = log_softmax_obj(t3)

# Sanity-check the claim above on every run.
assert (t4 <= 0).all()

print("shape: ", t4.shape)
print("t4: \n", t4)

shape:  torch.Size([2, 3, 4])
t4: 
 tensor([[[-0.5651, -1.9121, -2.1919, -1.7589],
         [-2.4672, -1.8179, -3.1921, -0.3401],
         [-0.9256, -3.2016, -1.1841, -1.3588]],

        [[-0.8790, -1.8486, -1.6453, -1.4507],
         [-2.2369, -0.8029, -1.0192, -2.4735],
         [-1.5086, -0.4140, -2.6247, -3.0948]]])
