In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
  """Convert raw scores into probabilities with the softmax function.

  Computes exp(x_i) / sum_j exp(x_j), normalizing along axis 0: a 1-D input
  is treated as one score vector, and each column of a 2-D input is
  normalized independently.

  Args:
    x: array-like of scores (logits).

  Returns:
    numpy array with the same shape as x whose entries are non-negative and
    sum to 1 along axis 0.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalize; np.max would raise on an empty array.
    return np.exp(x)
  # Subtracting the maximum leaves the result mathematically unchanged (the
  # common factor cancels) but keeps every exponent <= 0, so np.exp cannot
  # overflow to inf and turn the result into nan for large scores.
  shifted = x - np.max(x, axis=0, keepdims=True)
  exps = np.exp(shifted)  # computed once, reused for the denominator
  return exps / np.sum(exps, axis=0, keepdims=True)  # denom: sum of all the exponentiated 'x' values

In [3]:
# raw scores (logits) for a single sample with three classes
x = np.array([2.0, 1.0, 0.1])
# softmax turns the scores into probabilities; the largest score gets the largest probability
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [4]:
# we can use the built-in torch.softmax() to get approximately the same results
# (the printed tensor is rounded to 4 decimals, so it looks slightly different from the numpy output)
x = torch.tensor([2.0, 1.0, 0.1])
# dim=0 normalizes along the tensor's only dimension, matching axis=0 in the numpy version
outputs = torch.softmax(x, dim=0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


In [5]:
def cross_entropy(actual, predicted):
  """Cross-entropy loss between target and predicted probabilities.

  Computes -sum(actual * log(predicted)) over all elements.

  Args:
    actual: array-like of target probabilities (e.g. a one-hot vector).
    predicted: array-like of predicted probabilities, broadcastable against
      actual.

  Returns:
    The summed (not averaged) loss as a numpy float. The result is inf when a
    class with non-zero target weight is predicted with probability 0.
  """
  actual, predicted = np.broadcast_arrays(
      np.asarray(actual), np.asarray(predicted))
  # Terms with a zero target contribute nothing to the sum. Skipping them
  # avoids 0 * log(0) = 0 * -inf = nan, which would otherwise turn the loss of
  # a fully confident, correct prediction such as [1.0, 0.0, 0.0] into nan.
  contributes = actual != 0
  loss = -np.sum(actual[contributes] * np.log(predicted[contributes]))
  return loss # / float(predicted.shape[0]) - if we wanted to normalize it

In [6]:
# target for a single sample, one-hot encoded: the true class is index 0
Y = np.array([1, 0, 0])

# most of the probability on the true class -> small loss
Y_pred_good = np.array([0.7, 0.2, 0.1])
l1 = cross_entropy(Y, Y_pred_good)

# most of the probability on a wrong class -> large loss
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l2 = cross_entropy(Y, Y_pred_bad)

print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [17]:
# now let's implement cross entropy using PyTorch directly
# nn.CrossEntropyLoss applies log-softmax itself, so it expects raw scores (logits),
# and the targets are class indices rather than one-hot vectors
loss = nn.CrossEntropyLoss()

# 3 possible classes: '0' '1' and '2'
Y = torch.tensor([2, 0, 1])

# n_samples x n_classes = 3x3
# our Y is [2,0,1] which means that the first sample should be class 2, the second class 0, ...
# in Y_pred_good, the first row predicts class 2 (has its highest val at index 2), ...
Y_pred_good = torch.tensor([[2.0, 1.0, 2.1], [2.2, 1.0, 0.1], [2.0, 3.0, 2.1]])
Y_pred_bad = torch.tensor([[2.1, 1.0, 0.1], [0.5, 1.0, 0.1], [2.0, 3.0, 2.1]])

# now compute the Cross-Entropy Loss with multiple samples
# (with the default reduction the per-sample losses are averaged)
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())

# let's get/print the predicted class of every sample
# torch.argmax(..., dim=1) reduces over the class dimension: for each row (sample)
# it returns the index of the largest score
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
print(predictions1)
print(predictions2)

0.5773889422416687
1.3857358694076538
tensor([2, 0, 1])
tensor([0, 1, 1])
