In [1]:
import torch
import torch.nn as nn
import numpy as np


## Softmax function

In [2]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [8]:
x = np.array([2.0, 1.0, 0.1])

outputs = softmax(x)
print("softmax numpy:", outputs.round(4))

softmax numpy: [0.659  0.2424 0.0986]


In [7]:
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print("softmax torch:", outputs)

softmax torch: tensor([0.6590, 0.2424, 0.0986])


## Cross Entropy

In [11]:
def cross_entropy(actual, predicted):
    loss = np.sum(actual * np.log(predicted))
    return loss

In [12]:
y_actual = np.array([1, 0, 0])

y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(y_actual, y_pred_good)
l2 = cross_entropy(y_actual, y_pred_bad)

print(f"Loss1 numpy: {l1:.4f}")
print(f"Loss2 numpy: {l2:.4f}")

Loss1 numpy: -0.3567
Loss2 numpy: -2.3026


### Cross Entropy using Pytorch

In [None]:
### NOTE ###
# Careful

# nn.CrossEntropyLoss already applies in nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)
# -> No Softmax in the last layer

# Y has class labels, -> not One-Hot
# Y_pred has raw scores (logits) -> no Softmax


# But, after BCELoss(), we must use Sigmoid function
# Sigmoid function for binary classification
# Softmax function is for multiclass classification


In [21]:
loss = nn.CrossEntropyLoss()


Y = torch.tensor([0])
# nsamples * nclasses 
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())


_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)

print("***********")
print(predictions1)
print(predictions2)


0.31326165795326233
1.7014133930206299
***********
tensor([0])
tensor([1])


In [30]:
loss = nn.CrossEntropyLoss()


Y = torch.tensor([2, 0, 1]) # # Target labels for 3 samples
# nsamples * nclasses = 3 
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1], [2.0, 3.0, 0.1], [4.0, 3.0, 0.1]]) # Predictions for 3 samples
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3], [2.0, 1.0, 4.1], [2.0, 3.0, 5.1]]) # Predictions for 3 samples

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())


_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)

print("***********")
print(predictions1)
print(predictions2)


1.6658931970596313
2.1834516525268555
***********
tensor([0, 1, 0])
tensor([1, 2, 2])


#### Multiclass problem

In [31]:
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out
    
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss() # applies Softmax already

In [32]:
model

NeuralNet2(
  (linear1): Linear(in_features=784, out_features=5, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=5, out_features=3, bias=True)
)

In [33]:
criterion

CrossEntropyLoss()

### Binary Classification

In [34]:
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # sigmoid at the end
        out = torch.sigmoid(out)
        return out
    
model = NeuralNet1(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.BCELoss()

In [35]:
model

NeuralNet1(
  (linear1): Linear(in_features=784, out_features=5, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=5, out_features=3, bias=True)
)

In [36]:
criterion

BCELoss()