# Softmax and Crossentropy

![Softmax](./images/04-softmax.png)

![Softmax-formula](./images/04-1-softmax-formula.png)

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis = 0)

In [3]:
x = np.array([2.0, 2.0, 1.0])
output = softmax(x)
print('softmax numpy: ', output)

softmax numpy:  [0.4223188 0.4223188 0.1553624]


In [4]:
x = torch.tensor([2.0, 2.0, 1.0])
ouputs = torch.softmax(x, dim=0) #to compute along first axis
print('softmax tensor: ', output)

softmax tensor:  [0.4223188 0.4223188 0.1553624]


## Crossentropy
Cross-entropy loss, or log loss, measures the performance of a classification model whose output is a probability value between 0 and 1. Cross-entropy loss increases as the predicted probability diverges from the actual label.

![crossentropy](./images/05-crossentropy.png)


better our predictions, lower is our loss


![crossentropy-example](./images/05-1-crossentropy-example.png)
for cross-entropy we require one-hot encoding

In [5]:
import torch
import torch.nn as nn
import numpy as np

In [8]:
def cross_entropy(actual, predicted):
    loss = -np.sum(actual * np.log(predicted))
    return loss

y = np.array([1,0,0]) # one hot encoded predictions

y_pred_good = np.array([0.7, 0.2, 0.1])
y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(y, y_pred_good)
l2 = cross_entropy(y, y_pred_bad)


print(f'loss1 numpy: {l1:.4f}')
print(f'loss2 numpy: {l2:.4f}')


loss1 numpy: 0.3567
loss2 numpy: 2.3026


In [9]:
# let's implement this in pytorh now

In [10]:
# pytorch --> crossentropy - logsoftmax + negative log likelihood
# don't do one hot encoding

loss = nn.CrossEntropyLoss()
y = torch.tensor([0]) # not one hot 0 class is best

#nsamples X nclasses = 1 X 3
y_pred_good = torch.tensor([[2.0, 1.0, 0.1]]) # 0 has the heighest value
y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]]) # 1 has the highest value

l1 = loss(y_pred_good, y)
l2 = loss(y_pred_bad, y)

print(f'loss1 numpy: {l1.item():.4f}')
print(f'loss2 numpy: {l2.item():.4f}')



loss1 numpy: 0.4170
loss2 numpy: 1.8406


In [11]:
# to get actual predictions

In [12]:
_, predictions1 = torch.max(y_pred_good, 1) # along the 1first dimension
_, predictions2 = torch.max(y_pred_bad, 1) # along the 1first dimension


print(predictions1)
print(predictions2)


tensor([0])
tensor([1])


In [13]:
# let's do for multiple y prediction value

# pytorch --> crossentropy - logsoftmax + negative log likelihood
# don't do one hot encoding

loss = nn.CrossEntropyLoss()
y = torch.tensor([2, 0, 1]) # not one hot 0 class is best

#nsamples X nclasses = 3 X 3
y_pred_good = torch.tensor([[0.1, 1.0, 2.1], [2.0, 1.0, 0.1], [1.0, 3.0, 0.1]]) # 0 has the heighest value
y_pred_bad = torch.tensor([[2.1, 1.0, 0.3], [0.1, 1.0, 2.3], [0.1, 3.0, 0.1]]) # 1 has the highest value

l1 = loss(y_pred_good, y)
l2 = loss(y_pred_bad, y)

print(f'loss1 numpy: {l1.item():.4f}')
print(f'loss2 numpy: {l2.item():.4f}')



loss1 numpy: 0.3251
loss2 numpy: 1.6110


In [14]:
_, predictions1 = torch.max(y_pred_good, 1) # along the 1first dimension
_, predictions2 = torch.max(y_pred_bad, 1) # along the 1first dimension


print(predictions1)
print(predictions2)


tensor([2, 0, 1])
tensor([0, 2, 1])


![06-neural-net-with-softmax.png](./images/06-neural-net-with-softmax.png)


In [15]:
# for multiclass classifications

In [17]:
import torch
import torch.nn as nn

# multiclass problem
class MultiClassNeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MultiClassNeuralNet, self).__init__()
        self.linear = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self,x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out


In [18]:
model = MultiClassNeuralNet(input_size = 28*28, hidden_size = 5, num_classes = 3)
criterion = nn.CrossEntropyLoss() # applies softmax

![07-neural-net-with-sigmoid.png](./images/07-neural-net-with-sigmoid.png)

In [23]:
import torch
import torch.nn as nn

# binaryclass problem
class BinaryClassNeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(BinaryClassNeuralNet, self).__init__()
        self.linear = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)
    
    def forward(self,x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        y_pred = torch.sigmoid(out)
        return y_pred

In [24]:
model = BinaryClassNeuralNet(input_size = 28*28, hidden_size = 5)
criterion = nn.BCELoss() # applies softmax