In [2]:
import torch
import torch.nn as nn
import numpy as np

## Softmax

In [3]:
def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``: ``exp(x) / sum(exp(x))``.

    The maximum along ``axis`` is subtracted before exponentiating. This does
    not change the mathematical result, but it keeps ``np.exp`` from
    overflowing to ``inf`` (which would turn the quotient into ``nan``) when
    the scores are large.

    Args:
        x: Array-like of raw scores.
        axis: Axis along which the outputs sum to 1. Defaults to 0, the axis
            the original implementation always used.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [4]:
# Run the NumPy softmax on three example scores and print the result
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


### Softmax with torch

In [5]:
# Same three scores through torch.softmax; dim=0 is the only axis of this 1-D tensor
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print(outputs)

tensor([0.6590, 0.2424, 0.0986])


## Cross Entropy Function

In [6]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Return the summed cross-entropy ``-sum(actual * log(predicted))``.

    Args:
        actual: Target distribution (e.g. a one-hot vector), array-like.
        predicted: Predicted probabilities, broadcastable against ``actual``.
        eps: Lower bound applied to ``predicted`` before taking the log.
            Without it a predicted probability of exactly 0 gives
            ``log(0) = -inf``, and ``0 * -inf`` is ``nan``, so even a perfect
            one-hot prediction would return ``nan``.

    Returns:
        The loss summed over all entries (not averaged over samples).
    """
    actual = np.asarray(actual)
    # Bound the probabilities away from zero so the logarithm stays finite.
    predicted = np.clip(np.asarray(predicted, dtype=float), eps, None)
    loss = -np.sum(actual * np.log(predicted))
    return loss  # divide by predicted.shape[0] to average over samples instead

In [7]:
# One-hot target: class 0 is the true class
Y = np.array([1,0,0])
# Predicted probabilities: the first puts 0.7 on class 0, the second only 0.1
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

In [9]:
# A lower cross-entropy means a better prediction
l1= cross_entropy(Y, Y_pred_good)
l2= cross_entropy(Y, Y_pred_bad)
# Print both losses rounded to four decimals
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


## Cross Entropy with torch.nn

- DO NOT apply Softmax in the last layer: nn.CrossEntropyLoss already applies it internally (nn.LogSoftmax + nn.NLLLoss, the "negative log likelihood loss")
- Y holds class labels (class indices), not one-hot vectors
- Y_pred holds raw scores (logits), with no softmax applied

### Example for 1 sample

In [37]:
# Loss module reused by the following cells, called as loss(scores, class_labels)
loss = nn.CrossEntropyLoss()

In [38]:
# Target is a class index (class 0 for the single sample), not a one-hot vector
Y = torch.tensor([0])
# nsamples x n classes = 1x3
# Raw scores (logits): the first ranks class 0 highest, the second ranks class 1 highest
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

In [39]:
# Apply the nn.CrossEntropyLoss instance to the raw scores and the class-index target
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
# NOTE: the labels say "numpy", but these values come from the torch loss;
# the text is left unchanged so it matches the recorded output.
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.4170
Loss2 numpy: 1.8406


In [40]:
# .item() unwraps each single-element loss tensor into a plain Python number
print(l1.item())
print(l2.item())

0.4170299470424652
1.840616226196289


In [41]:
# Predicted class = index of the largest score in each row (dim=1).
# torch.argmax returns only the indices, so no throwaway `_` is assigned;
# assigning `_` in a notebook shadows IPython's previous-output variable.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
print(predictions1)
print(predictions2)

tensor([0])
tensor([1])


### Example for 3 samples

In [42]:
# Three samples: one target class index per sample
Y = torch.tensor([2, 0, 1])

# Raw scores, laid out as n_samples x n_classes = 3 x 3
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [2.1, 1.0, 0.1],
    [0.1, 1.0, 2.1],
    [0.1, 3.0, 0.1],
])

In [43]:
# Same loss instance, now applied to the three-sample batch
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
# NOTE: the labels say "numpy", but these values come from the torch loss;
# the text is left unchanged so it matches the recorded output.
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

Loss1 numpy: 0.3018
Loss2 numpy: 1.6242


In [44]:
# .item() unwraps each single-element loss tensor into a plain Python number
print(l1.item())
print(l2.item())

0.3018244206905365
1.6241613626480103


In [45]:
# Predicted class per sample = index of the largest score in each row (dim=1).
# torch.argmax returns only the indices, so no throwaway `_` is assigned;
# assigning `_` in a notebook shadows IPython's previous-output variable.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
print(predictions1)
print(predictions2)

tensor([2, 0, 1])
tensor([0, 2, 1])


## Binary Neural Net With Sigmoid

In [52]:
class NeuralNet1(nn.Module):
    """Two-layer network that emits one sigmoid-activated value per sample."""

    def __init__(self, input_size, hidden_size):
        """Create linear1 (input -> hidden), a ReLU, and linear2 (hidden -> 1)."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Apply linear1 -> ReLU -> linear2 -> sigmoid to ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid maps the single output score into (0, 1)
        return torch.sigmoid(score)

In [53]:
# 28*28 = 784 input features, 5 hidden units
model = NeuralNet1(input_size=28*28, hidden_size=5)
# Bare last expression: the notebook displays the module's repr
model

NeuralNet1(
  (linear1): Linear(in_features=784, out_features=5, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=5, out_features=1, bias=True)
)

In [None]:
# nn.BCELoss applies no activation of its own (neither sigmoid nor softmax);
# NeuralNet1.forward already passes its output through torch.sigmoid.
criterion = nn.BCELoss()

## Multiclass Neural Net With Softmax (applied by the loss)

In [57]:
class NeuralNet2(nn.Module):
    """Two-layer network that returns one raw score per class."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create linear1 (input -> hidden), a ReLU, and linear2 (hidden -> num_classes)."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply linear1 -> ReLU -> linear2 to ``x`` and return the raw scores."""
        hidden = self.relu(self.linear1(x))
        # No softmax here: the scores are returned as-is
        return self.linear2(hidden)

In [58]:
# 28*28 = 784 input features, 5 hidden units, 3 output classes
model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
# Bare last expression: the notebook displays the module's repr
model

NeuralNet2(
  (linear1): Linear(in_features=784, out_features=5, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=5, out_features=3, bias=True)
)

In [59]:
# nn.CrossEntropyLoss applies the softmax step itself (LogSoftmax + NLLLoss),
# which is why NeuralNet2.forward returns raw scores.
criterion = nn.CrossEntropyLoss()