In [16]:
import torch
import torch.nn as nn
import numpy as np

In [17]:
def softmax(x):
    """Return the softmax of ``x`` computed along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the mathematical result unchanged (the common factor cancels in the
    ratio) but keeps ``np.exp`` from overflowing to ``inf`` -- and the result
    from collapsing to ``nan`` -- when the scores are large.

    Args:
        x: Array-like of scores. For a 1-D input the whole vector is
            normalised; for an N-D input each slice along axis 0 is
            normalised independently.

    Returns:
        numpy.ndarray with the same shape as ``x`` whose entries along
        axis 0 sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # A max-reduction over an empty axis raises; there is nothing to
        # normalise, so just return an empty float array of matching shape.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return exps / np.sum(exps, axis=0, keepdims=True)

In [36]:
# Three example scores for a single sample; the next cell reuses `x`.
x = np.array([2.0, 1.0, 0.1])
# Bare trailing expression so the notebook displays the resulting probabilities.
softmax(x)

array([0.65900114, 0.24243297, 0.09856589])

In [37]:
# torch.as_tensor converts the NumPy array exactly like torch.from_numpy
# (same dtype, shared memory) but also accepts a value that is already a
# tensor, so re-running this cell after `x` has been rebound does not raise.
x = torch.as_tensor(x)
# Same computation as the NumPy softmax, using the PyTorch built-in over dim 0.
torch.softmax(x, dim=0)

tensor([0.6590, 0.2424, 0.0986], dtype=torch.float64)

In [38]:
def cross_entropy(actual, predicted, eps=1e-15):
    """Cross-entropy between target weights and predicted probabilities.

    Computes ``-sum(actual * log(predicted))`` over all elements (no
    averaging).

    Args:
        actual: Array-like of target weights, e.g. a one-hot vector.
        predicted: Array-like of predicted probabilities, broadcastable
            against ``actual``.
        eps: Clipping bound. Predictions are clipped to ``[eps, 1 - eps]``
            before the logarithm so that a probability of exactly 0 cannot
            produce ``-inf`` or ``nan``. It is a keyword with a default so
            the function does not depend on a module-level constant being
            defined by some other cell.

    Returns:
        The summed loss as a NumPy scalar.
    """
    predicted = np.clip(predicted, eps, 1 - eps)  # keep log() away from 0
    return -np.sum(actual * np.log(predicted))

In [39]:
# Targets are one-hot vectors over the 3 classes:
#   class 0 -> [1 0 0]
#   class 1 -> [0 1 0]
#   class 2 -> [0 0 1]
Y = np.array([1, 0, 0])
Y_pred_good = np.array([0.7, 0.2, 0.1])  # most probability on the true class 0
Y_pred_bad = np.array([0.1, 0.3, 0.6])  # little probability on the true class 0
# Evaluate both prediction vectors against the same target, in order.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))
print(f'Loss1 numpy: {l1:.4f}', f'Loss2 numpy: {l2:.4f}', sep='\n')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [40]:
loss = nn.CrossEntropyLoss()
# This criterion already handles the softmax and the target encoding itself:
# pass it raw scores and class-index targets (no one-hot, no softmax first).
# nn.CrossEntropyLoss == nn.LogSoftmax + nn.NLLLoss

In [45]:
# True class index for each of the 3 samples (indices, not one-hot rows).
Y = torch.tensor([2, 0, 1])

# Unnormalised scores laid out as nsamples x nclasses = 3 x 3.
# In every row the largest score sits at that sample's true class.
Y_pred_good = torch.tensor([
    [0.1, 0.2, 3.9],  # largest score at class 2
    [1.2, 0.1, 0.3],  # largest score at class 0
    [0.3, 2.2, 0.2],  # largest score at class 1
])

# Same layout, but no row has its largest score at the true class.
Y_pred_bad = torch.tensor([
    [0.9, 0.2, 0.1],
    [0.1, 0.3, 1.5],
    [1.2, 0.2, 0.5],
])

In [46]:
# Score both prediction sets against the same class-index targets.
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
# .item() unwraps each loss tensor into a plain Python float for display.
(l1.item(), l2.item())

(0.28342217206954956, 1.6418448686599731)

In [47]:
# Predicted class = column index of the largest score in each row (dim 1).
# torch.argmax returns those indices directly, so there is no need to unpack
# torch.max's (values, indices) pair into `_` -- binding `_` by hand shadows
# IPython's "last output" variable for the rest of the session.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
predictions1, predictions2

(tensor([2, 0, 1]), tensor([0, 2, 0]))

In [None]:
# Binary classification
class NeuralNet1(nn.Module):
    """Two-layer perceptron that emits one sigmoid-squashed value per sample.

    Layout: Linear(input_size -> hidden_size), ReLU, Linear(hidden_size -> 1),
    then a sigmoid, so every output lies in [0, 1].
    """

    def __init__(self, input_size, hidden_size):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of features in each input sample.
            hidden_size: Width of the hidden layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run ``x`` through the network and return the sigmoid of the final score."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # The sigmoid is applied here, inside the model.
        return torch.sigmoid(score)

# BCELoss consumes probabilities, which matches the sigmoid inside NeuralNet1.
criterion = nn.BCELoss()
# 28*28 = 784 input features feeding a 5-unit hidden layer.
model = NeuralNet1(hidden_size=5, input_size=28 * 28)

In [None]:
# Multiclass problem
class NeuralNet2(nn.Module):
    """Two-layer perceptron that emits one raw score per class.

    Layout: Linear(input_size -> hidden_size), ReLU,
    Linear(hidden_size -> num_classes). No softmax is applied to the output.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: Number of features in each input sample.
            hidden_size: Width of the hidden layer.
            num_classes: Number of scores produced per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Run ``x`` through the network and return the unnormalised class scores."""
        hidden = self.relu(self.linear1(x))
        # Deliberately no softmax here: the raw scores are returned as-is.
        return self.linear2(hidden)

# CrossEntropyLoss applies the softmax itself, so the model returns raw scores.
criterion = nn.CrossEntropyLoss()
# 28*28 = 784 input features, a 5-unit hidden layer, and 3 output classes.
model = NeuralNet2(num_classes=3, hidden_size=5, input_size=28 * 28)