In [1]:
"""import modules"""
import torch
import torch.nn as nn
import numpy as np

**Fact:**
  - Softmax applies the exponential function to each element, and normalizes by dividing by the sum of all these exponentials
  - Squashes each output into the range (0, 1), so it can be interpreted as a probability
  - Sum of all probabilities is 1

In [4]:
"""define softmax function"""
def softmax(x):
    """Compute the softmax of ``x`` along axis 0 in a numerically stable way.

    Each element is exponentiated and divided by the sum of the exponentials
    taken over axis 0, so the values along that axis are positive and sum to 1.

    Args:
        x: array-like of scores. For a 1-D input the whole vector is
            normalized; for a 2-D input each column is normalized separately.

    Returns:
        np.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to normalize; np.max would raise on an empty reduction
        return np.exp(x)
    # Subtracting the axis-0 maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing on large scores
    # (e.g. exp(1000) = inf, and inf / inf = nan).
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [11]:
"""using softmax function"""
# Hand-written NumPy softmax on a small vector of scores.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('Softmax numpy:', outputs)

# The same scores through PyTorch's built-in softmax.
x = torch.tensor(x.tolist())
outputs = torch.softmax(x, dim=0)  # dim=0: normalize across the entries of the 1-D tensor
print('Softmax torch:', outputs)

Softmax numpy: [0.65900114 0.24243297 0.09856589]
Softmax torch: tensor([0.6590, 0.2424, 0.0986])


In [13]:
"""
- Define cross entropy function
- Cross-entropy loss, or log loss, measures the performance of a classification model
- The model's output (the prediction being scored) is a probability value between 0 and 1
- Loss increases as the predicted probability diverges from the actual label
"""
def cross_entropy(actual, predicted):
    """Return the summed cross-entropy between ``actual`` and ``predicted``.

    Args:
        actual: target values (one-hot encoded in this notebook's examples).
        predicted: predicted probabilities; clipped to
            [1e-15, 1 - 1e-15] before taking the logarithm.

    Returns:
        The negative sum of ``actual * log(predicted)`` over all elements.
        The result is a sum, not an average over the number of entries.
    """
    tiny = 1e-15
    # keep probabilities away from exactly 0 so np.log never returns -inf
    safe_probs = np.clip(predicted, tiny, 1 - tiny)
    weighted_logs = np.multiply(actual, np.log(safe_probs))
    return -np.sum(weighted_logs)

"""
- y must be one hot encoded:
 if class 0: [1 0 0]
 if class 1: [0 1 0]
 if class 2: [0 0 1]
"""
# One-hot target: the true class is class 0.
Y = np.array([1, 0, 0])

# Predicted class probabilities for the same sample.
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

# The loss is lower for the better prediction.
l1, l2 = (cross_entropy(Y, probs) for probs in (Y_pred_good, Y_pred_bad))

print(f'Loss1 numpy: {l1:.4f}', f'Loss2 numpy: {l2:.4f}', sep='\n')

Loss1 numpy: 0.3567
Loss2 numpy: 2.3026


In [14]:
"""
- CrossEntropyLoss in PyTorch (applies Softmax)
- nn.LogSoftmax + nn.NLLLoss
- NLLLoss = negative log likelihood loss
"""
loss = nn.CrossEntropyLoss()  # called as loss(input_logits, target_class_indices)

# Target has size nSamples (1 in this case).
# Each element is a class label: 0, 1, or 2, i.e. three classes in this case.
# Y (target) contains class labels, not one-hot encoded vectors.
Y = torch.tensor([0])

# Input has size nSamples x nClasses = 1 x 3.
# Y_pred (input) must hold raw, unnormalized scores (logits) for each class,
# not softmax output.
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])  # largest logit at class 0
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])   # largest logit at class 1

# The loss is lower for the better prediction.
l1, l2 = (loss(logits, Y) for logits in (Y_pred_good, Y_pred_bad))

print(f'PyTorch Loss1: {l1.item():.4f}', f'PyTorch Loss2: {l2.item():.4f}', sep='\n')


PyTorch Loss1: 0.4170
PyTorch Loss2: 1.8406


In [15]:
# Get predictions: the predicted class is the index of the largest logit
# along dim 1 (the class dimension). torch.argmax returns those indices
# directly, so no throwaway value is bound to `_` (which IPython uses for
# the previous cell's output).
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
print(f'Actual class: {Y.item()}, \nY_pred1: {predictions1.item()}, \nY_pred2: {predictions2.item()}')

Actual class: 0, 
Y_pred1: 0, 
Y_pred2: 1


In [16]:
"""
Allows batch loss for multiple samples:
- target is of size nBatch = 3
- each element has class label: 0, 1, or 2
"""
# Target has size nBatch = 3; each element is a class label: 0, 1, or 2.
Y = torch.tensor([2, 0, 1])

# Input has size nBatch x nClasses = 3 x 3 and holds logits (not softmax output).
Y_pred_good = torch.tensor([
    [0.1, 0.2, 3.9],  # largest logit -> class 2
    [1.2, 0.1, 0.3],  # largest logit -> class 0
    [0.3, 2.2, 0.2],  # largest logit -> class 1
])

Y_pred_bad = torch.tensor([
    [0.9, 0.2, 0.1],  # largest logit -> class 0
    [0.1, 0.3, 1.5],  # largest logit -> class 2
    [1.2, 0.2, 0.5],  # largest logit -> class 0
])

# Compute the loss with the `loss` criterion created in an earlier cell;
# the loss is lower for the better prediction.
l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(f'Batch Loss1:  {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')

# Get predictions: index of the largest logit in each row (dim 1).
# torch.argmax returns the indices directly, so nothing is bound to `_`.
predictions1 = torch.argmax(Y_pred_good, dim=1)
predictions2 = torch.argmax(Y_pred_bad, dim=1)
print(f'\nActual class: {Y}, \nY_pred1: {predictions1}, \nY_pred2: {predictions2}')

Batch Loss1:  0.2834
Batch Loss2: 1.6418

Actual class: tensor([2, 0, 1]), 
Y_pred1: tensor([2, 0, 1]), 
Y_pred2: tensor([0, 2, 0])


In [17]:
"""Binary classification"""
class NeuralNet1(nn.Module):
    """One-hidden-layer network for binary classification.

    Layout: Linear(input_size -> hidden_size) -> ReLU -> Linear(hidden_size -> 1)
    -> sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()  # activation function
        # the output size is always 1 for this binary classifier
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output score computed for ``x``."""
        hidden = self.relu(self.linear1(x))
        score = self.linear2(hidden)
        # sigmoid is applied here, at the end of the network
        return torch.sigmoid(score)


model = NeuralNet1(input_size=28 * 28, hidden_size=5)
criterion = nn.BCELoss()  # binary cross entropy

In [18]:
"""Multiclass problem"""
class NeuralNet2(nn.Module):
    """One-hidden-layer network for multiclass classification.

    Layout: Linear(input_size -> hidden_size) -> ReLU
    -> Linear(hidden_size -> num_classes). The output is raw scores; no
    softmax is applied inside the network.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one raw score per class for ``x`` (no softmax at the end)."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)


model = NeuralNet2(input_size=28 * 28, hidden_size=5, num_classes=3)
# nn.CrossEntropyLoss applies softmax itself, so the model does not need to
criterion = nn.CrossEntropyLoss()