In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from math import log, pi, exp

  warn(f"Failed to load image Python extension: {e}")


## Cross Entropy ##

nn.CrossEntropyLoss()

output: unnormalized logits

target: class probabilities (e.g. a softmax output, same shape as output)  or  class indices (one integer per sample)

In [156]:
# Reference loss module, one batch of logits, and both target encodings
# that nn.CrossEntropyLoss accepts.
ce_loss_fn = nn.CrossEntropyLoss()

output = torch.randn(2, 5, requires_grad=True)  # unnormalized logits: 2 samples x 5 classes
target_prob = torch.softmax(torch.randn(2, 5), dim=1)  # soft targets: every row sums to 1
target_index = torch.tensor([1, 3], dtype=torch.long)  # hard targets: one class index per sample
print(output, target_prob, target_index, sep="\n")

tensor([[-0.6459,  1.8714, -0.2603,  0.3539,  1.1255],
        [ 0.4164,  0.3537, -0.0721, -0.9630, -2.2039]], requires_grad=True)
tensor([[0.3057, 0.1880, 0.1005, 0.2407, 0.1651],
        [0.0903, 0.3517, 0.0821, 0.3311, 0.1448]])
tensor([1, 3])


In [130]:
def CrossEntropyLoss(output, target):
    """Cross-entropy between logits and targets, averaged over the batch.

    Hand-written counterpart of ``nn.CrossEntropyLoss()`` (default
    ``reduction='mean'``) for the two target encodings used in this notebook.

    Args:
        output: Unnormalized logits of shape ``(N, C)``.
        target: Either class probabilities of shape ``(N, C)`` or integer
            class indices of shape ``(N,)``.

    Returns:
        Scalar tensor: the summed per-sample loss divided by ``target.shape[0]``.

    Raises:
        ValueError: If ``target`` is neither 1-D nor 2-D.
    """
    # log_softmax applies the log-sum-exp trick, so extreme logits do not
    # underflow to log(0) and no ad-hoc epsilon is needed inside the log.
    log_prob = torch.log_softmax(output, dim=-1)
    batch_size = target.shape[0]

    if target.dim() == 2:
        # Soft targets: expectation of -log q under the target distribution.
        return -(log_prob * target).sum() / batch_size
    if target.dim() == 1:
        # Hard targets: pick the log-probability of the labelled class per row.
        rows = torch.arange(log_prob.shape[0], device=log_prob.device)
        return -log_prob[rows, target].sum() / batch_size
    raise ValueError(
        f"target must be 1-D (class indices) or 2-D (class probabilities), got {target.dim()}-D"
    )
    
# Printed in pairs: hand-written value first, nn.CrossEntropyLoss second,
# for soft targets and then for hard targets.
print(
    CrossEntropyLoss(output, target_prob),
    ce_loss_fn(output, target_prob),
    CrossEntropyLoss(output, target_index),
    ce_loss_fn(output, target_index),
    sep="\n",
)

tensor(1.7768, grad_fn=<DivBackward0>)
tensor(1.7768, grad_fn=<DivBackward1>)
tensor(2.2193, grad_fn=<DivBackward0>)
tensor(2.2193, grad_fn=<NllLossBackward0>)


## NLL Loss ##

nn.NLLLoss()

output: log-probabilities (log-softmax of the logits)

target: indices

In [231]:
# Reference NLL module plus a fresh batch of raw scores and hard labels.
# Note: `output` and `target_index` are rebound here for this section.
nll_loss_fn = nn.NLLLoss()

output = torch.randn(2, 5)  # raw scores: 2 samples x 5 classes
target_index = torch.tensor([1, 3], dtype=torch.long)  # one class index per sample
print(output, target_index, sep="\n")

tensor([[ 0.1699,  1.5913, -1.2887,  0.7471, -1.5609],
        [-1.1157,  0.5208,  0.3123, -0.4699, -1.6789]])
tensor([1, 3])


In [232]:
def NLLLoss(output_prob, target):
    """Mean negative log-likelihood of the labelled classes.

    No logarithm is taken here: ``output_prob`` is indexed as
    ``[row, target[row]]`` and used as-is, so callers are expected to pass
    log-probabilities.

    Args:
        output_prob: Tensor indexed by (sample, class).
        target: 1-D tensor with one class index per sample.

    Returns:
        The negated sum of the selected entries divided by ``target.shape[0]``.
    """
    n_samples = target.shape[0]
    row_ids = np.arange(output_prob.shape[0])
    picked = output_prob[row_ids, target]
    return -picked.sum() / n_samples

# NLL expects log-probabilities. F.log_softmax computes them with the
# log-sum-exp trick, avoiding log(0) = -inf when a softmax entry underflows.
print(NLLLoss(F.log_softmax(output, dim=-1), target_index))
print(nll_loss_fn(F.log_softmax(output, dim=-1), target_index))

tensor(1.2368)
tensor(1.2368)


In [233]:
# Same result: nn.CrossEntropyLoss on logits equals nn.NLLLoss on the
# log-softmax of those logits.
a1 = nll_loss_fn(F.log_softmax(output, dim=-1), target_index)
a2 = ce_loss_fn(output, target_index)
# Check the claim explicitly instead of relying on eyeballing the printout.
assert torch.allclose(a1, a2), "NLLLoss(log_softmax(x)) should equal CrossEntropyLoss(x)"
print(a1)
print(a2)

tensor(1.2368)
tensor(1.2368)


## BCE Loss ##

nn.BCELoss()

output: per-element probability, 0 <= output <= 1 (e.g. a sigmoid output)

target: per-element probability, 0 <= target <= 1 (same shape as output)

In [223]:
bce_loss_fn = nn.BCELoss()

# requires_grad=True so that losses computed from `output` carry a grad_fn,
# matching the tensor reprs recorded for this section.
output = torch.randn(4, 2, requires_grad=True)
# Raw scores, not yet probabilities: both tensors are passed through
# sigmoid before being handed to the BCE functions.
target_prob = torch.randn(4, 2)
print(output)
print(target_prob)

tensor([[-0.2785,  2.0216],
        [ 0.4317, -1.3540],
        [ 1.8208, -0.0185],
        [ 0.0481, -0.3824]], requires_grad=True)
tensor([[-0.2324, -0.1733],
        [ 0.3539, -0.1443],
        [ 1.6869, -0.3359],
        [ 1.3514,  2.2017]])


In [225]:
def BCELoss(output, target):
    """Binary cross-entropy averaged over every element.

    Hand-written counterpart of ``nn.BCELoss()`` (default ``reduction='mean'``).

    Args:
        output: Predicted probabilities, each in ``[0, 1]``.
        target: Target probabilities, same shape as ``output``.

    Returns:
        Scalar tensor: the summed element-wise loss divided by ``target.numel()``.
    """
    # nn.BCELoss clamps each log term to >= -100. Without the clamp, an
    # output of exactly 0 or 1 produces log(0) = -inf, which turns the loss
    # into inf, or into nan through 0 * -inf.
    log_p = torch.clamp(torch.log(output), min=-100.0)
    log_not_p = torch.clamp(torch.log(1 - output), min=-100.0)
    return -(target * log_p + (1 - target) * log_not_p).sum() / target.numel()

# Sigmoid maps both raw tensors into (0, 1) before they are compared;
# hand-written value first, nn.BCELoss second.
print(
    BCELoss(output.sigmoid(), target_prob.sigmoid()),
    bce_loss_fn(output.sigmoid(), target_prob.sigmoid()),
    sep="\n",
)

tensor(0.7644, grad_fn=<DivBackward0>)
tensor(0.7644, grad_fn=<BinaryCrossEntropyBackward0>)


## KL Divergence Loss ##

nn.KLDivLoss()

output: log-probabilities (log-softmax of the logits)

target: probabilities (e.g. a softmax output)

In [235]:
# reduction="mean" is the KLDivLoss default, spelled out here: the summed
# pointwise loss is divided by the total number of elements.
kl_loss_fn = nn.KLDivLoss(reduction="mean")

# Two independent batches of raw scores, drawn in this order.
output, target_prob = (torch.randn(4, 2) for _ in range(2))
print(output, target_prob, sep="\n")

tensor([[ 1.7335, -1.9119],
        [ 0.4035, -0.9658],
        [ 1.9480,  0.6130],
        [-0.3847, -1.7015]])
tensor([[ 0.1402, -0.6207],
        [-0.1955,  0.8656],
        [ 0.9659, -0.5352],
        [ 1.8111,  0.8578]])


In [242]:
def KLDivergenceLoss(output, target):
    """Pointwise KL term ``target * (log(target) - output)``, averaged over all elements.

    Hand-written counterpart of ``nn.KLDivLoss()`` with ``reduction='mean'``.

    Args:
        output: Log-probabilities of the predicted distribution.
        target: Probabilities of the target distribution, same shape as ``output``.

    Returns:
        Scalar tensor: the summed pointwise term divided by ``target.numel()``.
    """
    # xlogy(t, t) is t * log(t) with the convention 0 * log(0) = 0, so
    # zero-probability targets contribute nothing instead of producing nan.
    pointwise = torch.xlogy(target, target) - target * output
    return pointwise.sum() / target.numel()

# KLDivLoss takes log-probabilities as input and probabilities as target.
# F.log_softmax computes the former without the log(0) risk of log(softmax(x)).
print(KLDivergenceLoss(F.log_softmax(output, dim=-1), torch.softmax(target_prob, dim=-1)))
print(kl_loss_fn(F.log_softmax(output, dim=-1), torch.softmax(target_prob, dim=-1)))

tensor(0.1562)
tensor(0.1562)


In [259]:
def getEntropy(target_prob):
    """Shannon entropy (in nats) of ``softmax(target_prob)`` along the last dim.

    Args:
        target_prob: Unnormalized scores; despite the name, softmax is
            applied inside this function.

    Returns:
        Tensor with the last dimension reduced: ``-sum(p * log p)``.
    """
    # One log_softmax pass gives log p without log(0) = -inf for entries that
    # underflow, so p * log p is 0 there instead of nan.
    log_p = torch.log_softmax(target_prob, dim=-1)
    return -(log_p.exp() * log_p).sum(dim=-1)

# Unreduced variants so the quantities can be compared sample by sample.
# NOTE: this rebinds kl_loss_fn and ce_loss_fn to reduction='none' versions.
kl_loss_fn = nn.KLDivLoss(reduction='none')
ce_loss_fn = nn.CrossEntropyLoss(reduction='none')
# F.log_softmax avoids the log(0) risk of torch.log(torch.softmax(...)).
kl_loss = kl_loss_fn(F.log_softmax(output, dim=-1), torch.softmax(target_prob, dim=-1))
ce_loss = ce_loss_fn(output, torch.softmax(target_prob, dim=-1))
entropy = getEntropy(target_prob)
print(kl_loss)
print(ce_loss)
print(entropy)

# KL divergence + entropy = CrossEntropy
# (sum the pointwise KL terms over classes to get one value per sample)
assert torch.allclose(kl_loss.sum(-1) + entropy, ce_loss, atol=1e-6), \
    "KL(target || output) + H(target) should equal the cross-entropy"
print(kl_loss.sum(-1) + entropy)
print(ce_loss)


tensor([[-0.2437,  0.8047],
        [-0.2910,  0.9647],
        [ 0.0265, -0.0244],
        [-0.0639,  0.0765]])
tensor([1.1867, 1.2438, 0.4769, 0.6038])
tensor([0.6257, 0.5700, 0.4748, 0.5913])
tensor([1.1867, 1.2438, 0.4769, 0.6038])
tensor([1.1867, 1.2438, 0.4769, 0.6038])


## Cosine Similarity Loss ##

cos = nn.CosineSimilarity(dim=1, eps=1e-6)

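input1: arbitrary real-valued vectors, shape (N, D) (random in this example)

input2: arbitrary real-valued vectors, same shape as input1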

In [271]:
# Two independent batches of five random 128-dimensional vectors, drawn in
# this order, plus the reference module comparing them along dim 1.
input1, input2 = (torch.randn(5, 128) for _ in range(2))
cos = nn.CosineSimilarity(dim=1, eps=1e-6)

def CosineSimilarity(input1, input2, eps=1e-6):
    """Cosine similarity of paired vectors along the last dimension.

    Args:
        input1: First batch of vectors.
        input2: Second batch of vectors, same shape as ``input1``.
        eps: Lower bound applied to the product of the two norms, so
            zero-length vectors do not cause a division by zero.

    Returns:
        Tensor with the last dimension reduced:
        ``dot(input1, input2) / max(||input1|| * ||input2||, eps)``.
    """
    norm1 = (input1 ** 2).sum(dim=-1) ** 0.5
    norm2 = (input2 ** 2).sum(dim=-1) ** 0.5
    norm_product = norm1 * norm2
    # Element-wise floor at eps, built with the same shape and dtype as the norms.
    floor = torch.ones_like(norm_product) * eps
    dot = (input1 * input2).sum(-1)
    return dot / torch.maximum(norm_product, floor)

# Hand-written value first, nn.CosineSimilarity second.
print(
    CosineSimilarity(input1, input2),
    cos(input1, input2),
    sep="\n",
)

tensor([ 0.0330, -0.0483,  0.0351, -0.0431,  0.1155])
tensor([ 0.0330, -0.0483,  0.0351, -0.0431,  0.1155])


In [277]:
# Every pair is labelled +1 (torch.ones), and the second line prints the
# mean of 1 - cosine similarity over the batch for comparison.
cos_loss_fn = nn.CosineEmbeddingLoss()
print(
    cos_loss_fn(input1, input2, torch.ones(5)),
    (1 - cos(input1, input2)).sum() / input1.shape[0],
    sep="\n",
)

tensor(0.9816)
tensor(0.9816)
