In [1]:
import torch
import torch.nn.functional as F
import numpy as np

In [60]:
# Demo: two equivalent ways of computing the cross-entropy loss (CEL) for a
# single sample with 10 classes.

# Local random generator with a fixed seed, so the tensors below (and hence
# the printed losses) are identical on every run without touching the
# global RNG state.
_gen = torch.Generator().manual_seed(0)

# input x:
# e.g. one picture from the MNIST dataset, shape (28, 28), flattened to (1, 28*28)
x = torch.randn(1, 28*28, generator=_gen)
# weight w:
# following the PyTorch convention, the parameter matrix is laid out as
# [channel_out, channel_in]
w = torch.randn(10, 28*28, generator=_gen)
# bias b
b = torch.randn(1, 10, generator=_gen)
# logits: output before the activation function
# logits shape: x@w.t() + b = (1,784)@(784,10) + (1,10) = (1,10)
logits = x@w.t() + b
# target t: index of the correct class
t = torch.tensor([3])

# method 1:
# F.cross_entropy() combines log-softmax and the negative log likelihood loss,
# so when CEL is the error function we pass the raw 'logits' directly and
# must NOT apply softmax ourselves first.
CEL = F.cross_entropy(logits, t)
print(CEL)

# method 2:
# do the two steps by hand. Cross-entropy loss is
#     CEL = -sum_i(t_i * log(p_i))
# where p = softmax(logits) and t is the one-hot target, so it reduces to
# -log(p[target]).
# pred: class probabilities in [0, 1] (kept for inspection).
pred = F.softmax(logits, dim=1)
# Take the log with log_softmax rather than torch.log(softmax(...)): the
# logits here are large, so a softmax probability can underflow to 0 and
# torch.log would then return -inf. log_softmax computes the same quantity
# in a numerically stable way.
pred_log = F.log_softmax(logits, dim=1)
# F.nll_loss(input, target): the negative log likelihood loss
# https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html#torch.nn.NLLLoss
# so its input must already be log-probabilities.
manul_cel = F.nll_loss(pred_log, t)
print(manul_cel)

# Both methods must agree.
assert torch.allclose(CEL, manul_cel), "manual CEL differs from F.cross_entropy"

tensor(56.3692)
tensor(56.3692)
