In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
# Preprocessing pipeline applied to every MNIST image:
#   1. ToTensor   -> converts the PIL image to a float tensor scaled to [0, 1]
#   2. Normalize  -> (x - 0.5) / 0.5, which shifts the single channel to [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# download=True only fetches the files when they are missing from the root
# directory, so an existing local copy is reused and a fresh checkout no
# longer fails with "Dataset not found".
train_set = datasets.MNIST("../Data/", download=True, train=True, transform=transform)

# Serve the training split in shuffled mini-batches of 64 samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

In [3]:
# Loss: CrossEntropyLoss expects raw, un-normalised class scores (logits),
# so the network below deliberately ends without a softmax layer.
criterion = nn.CrossEntropyLoss()

# Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 class scores.
model = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 10),
)

# Pull one mini-batch from the training loader.
images, labels = next(iter(train_loader))

# Reshape each image into a single row vector; the batch dimension is kept
# and the remaining dimensions are collapsed into one.
images = images.view(images.size(0), -1)

# Forward pass: the network output is used directly as logits.
logits = model(images)

# Loss of the freshly initialised (untrained) network on this batch.
loss = criterion(logits, labels)
print("Loss calculated : ", loss)

Loss calculated :  tensor(2.3206, grad_fn=<NllLossBackward>)


In [4]:
# Same architecture as a plain 784 -> 128 -> 64 -> 10 classifier, but with a
# final LogSoftmax over the class dimension (dim=1). The network therefore
# emits log-probabilities, which is the input NLLLoss is designed for.
model = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)
print(model)

# Negative log-likelihood loss, paired with the LogSoftmax output above.
criterion = nn.NLLLoss()

# Take one mini-batch and flatten every image into a row vector.
images, labels = next(iter(train_loader))
images = images.view(images.size(0), -1)

# NOTE: the name `logits` is kept for continuity, but because the model ends
# in LogSoftmax this tensor actually holds log-probabilities.
logits = model(images)

loss = criterion(logits, labels)
print("loss calculated : ", loss)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax()
)
loss calculated :  tensor(2.3105, grad_fn=<NllLossBackward>)
