In [None]:
from torch import nn

def create_model(input_size=784, output_size=10, hidden_sizes=(128, 64)):
    """Build a feed-forward classifier that outputs log-probabilities.

    The network is a stack of ``Linear -> ReLU`` pairs (one pair per entry in
    ``hidden_sizes``) followed by a final ``Linear`` layer and a
    ``LogSoftmax`` over dim 1. Because the last layer is ``LogSoftmax``, the
    output is meant to be paired with ``nn.NLLLoss``.

    Args:
        input_size: Number of input features per sample (default 784, 28x28).
        output_size: Number of output classes (default 10).
        hidden_sizes: Widths of the hidden ``Linear`` layers, in order.

    Returns:
        nn.Sequential: The assembled model. With the default arguments the
        layer order and shapes are 784 -> 128 -> 64 -> 10.
    """
    layers = []
    in_features = input_size
    for width in hidden_sizes:
        layers.append(nn.Linear(in_features, width))  # Performs W.x + b
        layers.append(nn.ReLU())                      # Adds non-linearity
        in_features = width
    layers.append(nn.Linear(in_features, output_size))
    layers.append(nn.LogSoftmax(dim=1))               # Log-probabilities over the class dimension

    return nn.Sequential(*layers)

# Instantiate the network; the optimizer cell reads its weights via model.parameters().
model=create_model()

At the end of the network we used LogSoftmax.

This means we will use the negative log-likelihood loss (`nn.NLLLoss`) as the cost function.

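Remember that `CrossEntropyLoss()` applies `LogSoftmax()` to the output of the neural network internally (followed by `NLLLoss()`), so if we used it as the cost function instead, we would not need the `LogSoftmax` layer at the end of the network.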

In [None]:
# Add cost function and optimizer
# Imported here so this cell also runs on a fresh kernel: `optim` was otherwise
# only imported in a later cell, which raised NameError under Restart & Run All.
from torch import nn, optim

cost = nn.NLLLoss()  # Negative log-likelihood loss; expects the log-probabilities produced by LogSoftmax
# cost = nn.CrossEntropyLoss()  # Applies LogSoftmax + NLLLoss internally (use without a LogSoftmax layer)
# cost = nn.MSELoss()  # Used for regression

# the simplest optimizer we can use is the SGD() or stochastic gradient descent optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#optimizer = optim.Adagrad(model.parameters(), lr=0.001)#Adagrad()
#optimizer = optim.Adam(model.parameters(), lr=0.001)#Adam()

Momentum helps to speed up gradient descent (and hence the learning process) when nearing a minimum.



## Train

In [None]:
import torch
from torchvision import datasets, transforms
from torch import nn, optim

# NOTE(review): horizontal flips change the meaning of digit images — confirm this
# augmentation is wanted if the data is MNIST.
training_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),       # Data augmentation: random left-right flip
    transforms.ToTensor(),                        # Converts the image to a tensor in the range 0 - 1
    transforms.Normalize((0.1307,), (0.3081,))    # Standardizes with a single-channel mean / std
    ])

testing_transform = transforms.Compose([          # No data augmentation for the test transform
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])

# NOTE(review): the dataset class here was an unparseable placeholder
# ("<your data here>"). MNIST is assumed because (0.1307, 0.3081) are the usual
# MNIST mean/std and the model expects 28x28 = 784 inputs — swap in your own
# dataset class if that assumption is wrong.
trainset = datasets.MNIST('data/', download=True, train=True, transform=training_transform)
# NOTE(review): batch_size is not defined in the visible cells; it must be set before this cell runs.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

In [None]:
def train(model, train_loader, cost, optimizer, epoch):
    """Train ``model`` on ``train_loader`` and print loss/accuracy after every epoch.

    Args:
        model: Network to optimize; it receives each batch flattened to
            ``(batch, features)``.
        train_loader: Iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used to average the metrics).
        cost: Loss callable taking ``(prediction, target)``. Its ``reduction``
            attribute, if present, decides how batch losses are aggregated.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epoch: Number of passes to make over ``train_loader``.

    Returns:
        None. One summary line is printed per epoch.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    # A mean-reduced loss is a per-sample average for the batch, so it must be
    # scaled back by the batch size before dividing by the dataset size below.
    mean_reduced = getattr(cost, "reduction", "mean") == "mean"
    for e in range(epoch):
        running_loss = 0.0
        correct = 0
        for data, target in train_loader:                                 # Iterates through batches
            data = data.view(data.shape[0], -1)                           # Reshapes data to (batch, features)

            # Remember that pytorch will accumulate gradients by default.
            # So for every batch, we need to reset them to zero.
            optimizer.zero_grad()                                         # Resets gradients for new batch
            pred = model(data)                                            # Runs forward pass
            loss = cost(pred, target)                                     # Calculates loss
            # .item() yields a plain Python float; adding the tensor itself would
            # keep autograd history alive and print as "tensor(..., grad_fn=...)".
            batch_loss = loss.item()
            if mean_reduced:
                batch_loss *= data.shape[0]
            running_loss += batch_loss
            loss.backward()                                               # Calculates gradients for model parameters
            optimizer.step()                                              # Updates weights
            pred = pred.argmax(dim=1, keepdim=True)                       # Index of the largest log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()         # Counts how many correct predictions were made
        print(f"Epoch {e}: Loss {running_loss/n_samples}, Accuracy {100*(correct/n_samples)}%")

In [None]:
def train(model, train_loader, cost, optimizer, epoch):
    """Run the training loop and print the average loss and accuracy per epoch.

    Args:
        model: Network to optimize; each batch is flattened to
            ``(batch, features)`` before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches exposing
            ``.dataset``, whose length normalizes the reported metrics.
        cost: Loss callable taking ``(prediction, target)``. Its ``reduction``
            attribute, if present, decides how batch losses are aggregated.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epoch: Number of passes to make over ``train_loader``.

    Returns:
        None. One summary line is printed per epoch.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    # Mean-reduced losses are per-sample averages for one batch; scale them by
    # the batch size so that dividing by n_samples gives a true per-sample mean.
    mean_reduced = getattr(cost, "reduction", "mean") == "mean"
    for e in range(epoch):
        running_loss = 0.0
        correct = 0
        for data, target in train_loader:
            data = data.view(data.shape[0], -1)
            optimizer.zero_grad()
            pred = model(data)
            loss = cost(pred, target)
            # Accumulate a Python float, not the tensor, so no autograd history
            # is retained across batches and the printed value is a plain number.
            batch_loss = loss.item()
            if mean_reduced:
                batch_loss *= data.shape[0]
            running_loss += batch_loss
            loss.backward()
            optimizer.step()
            pred = pred.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
        print(f"Epoch {e}: Loss {running_loss/n_samples}, Accuracy {100*(correct/n_samples)}%")

In [None]:
def test(model, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.view(data.shape[0], -1)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    print(f'Test set: Accuracy: {correct}/{len(test_loader.dataset)} = {100*(correct/len(test_loader.dataset))}%)')
