Let's walk through a real scenario on a toy dataset.

In [9]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

If you are using a GPU (if you have one, definitely use it),  
you should know one thing about GPU training:  
your data and your model must be located on the same device before training can start.  
You can always move your data or model by using `.to(device)`,
or by passing the device as an argument to the function that creates them.

In [10]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Let's set our hyperparameters, which are among the most important choices for training.

In [15]:
# Hyper-parameters
# -- model shape --
input_size = 784        # 28 * 28 values per flattened image
hidden_size = 500       # width of the single hidden layer
num_classes = 10        # number of output scores produced by the network
# -- optimisation schedule --
num_epochs = 5          # full passes over the training loader
batch_size = 100        # samples per mini-batch in both data loaders
learning_rate = 1e-3    # step size handed to the optimizer

Let's use our datasets

In [16]:
# MNIST dataset (images and labels), stored under ./data.
# Only the training split requests a download; both splits apply the same
# ToTensor transform to every image.
train_dataset = torchvision.datasets.MNIST(
    root='data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='data',
    train=False,
    transform=transforms.ToTensor(),
)

# Data loaders (input pipeline): mini-batches of `batch_size` samples.
# The training loader reshuffles its samples; the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

We know how to use pretrained torch models, but what if we want to create a custom model?  
P.S. Designing a network architecture from scratch is very hard if your knowledge is limited, so customizing a known architecture is a good place to start.

In [17]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per input.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names determine the state_dict keys (fc1.*, fc2.*).
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw output scores for `x`; no softmax is applied here."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [18]:
# Build the network from the hyper-parameters, then place it on the
# configured device (GPU or CPU); a freshly built model lives on the CPU.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

In [19]:
# Loss function: cross-entropy between the model outputs and the labels
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over every parameter of the model, with the configured step size
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

Now the important thing is to write a training loop.  
There can be as many different training loops as you like, but every training loop follows the same simple logic.

In [20]:
# Train the model
total_step = len(train_loader)
# Make sure the model is in training mode, so this cell still behaves
# correctly when it is re-run after an evaluation cell switched modes.
model.train()
# At each epoch we iterate over the whole training loader, one mini-batch at a time
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to a vector of `input_size` values, then move
        # the batch and its labels to the configured device
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass through the model
        outputs = model(images)
        # Calculate the loss for this mini-batch
        loss = criterion(outputs, labels)

        # Backward and optimize.
        # Gradients accumulate across backward() calls, so unless you want
        # to accumulate them on purpose, zero them before each backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters with the freshly computed gradients
        optimizer.step()

        # To monitor the training process we print or log some useful values
        if (i+1) % 200 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/5], Step [100/600], Loss: 0.2192
Epoch [1/5], Step [200/600], Loss: 0.2566
Epoch [1/5], Step [300/600], Loss: 0.1970
Epoch [1/5], Step [400/600], Loss: 0.2340
Epoch [1/5], Step [500/600], Loss: 0.2046
Epoch [1/5], Step [600/600], Loss: 0.1533
Epoch [2/5], Step [100/600], Loss: 0.0468
Epoch [2/5], Step [200/600], Loss: 0.0737
Epoch [2/5], Step [300/600], Loss: 0.1283
Epoch [2/5], Step [400/600], Loss: 0.1604
Epoch [2/5], Step [500/600], Loss: 0.1259
Epoch [2/5], Step [600/600], Loss: 0.1315
Epoch [3/5], Step [100/600], Loss: 0.0486
Epoch [3/5], Step [200/600], Loss: 0.0244
Epoch [3/5], Step [300/600], Loss: 0.0645
Epoch [3/5], Step [400/600], Loss: 0.0469
Epoch [3/5], Step [500/600], Loss: 0.0660
Epoch [3/5], Step [600/600], Loss: 0.0527
Epoch [4/5], Step [100/600], Loss: 0.0383
Epoch [4/5], Step [200/600], Loss: 0.0488
Epoch [4/5], Step [300/600], Loss: 0.2054
Epoch [4/5], Step [400/600], Loss: 0.0812
Epoch [4/5], Step [500/600], Loss: 0.0246
Epoch [4/5], Step [600/600], Loss:

In [21]:
# Test the model
# model.eval() switches the model to evaluation mode. This only changes how
# mode-dependent layers (e.g. dropout, batch norm) behave; it does NOT turn
# off gradient tracking, so it is a complement to no_grad, not a replacement.
model.eval()
# In the test phase we don't need gradients. Inside torch.no_grad() no
# autograd graph is recorded, which lowers memory use and run time.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        # Flatten every image to a vector of `input_size` values, then move
        # the batch and its labels to the configured device
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # The predicted class is the index of the largest score in each row.
        # `.data` is unnecessary here: nothing is tracked inside no_grad.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


Accuracy of the network on the 10000 test images: 97.91 %


Awesome! In less than a minute you solved the digit recognition problem with high accuracy.  
Let's save our model.

In [22]:
# Save the model checkpoint: only the state_dict is written, to 'model.ckpt'
torch.save(obj=model.state_dict(), f='model.ckpt')