In [13]:
import torch
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device


device(type='cpu')

In [14]:
from torchvision import datasets
from torchvision.transforms import ToTensor
# MNIST training split stored under ./data; downloading is requested here.
# ToTensor() is applied to every image as it is read.
train_data = datasets.MNIST(root='data', train=True, download=True, transform=ToTensor())

# MNIST test split from the same ./data folder, with the same tensor transform.
# No download is requested for this split.
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

In [15]:
from torch.utils.data import DataLoader
# One shuffled DataLoader per split, each yielding batches of 100 samples
# and using a single worker process.
loaders = {
    split_name: DataLoader(split_data, batch_size=100, shuffle=True, num_workers=1)
    for split_name, split_data in (('train', train_data), ('test', test_data))
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x1cf9d62d4c0>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x1cf9d7bbac0>}

In [16]:
import torch.nn as nn
class CNN(nn.Module):
    """Two-stage convolutional classifier with a 10-way linear head.

    Each stage is a 5x5 convolution (stride 1, padding 2) followed by ReLU and
    a 2x2 max-pool. The padding keeps the spatial size through the convolution
    and the pool halves it, so the linear layer's ``32 * 7 * 7`` input size
    corresponds to single-channel 28x28 images.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels, then halve height and width.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 16 -> 32 channels, then halve height and width again.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Linear head mapping the flattened feature maps to 10 class scores.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return ``(scores, flat_features)`` for a batch ``x``.

        ``flat_features`` is the second stage's output reshaped to
        ``(batch_size, -1)``; it is returned next to the scores so callers can
        visualize it.
        """
        feature_maps = self.conv2(self.conv1(x))
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        scores = self.out(flat_features)
        return scores, flat_features

from torch import optim

# Build the network and print its layer layout.
cnn = CNN()
print(cnn)

# Cross-entropy loss applied to the network's class scores.
loss_func = nn.CrossEntropyLoss()

# Adam over all of the network's parameters with a learning rate of 0.01.
# The optimizer is the cell's last expression, so its configuration is echoed.
optimizer = optim.Adam(cnn.parameters(), lr=0.01)
optimizer

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)

In [17]:
from torch.autograd import Variable
# Number of complete passes over loaders['train'] that train() performs.
num_epochs = 10
def train(num_epochs, cnn, loaders):
    """Fit ``cnn`` on ``loaders['train']`` for ``num_epochs`` epochs.

    This function reads the notebook-level ``loss_func`` and ``optimizer``
    objects, so both must be defined before it is called, and ``optimizer``
    is expected to manage ``cnn``'s parameters.

    Args:
        num_epochs: Number of complete passes over the training loader.
        cnn: Model whose forward pass returns a tuple with the class scores
            in position 0.
        loaders: Mapping with a ``'train'`` entry that supports ``len()`` and
            yields ``(images, labels)`` tensor batches.

    Returns:
        None. The model is updated in place, and a progress line with the
        current batch loss is printed every 100 steps.
    """
    cnn.train()

    # Batches are sent to wherever the model's parameters live, so training
    # still works after the model has been moved to another device.
    # A model without parameters leaves the batches where they are.
    first_param = next(cnn.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            if target_device is not None:
                images = images.to(target_device)
                labels = labels.to(target_device)

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            # Forward pass: keep only the class scores (tuple element 0).
            output = cnn(images)[0]
            loss = loss_func(output, labels)

            # Backpropagate and apply the parameter update.
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Run training; train() prints a loss line every 100 steps of each epoch.
train(num_epochs, cnn, loaders)

Epoch [1/10], Step [100/600], Loss: 0.1881
Epoch [1/10], Step [200/600], Loss: 0.1315
Epoch [1/10], Step [300/600], Loss: 0.0949
Epoch [1/10], Step [400/600], Loss: 0.1304
Epoch [1/10], Step [500/600], Loss: 0.1094
Epoch [1/10], Step [600/600], Loss: 0.1024
Epoch [2/10], Step [100/600], Loss: 0.0288
Epoch [2/10], Step [200/600], Loss: 0.0714
Epoch [2/10], Step [300/600], Loss: 0.0159
Epoch [2/10], Step [400/600], Loss: 0.0540
Epoch [2/10], Step [500/600], Loss: 0.0379
Epoch [2/10], Step [600/600], Loss: 0.1152
Epoch [3/10], Step [100/600], Loss: 0.0255
Epoch [3/10], Step [200/600], Loss: 0.0466
Epoch [3/10], Step [300/600], Loss: 0.0162
Epoch [3/10], Step [400/600], Loss: 0.0276
Epoch [3/10], Step [500/600], Loss: 0.0450
Epoch [3/10], Step [600/600], Loss: 0.0185
Epoch [4/10], Step [100/600], Loss: 0.0227
Epoch [4/10], Step [200/600], Loss: 0.0121
Epoch [4/10], Step [300/600], Loss: 0.0598
Epoch [4/10], Step [400/600], Loss: 0.0647
Epoch [4/10], Step [500/600], Loss: 0.1176
Epoch [4/10