In [5]:
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [6]:
from torch.utils.data.sampler import SubsetRandomSampler

# Preprocessing shared by every split: convert each image to a tensor, then
# normalise with mean 0.5 and std 1.0.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,)),
])

# MNIST is fetched into ./mnist by the training-split constructor (download=True);
# the test split is read from the same root directory.
train_set = dset.MNIST(root='./mnist', train=True, transform=trans, download=True)
test_set = dset.MNIST(root='./mnist', train=False, transform=trans)

batch_size = 50

# Hold out the first `valid_size` fraction of the official training split for
# validation. The index list is not shuffled, so the membership of each subset is
# deterministic; only the order in which samples are drawn is random.
valid_size = 0.1
num_train = len(train_set)
split = int(np.floor(valid_size * num_train))
indices = list(range(num_train))
valid_idx = indices[:split]
train_idx = indices[split:]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Training and validation loaders both read from `train_set`; the samplers keep
# them on disjoint index ranges.
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, sampler=valid_sampler)

# The test loader yields one image at a time, in dataset order.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False)

print('number of training data:', len(train_idx))
print('number of validation data:', len(valid_idx))
print('number of test data:', len(test_set))
print(train_loader)

('number of training data:', 54000)
('number of validation data:', 6000)
('number of test data:', 10000)
<torch.utils.data.dataloader.DataLoader object at 0x11a5e0a90>


In [7]:
class LeNet(nn.Module):
    """LeNet-style CNN: two (5x5 conv -> ReLU -> 2x2 max-pool) stages, then two linear layers.

    Args:
        n_class: number of output scores produced by the final linear layer.

    The hard-coded flatten width of 4*4*50 corresponds to the shapes traced in
    ``forward`` for single-channel 28x28 inputs.
    """

    def __init__(self, n_class=10):
        super(LeNet, self).__init__()
        # Layers are created in conv1, conv2, fc1, fc2 order, which is also the
        # order they appear in the module repr and in the parameter list.
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 50, 5)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, n_class)

    def forward(self, x):
        """Map a batch of images to raw class scores (no softmax applied).

        Shape trace for x of shape [B, 1, 28, 28]:
            conv1 + relu -> [B, 20, 24, 24]
            max-pool 2x2 -> [B, 20, 12, 12]
            conv2 + relu -> [B, 50, 8, 8]
            max-pool 2x2 -> [B, 50, 4, 4]
            flatten      -> [B, 800]
            fc1 + relu   -> [B, 500]
            fc2          -> [B, n_class]
        """
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2, 2)
        flat = stage2.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


model = LeNet()
print(model)

LeNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


# Train `model` for 10 epochs on `train_loader`; after every epoch, measure loss and
# accuracy on `valid_loader`.
#
# Images are 28 x 28 pixels, 784 in total. Raw pixel values lie between 0 and 255, but
# the loaders apply the transform configured on the dataset, so the model does not see
# that raw range.
#
# Names left behind for later cells:
#   train_loss  - one entry per training batch over all epochs (detached 0-dim tensors)
#   preds       - predicted labels per validation batch, for the most recent epoch
train_loss = []
for epoch in range(10):
    # training
    model.train()
    ave_loss = 0
    for batch_idx, (x, target) in enumerate(train_loader):
        optimizer.zero_grad()
        # Inputs do not need gradients: only the model parameters are optimised.
        logits = model(x)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()
        # Book-keeping uses values cut off from the autograd graph. Accumulating the
        # live `loss` tensor would chain every iteration's history together and keep
        # it in memory for the whole run.
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1   # exponentially smoothed loss
        train_loss.append(loss.detach())
        if (batch_idx+1) % 100 == 0 or (batch_idx+1) == len(train_loader):
            print('==>>> epoch: {}, batch index: {}, train loss: {:.6f}'.format(
                epoch, batch_idx+1, ave_loss))
    # validation
    model.eval()
    correct_cnt, ave_loss = 0, 0
    total_cnt = 0
    preds = []
    # No parameter updates happen here, so autograd tracking is switched off.
    with torch.no_grad():
        for batch_idx, (x, target) in enumerate(valid_loader):
            logits = model(x)
            loss = criterion(logits, target)
            _, pred_label = torch.max(logits, 1)   # index of the highest score per sample
            preds.append(pred_label)
            total_cnt += x.size()[0]
            correct_cnt += (pred_label == target).sum()
            # smooth average
            ave_loss = ave_loss * 0.9 + loss.item() * 0.1

            if (batch_idx+1) % 100 == 0 or (batch_idx+1) == len(valid_loader):
                print('==>>> epoch: {}, batch index: {}, validation loss: {:.6f}, validation acc: {:.3f}'.format(
                    epoch, batch_idx+1, ave_loss, correct_cnt.item() * 1.0 / total_cnt))

==>>> epoch: 0, batch index: 100, train loss: 0.249803
==>>> epoch: 0, batch index: 200, train loss: 0.169985
==>>> epoch: 0, batch index: 300, train loss: 0.105871
==>>> epoch: 0, batch index: 400, train loss: 0.099077
==>>> epoch: 0, batch index: 500, train loss: 0.082924
==>>> epoch: 0, batch index: 600, train loss: 0.072157
==>>> epoch: 0, batch index: 700, train loss: 0.070156
==>>> epoch: 0, batch index: 800, train loss: 0.067351
==>>> epoch: 0, batch index: 900, train loss: 0.092248
==>>> epoch: 0, batch index: 1000, train loss: 0.086294
==>>> epoch: 0, batch index: 1080, train loss: 0.063827
==>>> epoch: 0, batch index: 100, validation loss: 0.045092, validation acc: 0.986
==>>> epoch: 0, batch index: 120, validation loss: 0.062884, validation acc: 0.985
==>>> epoch: 1, batch index: 100, train loss: 0.067302
==>>> epoch: 1, batch index: 200, train loss: 0.041300
==>>> epoch: 1, batch index: 300, train loss: 0.064998
==>>> epoch: 1, batch index: 400, train loss: 0.044731
==>>> e