In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
import matplotlib.pyplot as plt



In [3]:
''' load MNIST database '''
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
dataset_path = '../mnist_dataset'

train_dataset = MNIST(dataset_path, transform=transform, train=True, download=True)
valid_dataset = MNIST(dataset_path, transform=transform, train=False, download=True)
test_dataset = MNIST(dataset_path, transform=transform, train=False, download=True)


In [5]:
''' load MNIST dataset by using dataloader'''
batch_size = 64

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                         batch_size=batch_size,
                         shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                         batch_size=batch_size,
                         shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)


In [6]:
''' test data loader'''
print(len(train_loader))
for batch_idx, (image, label) in enumerate(train_loader):
    if (batch_idx + 1) % 100 == 0:
        print(image.shape, label.shape)


938
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 1, 28, 28]) torch.Size([64])
torch.Size([64, 1, 28, 28]) torch.Size([64])


KeyboardInterrupt: 

In [7]:
''' test implementation '''
# prepare network input 
x_batch, y_batch = iter(train_loader).next()
batch_size, n_chn, hor_dim, ver_dim = x_batch.size()

x = x_batch.view(batch_size, -1)

# define neural network 
fc1_dim = 128
fc2_dim = 128
out_dim = 10

fc1 = nn.Linear(hor_dim * ver_dim, fc1_dim)
fc2 = nn.Linear(fc1_dim, fc2_dim)
fc3 = nn.Linear(fc2_dim, out_dim)

x = F.relu(fc1(x))
x = F.relu(fc2(x))
x = fc3(x)

print(x_batch.size())
print(x.size(), y_batch.size())

torch.Size([64, 1, 28, 28])
torch.Size([64, 10]) torch.Size([64])


In [8]:
''' Model class definition '''
class MLP(nn.Module):
    def __init__(self, in_dim, fc1_dim, fc2_dim, out_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(in_dim, fc1_dim)
        self.fc2 = nn.Linear(fc1_dim, fc2_dim)
        self.fc3 = nn.Linear(fc2_dim, out_dim)
        
    def forward(self, x):
        x = x.view(-1, x.size(-3) * x.size(-2) * x.size(-1))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MLP(28 * 28, 128, 128, 10)
print(model)


MLP(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)


In [9]:
''' Training criteria and optimizer definition '''
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
print(criterion, optimizer)

CrossEntropyLoss() Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)


In [10]:
''' Train network '''
num_epochs = 5
model.train()

for epoch in range(num_epochs):
    loss_avg = 0.
    for image, label in train_loader:
        model_out = model(image)
        loss = criterion(model_out, label)
        loss.backward()
        optimizer.step()
        loss_avg += loss / len(train_loader)
    print('Epoch: {:} \tLoss: {:.6f}'.format(
        epoch+1, loss_avg))

Epoch: 1 	Loss: 0.929087
Epoch: 2 	Loss: 0.403054
Epoch: 3 	Loss: 0.317698
Epoch: 4 	Loss: 0.299654
Epoch: 5 	Loss: 0.276334
