In [1]:
import torch
import torchvision
import matplotlib.pylab as plt

In [2]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [3]:
# Define parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST dataset
mnist_train = torchvision.datasets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=torchvision.transforms.ToTensor(),
                          download=True)

mnist_test = torchvision.datasets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=torchvision.transforms.ToTensor(),
                         download=True)
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [5]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__() 
        self.keep_prob = 0.5
        # Layer 1 : 
        # In  : (28, 28, 1)
        # Conv: (28, 28, 32) - Filter No.32
        # Pool: (14, 14, 32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # Layer 2 : 
        # In  : (14, 14, 32)
        # Conv: (14, 14, 64) - Filter No.64
        # Pool: ( 7,  7, 64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # Layer 3 : 
        # In  : ( 7,  7, 64)
        # Conv: ( 7,  7, 128) - Filter No.128
        # Pool: ( 4,  4, 128)
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))
        
        # Fully-Connected Layer :
        # 4 x 4 x 128 -> 625
        self.fc1 = torch.nn.Linear(4*4*128,625,bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob))
        # Fully-Connected Layer :
        # 7 x 7 x 64 -> 10
        self.fc2 = torch.nn.Linear(625,10,bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0),-1) # Flatten them for FC
        out = self.layer4(out)
        out = self.fc2(out)
        return out

In [6]:
# instantiate CNN model
model = CNN().to(device)

In [7]:
# Define Cost/loss & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [8]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 0.180790991
Epoch 2, cost 0.049534507
Epoch 3, cost 0.036923952
Epoch 4, cost 0.029195374
Epoch 5, cost 0.022981148
Epoch 6, cost 0.019788316
Epoch 7, cost 0.016717318
Epoch 8, cost 0.013697321
Epoch 9, cost 0.013322204
Epoch 10, cost 0.010329810
Epoch 11, cost 0.010820020
Epoch 12, cost 0.009583737
Epoch 13, cost 0.008614470
Epoch 14, cost 0.006479246
Epoch 15, cost 0.006582783
Learning finished


In [9]:
# Test model and check accuracy
with torch.no_grad():
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy : {:7.4f} %'.format(accuracy.item()*100))

Accuracy : 97.3700 %
