In [1]:
import torch
import torchvision
import matplotlib.pylab as plt

In [2]:
# Select Target Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(1)
if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [3]:
# Define parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [4]:
# MNIST dataset
mnist_train = torchvision.datasets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=torchvision.transforms.ToTensor(),
                          download=True)

mnist_test = torchvision.datasets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=torchvision.transforms.ToTensor(),
                         download=True)
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [5]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__() 
        # Layer 1 : 
        # In  : (28, 28, 1)
        # Conv: (28, 28, 32) - Filter No.32
        # Pool: (14, 14, 32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # Layer 2 : 
        # In  : (14, 14, 32)
        # Conv: (14, 14, 64) - Filter No.32
        # Pool: ( 7,  7, 64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # Fully-Connected Layer :
        # 7 x 7 x 64 -> 10
        self.fc = torch.nn.Linear(7*7*64,10,bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0),-1) # Flatten them for FC
        out = self.fc(out)
        return out

In [6]:
# instantiate CNN model
model = CNN().to(device)

In [7]:
# Define Cost/loss & Optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [8]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X,Y in data_loader:
        # reshape input image into [Batch_size by 784]
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis,Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('Epoch {:d}, cost {:.9f}'.format(epoch+1,avg_cost))
print('Learning finished')

Epoch 1, cost 0.219514459
Epoch 2, cost 0.059616797
Epoch 3, cost 0.043746632
Epoch 4, cost 0.035276860
Epoch 5, cost 0.029777972
Epoch 6, cost 0.024430074
Epoch 7, cost 0.020840559
Epoch 8, cost 0.017934659
Epoch 9, cost 0.014660881
Epoch 10, cost 0.012839266
Epoch 11, cost 0.011353732
Epoch 12, cost 0.009593834
Epoch 13, cost 0.008685712
Epoch 14, cost 0.006278273
Epoch 15, cost 0.006584868
Learning finished


In [10]:
# Test model and check accuracy
with torch.no_grad():
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

RuntimeError: CUDA out of memory. Tried to allocate 958.00 MiB (GPU 0; 5.94 GiB total capacity; 1019.15 MiB already allocated; 515.81 MiB free; 1020.00 MiB reserved in total by PyTorch)