In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

In [2]:
#Hyperparams
num_epochs = 15
num_classes = 10
batch_size = 128
learning_rate = 0.001

In [3]:
#Load MNIST data set
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train = True,
                                           transform = transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform = transforms.ToTensor())

#Data Loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                          batch_size = batch_size,
                                          shuffle = False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw



In [4]:
type(train_loader)

torch.utils.data.dataloader.DataLoader

In [10]:
print(train_loader.dataset)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../../data/
    Split: Train
    StandardTransform
Transform: ToTensor()


In [11]:
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    self.conv2 = nn.Conv2d(32, 64, 3, 1)
    self.dense1 = nn.Linear(12 * 12 * 64, 128)
    self.dense2 = nn.Linear(128, num_classes)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x, 2, 2)
    x = F.dropout(x, 0.25)
    x = x.view(-1, 12 * 12 * 64) # flatten으로 생각하면 좋다
    x = F.relu(self.dense1(x))
    x = F.dropout(x, 0.5)
    x = self.dense2(x)
    return F.log_softmax(x, dim=1)

In [13]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

total_step = len(train_loader)

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    images = images.to(device) # GPU 옵션을 장착하는 개념이라 생각하면 될까?
    labels = labels.to(device)

    #Forward Pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 100 == 0:
      print("Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(epoch + 1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/15], Step [100/469], Loss: 0.3043
Epoch [1/15], Step [200/469], Loss: 0.3453
Epoch [1/15], Step [300/469], Loss: 0.0676
Epoch [1/15], Step [400/469], Loss: 0.1857
Epoch [2/15], Step [100/469], Loss: 0.0716
Epoch [2/15], Step [200/469], Loss: 0.0644
Epoch [2/15], Step [300/469], Loss: 0.2204
Epoch [2/15], Step [400/469], Loss: 0.1995
Epoch [3/15], Step [100/469], Loss: 0.0704
Epoch [3/15], Step [200/469], Loss: 0.0346
Epoch [3/15], Step [300/469], Loss: 0.1402
Epoch [3/15], Step [400/469], Loss: 0.1148
Epoch [4/15], Step [100/469], Loss: 0.0360
Epoch [4/15], Step [200/469], Loss: 0.0200
Epoch [4/15], Step [300/469], Loss: 0.0537
Epoch [4/15], Step [400/469], Loss: 0.0544
Epoch [5/15], Step [100/469], Loss: 0.1029
Epoch [5/15], Step [200/469], Loss: 0.0659
Epoch [5/15], Step [300/469], Loss: 0.0810
Epoch [5/15], Step [400/469], Loss: 0.0449
Epoch [6/15], Step [100/469], Loss: 0.0638
Epoch [6/15], Step [200/469], Loss: 0.0823
Epoch [6/15], Step [300/469], Loss: 0.1115
Epoch [6/15

In [14]:
from sklearn.metrics import roc_auc_score

preds = []
y_true = []
#Test the model
model.eval() # Set model to evaluation mode.

with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    detached_pred = predicted.detach().cpu().numpy()
    detached_label = labels.detach().cpu().numpy()
    for f in range(0, len(detached_pred)):
      preds.append(detached_pred[f])
      y_true.append(detached_label[f])
    
  print("Test Accuracy of the model on the 10000 test images: {:.2%}".format(correct/total))
  preds = np.eye(num_classes)[preds]
  y_true = np.eye(num_classes)[y_true]
  auc = roc_auc_score(preds, y_true)
  print("AUC: {:.2%}".format(auc))

#save the model checkpoint
torch.save(model.state_dict(), 'pytorch_mnist_cnn.ckpt')

Test Accuracy of the model on the 10000 test images: 98.52%
AUC: 99.18%
