In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import torch.nn.init

In [2]:
# Fixed seed so weight initialisation and batch shuffling are repeatable.
SEED = 777

# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
cuda_available = torch.cuda.is_available()
device = "cuda" if cuda_available else "cpu"

torch.manual_seed(SEED)
if cuda_available:
  # Seed every visible CUDA device as well.
  torch.cuda.manual_seed_all(SEED)

In [3]:
# Show which device was selected (rendered as the cell's output).
device

'cpu'

In [4]:
# Training hyperparameters, kept together so they are easy to tune.
learning_rate = 1e-3  # step size passed to the optimizer
train_epochs = 15     # number of full passes over the training set
batch_size = 128      # samples per mini-batch

In [5]:
# MNIST dataset: training and test splits, downloaded on first use.
# Both splits share the same storage root and download flag; each gets its own
# ToTensor() transform, applied when samples are read through the dataset.
mnist_common = dict(root="", download=True)

mnist_train = dsets.MNIST(train=True, transform=transforms.ToTensor(), **mnist_common)

mnist_test = dsets.MNIST(train=False, transform=transforms.ToTensor(), **mnist_common)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST/raw/train-images-idx3-ubyte.gz to MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST/raw/train-labels-idx1-ubyte.gz to MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST/raw



In [6]:
# Mini-batch iterator over the training split: reshuffled every epoch, and the
# final incomplete batch is dropped so every batch has exactly `batch_size` samples.
train_dataloader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [7]:
class CNN(nn.Module):
  """Three conv/ReLU/max-pool stages followed by a two-layer classifier head.

  The flattened feature size (3 * 3 * 128) is tied to 1x28x28 inputs:
  each stage keeps the spatial size in its convolution and halves it in the
  pool, giving 28 -> 14 -> 7 -> 3. The output has 10 logits per sample.
  """

  @staticmethod
  def _conv_stage(in_channels, out_channels):
    """Build one stage: 3x3 conv (stride 1, padding 1) -> ReLU -> 2x2 max-pool."""
    return nn.Sequential(
        # (size - 3 + 2) / 1 + 1 == size, so the convolution preserves H and W
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )

  def __init__(self):
    super().__init__()

    # Feature extractor (spatial size noted after pooling).
    self.layer1 = self._conv_stage(1, 32)    # 28 -> 14
    self.layer2 = self._conv_stage(32, 64)   # 14 -> 7
    self.layer3 = self._conv_stage(64, 128)  # 7 -> 3

    # Classifier head.
    self.fc1 = nn.Linear(3 * 3 * 128, 625, bias=True)
    self.activate1 = nn.ReLU()
    self.fc2 = nn.Linear(625, 10, bias=True)

    # Replace the default weight init of both linear layers with Xavier uniform.
    for linear in (self.fc1, self.fc2):
      torch.nn.init.xavier_uniform_(linear.weight)

  def forward(self, x):
    """Map a batch of images to a (batch, 10) tensor of class logits."""
    features = x
    for stage in (self.layer1, self.layer2, self.layer3):
      features = stage(features)

    # Collapse channel and spatial dimensions into one vector per sample.
    flat = features.view(features.size(0), -1)
    hidden = self.activate1(self.fc1(flat))
    return self.fc2(hidden)

In [8]:
# Build the network and move its parameters to the selected device.
model = CNN().to(device)

In [9]:
# Cross-entropy over the 10 output logits; the criterion is moved to the same
# device as the model.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [16]:
# Walk the module tree (the model itself first, then every nested submodule)
# and print each module's class, preceded by a separator line.
for module in model.modules():
  print("====", type(module), sep="\n")

====
<class '__main__.CNN'>
====
<class 'torch.nn.modules.container.Sequential'>
====
<class 'torch.nn.modules.conv.Conv2d'>
====
<class 'torch.nn.modules.activation.ReLU'>
====
<class 'torch.nn.modules.pooling.MaxPool2d'>
====
<class 'torch.nn.modules.container.Sequential'>
====
<class 'torch.nn.modules.conv.Conv2d'>
====
<class 'torch.nn.modules.activation.ReLU'>
====
<class 'torch.nn.modules.pooling.MaxPool2d'>
====
<class 'torch.nn.modules.container.Sequential'>
====
<class 'torch.nn.modules.conv.Conv2d'>
====
<class 'torch.nn.modules.activation.ReLU'>
====
<class 'torch.nn.modules.pooling.MaxPool2d'>
====
<class 'torch.nn.modules.linear.Linear'>
====
<class 'torch.nn.modules.activation.ReLU'>
====
<class 'torch.nn.modules.linear.Linear'>


In [42]:
# Training
# Run `train_epochs` passes over the training loader and report the mean
# mini-batch loss after each epoch.

# Make training mode explicit so the loop stays correct if mode-dependent
# layers (dropout, batch-norm) are added to the model later.
model.train()

total_batch = len(train_dataloader)
for epoch in range(train_epochs):
  avg_cost = 0.
  for X, y in train_dataloader:
    # Move the batch to the same device as the model.
    X = X.to(device)
    y = y.to(device)

    out = model(X)
    cost = loss_fn(out, y)

    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # .item() extracts a plain Python float. Accumulating the tensor itself
    # would chain every batch's autograd history onto avg_cost for the whole
    # epoch and make the print below show a tensor with a grad_fn.
    avg_cost += cost.item() / total_batch
  print(f"Epoch:{epoch+1} - Cost:{avg_cost}")
print("Learning Finished")

RuntimeError: ignored

In [None]:
# Evaluate on the full test split in a single forward pass.
# Switch to inference mode so mode-dependent layers (if any are added later)
# behave deterministically.
model.eval()
with torch.no_grad():
  # `mnist_test.data` bypasses the dataset's ToTensor() transform, so the raw
  # pixel values must be rescaled here to the [0, 1] range the model saw
  # during training; otherwise inputs are ~255x larger than at train time.
  X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().div(255.).to(device)
  y_test = mnist_test.targets.to(device)

  pred = model(X_test)
  # Predicted class = index of the largest logit along the class dimension.
  correct_pred = (torch.argmax(pred, 1) == y_test)
  accuracy = correct_pred.float().mean()
  print("Accuracy: ", accuracy.item())