In [36]:
from torchvision import datasets
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [37]:
# !git clone https://github.com/baek2sm/ml.git
# !tar -zxvf ./ml/datasets/MNIST.tar.gz

path='./'
train_dataset = datasets.MNIST(path, train=True, download=True)
test_dataset = datasets.MNIST(path, train=False, download=True)

X_train, y_train = train_dataset.data/255, train_dataset.targets
X_test, y_test = test_dataset.data/255, test_dataset.targets

fatal: destination path 'ml' already exists and is not an empty directory.
MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNIST/raw/train-images-idx3-ubyte.gz
MNIST/processed/
MNIST/processed/training.pt
MNIST/processed/test.pt


In [38]:
print('train input data : ',X_train.shape)
print('train target : ',y_train.shape)
print('test input data : ',X_test.shape)
print('test target : ',y_test.shape)

train input data :  torch.Size([60000, 28, 28])
train target :  torch.Size([60000])
test input data :  torch.Size([10000, 28, 28])
test target :  torch.Size([10000])


In [39]:
X_train, X_test = X_train.unsqueeze(1),X_test.unsqueeze(1)

print('train input data : ',X_train.shape)
print('test input data : ',X_test.shape)


train input data :  torch.Size([60000, 1, 28, 28])
test input data :  torch.Size([10000, 1, 28, 28])


In [40]:
train_dset = TensorDataset(X_train, y_train)
test_dset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dset, batch_size=32, shuffle=False)

In [41]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.hidden_layer1 = nn.Sequential(
            nn.Conv2d(1,64, kernel_size=(3,3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2)),
            nn.Dropout(0.5)
        )

        self.hidden_layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3,3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2)),
            nn.Dropout(0.5)
        )

        self.hidden_layer3 = nn.Linear(128*5*5, 128)
        self.output_layer = nn.Linear(128, 10)

    def forward(self, X):
      out = self.hidden_layer1(X)
      out = self.hidden_layer2(out)
      out = out.view(out.shape[0], -1)
      out = self.hidden_layer3(out)
      out = self.output_layer(out)
      return out

In [42]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

print('device : {}'.format(device))

device : cuda


In [43]:
def train(model, criterion, optimizer, loader):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        hypothesis = model(X_batch)
        loss = criterion(hypothesis, y_batch)
        loss.backward()
        optimizer.step()
        y_predicted = torch.argmax(hypothesis,1)

        acc = (y_predicted == y_batch).float().mean()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(loader), epoch_acc / len(loader)

In [44]:
def evaluate(model, criterion, loader):
  epoch_loss = 0
  epoch_acc = 0

  model.eval()

  with torch.no_grad():
    for X_batch, y_batch in loader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      hypothesis = model(X_batch)
      loss = criterion(hypothesis, y_batch)
      y_predicted = torch.argmax(hypothesis,1)
      acc = (y_predicted == y_batch).float().mean()

      epoch_loss += loss.item()
      epoch_acc += acc.item()

    return epoch_loss / len(loader), epoch_acc / len(loader)


In [45]:
n_epochs = 20
for epoch in range(1, n_epochs+1):
    loss, acc = train(model, criterion, optimizer, train_loader)
    test_loss, test_acc = evaluate(model, criterion, test_loader)
    print("epoch:{}, loss:{:.3f}, acc:{:.2f}, test_loss:{:.3f}, test_acc:{:.3f}".format(epoch, loss, acc, test_loss, test_acc))

epoch:1, loss:0.423, acc:0.87, test_loss:0.129, test_acc:0.965
epoch:2, loss:0.149, acc:0.95, test_loss:0.084, test_acc:0.975
epoch:3, loss:0.113, acc:0.97, test_loss:0.063, test_acc:0.980
epoch:4, loss:0.095, acc:0.97, test_loss:0.051, test_acc:0.983
epoch:5, loss:0.085, acc:0.97, test_loss:0.047, test_acc:0.986
epoch:6, loss:0.076, acc:0.98, test_loss:0.042, test_acc:0.986
epoch:7, loss:0.070, acc:0.98, test_loss:0.045, test_acc:0.986
epoch:8, loss:0.065, acc:0.98, test_loss:0.037, test_acc:0.987
epoch:9, loss:0.061, acc:0.98, test_loss:0.035, test_acc:0.988
epoch:10, loss:0.060, acc:0.98, test_loss:0.035, test_acc:0.988
epoch:11, loss:0.056, acc:0.98, test_loss:0.033, test_acc:0.989
epoch:12, loss:0.053, acc:0.98, test_loss:0.030, test_acc:0.989
epoch:13, loss:0.049, acc:0.98, test_loss:0.028, test_acc:0.991
epoch:14, loss:0.048, acc:0.98, test_loss:0.027, test_acc:0.990
epoch:15, loss:0.046, acc:0.99, test_loss:0.026, test_acc:0.990
epoch:16, loss:0.045, acc:0.99, test_loss:0.025, 