In [None]:
from torchvision import datasets
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
!git clone https://github.com/baek2sm/ml.git
!tar -zxvf ./ml/datasets/MNIST.tar.gz

path = './'
train_dataset = datasets.MNIST(path, train=True, download=True)
test_dataset = datasets.MNIST(path, train=False, download=True)

X_train, y_train = train_dataset.data / 255, train_dataset.targets
X_test, y_test = test_dataset.data / 255, test_dataset.targets

Cloning into 'ml'...
remote: Enumerating objects: 252, done.[K
remote: Counting objects: 100% (252/252), done.[K
remote: Compressing objects: 100% (173/173), done.[K
remote: Total 252 (delta 118), reused 194 (delta 72), pack-reused 0[K
Receiving objects: 100% (252/252), 64.48 MiB | 26.85 MiB/s, done.
Resolving deltas: 100% (118/118), done.
MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNIST/raw/train-images-idx3-ubyte.gz
MNIST/processed/
MNIST/processed/training.pt
MNIST/processed/test.pt


In [None]:
print('X_train : ',X_train.shape)
print('y_train : ',y_train.shape)
print('X_test : ',X_test.shape)
print('y_test : ',y_test.shape)

X_train :  torch.Size([60000, 28, 28])
y_train :  torch.Size([60000])
X_test :  torch.Size([10000, 28, 28])
y_test :  torch.Size([10000])


In [None]:
X_train, X_test = X_train.unsqueeze(1), X_test.unsqueeze(1)
print('X train shape : ', X_train.shape)
print('X_test shape : ',X_test.shape)

X train shape :  torch.Size([60000, 1, 28, 28])
X_test shape :  torch.Size([10000, 1, 28, 28])


In [None]:
train_dset = TensorDataset(X_train, y_train)
test_dset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dset, batch_size=32, shuffle=False)

In [12]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden_layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(3,3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2)),
            nn.Dropout(0.5)
        )
        self.hidden_layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3,3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2,2)),
            nn.Dropout(0.5)
        )
        self.hidden_layer3 = nn.Linear(128*5*5, 128)
        self.output_layer = nn.Linear(128,10)
    def forward(self, X):
        out = self.hidden_layer1(X)
        out = self.hidden_layer2(out)
        out = out.view(out.shape[0], -1)
        out = self.hidden_layer3(out)
        out = self.output_layer(out)
        return out

In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

cpu


In [14]:
def train(model, criterion, optimizer, loader):
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        hypothesis = model(X_batch)
        loss = criterion(hypothesis, y_batch)
        loss.backward()
        optimizer.step()
        y_predicted = torch.argmax(hypothesis, 1)
        acc = (y_predicted == y_batch).float().mean()
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(loader), epoch_acc / len(loader)

In [22]:
def evaluate(model, criterion, loader):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_vatch = X_batch.to(device), y_batch.to(device)
            hypothesis = model(X_batch)
            loss = criterion(hypothesis, y_batch)
            y_predicted = torch.argmax(hypothesis, 1)
            acc = (y_predicted == y_batch).float().mean()
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        return epoch_loss / len(loader), epoch_acc /len(loader)

In [None]:
n_epochs = 20
for epoch in range(1, n_epochs+1):
    loss, acc = train(model, criterion, optimizer, train_loader)
    test_loss, test_acc = evaluate(model, criterion, test_loader)

    print('epoch {} - loss {:.3f} - acc{:.2f} - test_loss {:.3f} - test_acc {:.3f}'.format(epoch, loss, acc, test_loss, test_acc))

epoch 1 - loss 0.113 - acc0.97 - test_loss 0.061 - test_acc 0.982
epoch 2 - loss 0.095 - acc0.97 - test_loss 0.052 - test_acc 0.984
epoch 3 - loss 0.083 - acc0.97 - test_loss 0.046 - test_acc 0.985
epoch 4 - loss 0.075 - acc0.98 - test_loss 0.040 - test_acc 0.987
epoch 5 - loss 0.069 - acc0.98 - test_loss 0.036 - test_acc 0.988
epoch 6 - loss 0.066 - acc0.98 - test_loss 0.036 - test_acc 0.988
epoch 7 - loss 0.060 - acc0.98 - test_loss 0.035 - test_acc 0.988
epoch 8 - loss 0.057 - acc0.98 - test_loss 0.034 - test_acc 0.989
epoch 9 - loss 0.054 - acc0.98 - test_loss 0.033 - test_acc 0.990
epoch 10 - loss 0.053 - acc0.98 - test_loss 0.030 - test_acc 0.989
epoch 11 - loss 0.049 - acc0.99 - test_loss 0.028 - test_acc 0.990
epoch 12 - loss 0.047 - acc0.99 - test_loss 0.028 - test_acc 0.990
epoch 13 - loss 0.044 - acc0.99 - test_loss 0.029 - test_acc 0.990
epoch 14 - loss 0.043 - acc0.99 - test_loss 0.027 - test_acc 0.991
epoch 15 - loss 0.041 - acc0.99 - test_loss 0.027 - test_acc 0.991
epoc