In [15]:
from torchvision import datasets
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

In [16]:
# !git clone https://github.com/baek2sm/ml.git
# !tar -zxvf ./ml/datasets/MNIST.tar.gz

path ='./'
train_dataset = datasets.MNIST(path, train=True, download=True)
test_dataset = datasets.MNIST(path, train=False, download=True)

In [17]:
X_train, y_train = train_dataset.data / 255, train_dataset.targets
X_test, y_test = test_dataset.data / 255, test_dataset.targets

In [18]:
print('Train input data : ',X_train.shape)
print('Train target : ', y_train.shape)
print('Test input data : ', X_test.shape)
print('Test target : ', y_test.shape)

Train input data :  torch.Size([60000, 28, 28])
Train target :  torch.Size([60000])
Test input data :  torch.Size([10000, 28, 28])
Test target :  torch.Size([10000])


In [19]:
X_train, X_test = X_train.view(-1, 784), X_test.view(-1, 784)
print('Train input data : ',X_train.shape)
print('Test input data : ', X_test.shape)

Train input data :  torch.Size([60000, 784])
Test input data :  torch.Size([10000, 784])


In [20]:
train_dset = TensorDataset(X_train, y_train)
test_dset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dset, batch_size=32, shuffle=False)

In [21]:
class DNN(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.hidden_layer1 = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.ReLU()
        )
        self.hidden_layer2 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU()
        )
        self.output_layer = nn.Linear(128, 10)

    def forward(self, X):
        out = self.hidden_layer1(X)
        out = self.hidden_layer2(out)
        out = self.output_layer(out)
        return out        

In [22]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device : ', device)
model = DNN(784).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

device :  cuda


In [23]:
def train(model, criterion, optimizer, loader):
    epoch_loss = 0
    epoch_acc = 0

    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        hypothesis = model(X_batch)
        loss = criterion(hypothesis, y_batch)
        loss.backward()
        optimizer.step()
        y_predicted = torch.argmax(hypothesis, 1)
        acc = (y_predicted == y_batch).float().mean()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(loader), epoch_acc /len(loader)

In [24]:
def evaluate(model, criterion, loader):
    epoch_loss = 0
    epoch_acc = 0

    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            hypothesis = model(X_batch)
            loss = criterion(hypothesis, y_batch)
            y_predicted = torch.argmax(hypothesis, 1)
            acc = (y_predicted == y_batch).float().mean()
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        return epoch_loss /len(loader), epoch_acc / len(loader)

In [25]:
n_epochs = 20
for epoch in range(1, n_epochs+1):
    loss, acc = train(model, criterion, optimizer, train_loader)

    test_loss, test_acc = evaluate(model, criterion, test_loader)
    print('epoch : {}, loss : {:.3f}, acc : {:.2f}, test_loss : {:.3f}, test_acc : {:.3f}'.format(epoch, loss, acc, test_loss, test_acc))

epoch : 1, loss : 0.551, acc : 0.86, test_loss : 0.277, test_acc : 0.923
epoch : 2, loss : 0.252, acc : 0.93, test_loss : 0.211, test_acc : 0.940
epoch : 3, loss : 0.198, acc : 0.94, test_loss : 0.171, test_acc : 0.950
epoch : 4, loss : 0.163, acc : 0.95, test_loss : 0.152, test_acc : 0.954
epoch : 5, loss : 0.136, acc : 0.96, test_loss : 0.129, test_acc : 0.960
epoch : 6, loss : 0.115, acc : 0.97, test_loss : 0.120, test_acc : 0.962
epoch : 7, loss : 0.100, acc : 0.97, test_loss : 0.103, test_acc : 0.969
epoch : 8, loss : 0.087, acc : 0.97, test_loss : 0.094, test_acc : 0.973
epoch : 9, loss : 0.077, acc : 0.98, test_loss : 0.089, test_acc : 0.974
epoch : 10, loss : 0.068, acc : 0.98, test_loss : 0.082, test_acc : 0.976
epoch : 11, loss : 0.060, acc : 0.98, test_loss : 0.083, test_acc : 0.975
epoch : 12, loss : 0.053, acc : 0.98, test_loss : 0.076, test_acc : 0.977
epoch : 13, loss : 0.048, acc : 0.99, test_loss : 0.073, test_acc : 0.978
epoch : 14, loss : 0.042, acc : 0.99, test_loss