In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dataset
import torchvision.transforms as transform
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
import torch.optim as optim

In [2]:
import os
import random
import numpy as np

In [3]:
#reproducibility
SEED = 100

os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x25d410e3b50>

In [4]:
#gpu acceleration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
#data load from torchvision

total_train = dataset.MNIST(root='./data', train=True, transform=transform.ToTensor(), download=True)
test = dataset.MNIST(root='./data', train=False, transform=transform.ToTensor(), download=True)

In [6]:
print(len(total_train))

60000


In [7]:
#get val_data and make dataloader

indices = list(range(len(total_train)))
np.random.shuffle(indices)

In [8]:
split = int(np.floor(0.9 * len(total_train)))
print(split)

54000


In [9]:
tr_idx, val_idx = indices[:split], indices[split:]

In [10]:
tr_sampler = SubsetRandomSampler(tr_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [11]:
train_loader = DataLoader(
    dataset=total_train,
    batch_size=50,
    num_workers=0,
    sampler=tr_sampler
)

val_loader = DataLoader(
    dataset=total_train,
    batch_size=50,
    num_workers=0,
    sampler=val_sampler
)

In [12]:
class CNN_Net(nn.Module):
    def __init__(self):
        super().__init__()    
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.func = nn.Linear(32*7*7, 10, bias=True)
        
        torch.nn.init.xavier_uniform_(self.func.weight)
        
    def forward(self, x):
        out = x.view(-1, 1, 28, 28)
        out = self.pool(F.relu(self.conv1(out)))
        out = self.pool(F.relu(self.conv2(out)))
        out = out.view(out.size(0), -1)
        out = self.func(out)
        return out

In [13]:
model = CNN_Net()
model = model.to(device)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [15]:
epochs = 50

for epoch in range(1, epochs + 1):
    
    train_loss = 0
    train_total = 0
    train_correct = 0
    model.train()
    for X_data, Y_data in train_loader:

        X_data, Y_data = X_data.to(device), Y_data.to(device)
        
        hypothesis = model(X_data)
        loss = criterion(hypothesis, Y_data)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        _, predicted = hypothesis.max(1)
        train_total += Y_data.size(0)
        train_correct += predicted.eq(Y_data).sum().item()
    train_acc = train_correct / train_total
    
    valid_loss = 0
    valid_total = 0
    valid_correct = 0
    model.eval()
    with torch.no_grad():
        for X_data, Y_data in val_loader:
            X_data, Y_data = X_data.to(device), Y_data.to(device)
            
            outputs = model(X_data)
            loss = criterion(outputs, Y_data)
            
            valid_loss += loss.item()
            
            _, predicted = outputs.max(1)
            valid_total += Y_data.size(0)
            valid_correct += predicted.eq(Y_data).sum().item()
        valid_acc = valid_correct / valid_total

        if(epoch % 10 == 0):
            print('[%d/%d] TrainLoss: %.3f, ValLoss: %.3f | TrainAcc: %.2f, ValAcc: %.2f'
                 %(epoch, epochs, train_loss, valid_loss, train_acc, valid_acc))

[10/50] TrainLoss: 316.276, ValLoss: 34.631 | TrainAcc: 0.91, ValAcc: 0.92
[20/50] TrainLoss: 221.371, ValLoss: 24.130 | TrainAcc: 0.94, ValAcc: 0.94
[30/50] TrainLoss: 162.031, ValLoss: 17.884 | TrainAcc: 0.96, ValAcc: 0.95
[40/50] TrainLoss: 126.562, ValLoss: 14.032 | TrainAcc: 0.97, ValAcc: 0.96
[50/50] TrainLoss: 105.392, ValLoss: 12.207 | TrainAcc: 0.97, ValAcc: 0.97


In [16]:
torch.save(model.state_dict(), './model.pt')

In [17]:
print('state_dict format of the model: {}'.format(model.state_dict()))

state_dict format of the model: OrderedDict([('conv1.weight', tensor([[[[-0.2524,  0.2202, -0.1468],
          [-0.0045,  0.1271,  0.1785],
          [-0.1578, -0.3043, -0.1968]]],


        [[[-0.2185, -0.1694,  0.2604],
          [ 0.3808,  0.4730,  0.1943],
          [ 0.5519,  0.6511,  0.4521]]],


        [[[-0.1628, -0.1453, -0.2197],
          [ 0.3646,  0.2225,  0.5201],
          [ 0.1044,  0.5164,  0.3895]]],


        [[[-0.0100, -0.1672,  0.1519],
          [ 0.0111, -0.1695,  0.2060],
          [-0.0561, -0.3220, -0.1845]]],


        [[[ 0.4254,  0.2188, -0.2805],
          [ 0.4039,  0.0392, -0.3780],
          [ 0.2688, -0.1000, -0.1838]]],


        [[[ 0.0196, -0.3034,  0.0076],
          [ 0.2716,  0.0650, -0.0377],
          [ 0.1532, -0.0269, -0.1068]]],


        [[[ 0.1687,  0.1476,  0.2326],
          [ 0.0273,  0.4319,  0.3808],
          [-0.0719,  0.2033,  0.1573]]],


        [[[ 0.6225,  0.6750,  0.4713],
          [ 0.5809,  0.5055,  0.1824],
          [ 0

In [None]:
#test model

In [31]:
new_model = CNN_Net()
new_model.load_state_dict(torch.load('./2017313135_권동민.pt'))
new_model = new_model.to(device)

In [34]:
with torch.no_grad():
    X_data = test.test_data.view(len(test), 1, 28, 28).float().to(device)
    Y_data = test.test_labels.to(device)

    prediction = new_model(X_data)
    _, predicted = prediction.max(1)
    correct = predicted.eq(Y_data).sum().item()
    test_acc = correct / Y_data.size(0)
    print('TestAcc: %.2f' % test_acc)

TestAcc: 0.97
