## Import Required Libraries

In [6]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn as nn
import torch.optim as optim

## GPU 사용 가능 여부 확인 및 Random Seed 설정

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

#Random Seed 설정
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

cuda


In [8]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 50

### torchvision.datasets 의 Parameter
- root : 데이터셋을 어느 경로에 받을 것인가?
- train : Train dataset을 다운 받을 것인지? Test dataset을 다운 받을 것인지?
- transform : 일반 이미지 (H,W,C) (픽셀값 : 0 ~ 255) --> Torch에서 쓸 수 있는 이미지 (C,H,W) (픽셀값 : 0 ~ 1)
- download : 만약 없을시 다운로드를 할 것인가?

In [9]:
mnist_train = dsets.MNIST(root = 'MNIST_data/',
                          train = True,
                          transform = transforms.ToTensor(),
                          download = True)

mnist_test = dsets.MNIST(root = 'MNIST_data/',
                          train = False,
                          transform = transforms.ToTensor(),
                          download = True)

### torch.utils.data.DataLoader 의 Parameter
- dataset : 어떤 데이터셋을 읽어올 것인지?
- batch_size : batch_size를 몇으로 할 것인지?
- shuffle : dataset을 섞을 것인지?
- drop_last : 만약 batch_size만큼 데이터를 가져왔을 때, 남은 데이터는 어떻게 처리할 것인지?

In [10]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

## CNN Model (2 con layers)

In [6]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN,self).__init__()   # nn.Module에서 상속을 받기 위해서 있어야 함.
        self.layer1 = nn.Sequential(
                      nn.Conv2d(1,32,kernel_size=3, stride = 1, padding =1),
                      nn.ReLU(),
                      nn.MaxPool2d(2))

        self.layer2 = nn.Sequential(
                      nn.Conv2d(32,64,kernel_size=3, stride = 1, padding = 1),
                      nn.ReLU(),
                      nn.MaxPool2d(2))

        self.fc = nn.Linear(7*7*64, 10, bias = True)

        # Initialization
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0),-1)  #Flatten (batch_size, -1)
        out = self.fc(out)

        return out
    
    




In [7]:
# Model
model = CNN().to(device)

In [8]:
# Loss function
criterion = nn.CrossEntropyLoss().to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

## Training

In [9]:
total_batch = len(data_loader)
print('Learning Started')

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device)   # GPU로 연산을 진행하기 위해서
        Y = Y.to(device)
        
        # 예측 값
        hypothesis = model(X)
        
        # Loss 계산
        cost = criterion(hypothesis,Y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
    
    print('[epoch: {}] cost = {}'.format(epoch+1,avg_cost))

print('Learning finished!')

Learning Started
[epoch: 1] cost = 0.17086203396320343
[epoch: 2] cost = 0.05398032069206238
[epoch: 3] cost = 0.03933505713939667
[epoch: 4] cost = 0.030365170910954475
[epoch: 5] cost = 0.02387661300599575
[epoch: 6] cost = 0.01979607157409191
[epoch: 7] cost = 0.015497392974793911
[epoch: 8] cost = 0.01374112069606781
[epoch: 9] cost = 0.010761722922325134
[epoch: 10] cost = 0.008580321446061134
[epoch: 11] cost = 0.008147109299898148
[epoch: 12] cost = 0.0075375731103122234
[epoch: 13] cost = 0.006176360882818699
[epoch: 14] cost = 0.004374283831566572
[epoch: 15] cost = 0.0048863887786865234
Learning finished!


## Test

In [10]:
with torch.no_grad():
    X_test = mnist_test.data.view(len(mnist_test),1,28,28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    
    print('Accuracy : ', accuracy.item())

Accuracy :  0.9837999939918518


## Use another Model

In [11]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN,self).__init__()   # nn.Module에서 상속을 받기 위해서 있어야 함.
        self.layer1 = nn.Sequential(
                      nn.Conv2d(1,32,kernel_size=3, stride = 1, padding =1),
                      nn.ReLU(),
                      nn.MaxPool2d(2))

        self.layer2 = nn.Sequential(
                      nn.Conv2d(32,64,kernel_size=3, stride = 1, padding = 1),
                      nn.ReLU(),
                      nn.MaxPool2d(2))
        
        self.layer3 = nn.Sequential(
                      nn.Conv2d(64,128, kernel_size = 3, stride = 1, padding = 1),
                      nn.ReLU(),
                      nn.MaxPool2d(2))

        self.fc1 = nn.Linear(3*3*128, 625)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(625, 10, bias = True)
        

        # Initialization
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        
        out = out.view(out.size(0),-1)  #Flatten (batch_size, -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)

        return out
    
    




In [12]:
# Model
model = CNN().to(device)

In [13]:
# Loss function
criterion = nn.CrossEntropyLoss().to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

## Training

In [14]:
total_batch = len(data_loader)
print('Learning Started')

for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        X = X.to(device)   # GPU로 연산을 진행하기 위해서
        Y = Y.to(device)
        
        # 예측 값
        hypothesis = model(X)
        
        # Loss 계산
        cost = criterion(hypothesis,Y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
    
    print('[epoch: {}] cost = {}'.format(epoch+1,avg_cost))

print('Learning finished!')

Learning Started
[epoch: 1] cost = 0.13422012329101562
[epoch: 2] cost = 0.040615614503622055
[epoch: 3] cost = 0.029511310160160065
[epoch: 4] cost = 0.021188275888562202
[epoch: 5] cost = 0.018298352137207985
[epoch: 6] cost = 0.013908224180340767
[epoch: 7] cost = 0.013179261237382889
[epoch: 8] cost = 0.010114748030900955
[epoch: 9] cost = 0.00945969671010971
[epoch: 10] cost = 0.008538463152945042
[epoch: 11] cost = 0.0077784089371562
[epoch: 12] cost = 0.006555082276463509
[epoch: 13] cost = 0.0072351377457380295
[epoch: 14] cost = 0.0056805056519806385
[epoch: 15] cost = 0.004413490649312735
Learning finished!


## Test

In [15]:
torch.cuda.empty_cache()

with torch.no_grad():
    X_test = mnist_test.data.view(len(mnist_test),1,28,28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction,1) == Y_test
    accuracy = correct_prediction.float().mean()
    
    print('Accuracy : ', accuracy.item())

Accuracy :  0.9592999815940857


## 결론 : Layer를 깊이 쌓는다고 무조건 좋은 것은 아니다!

두 CNN 모델을 연속으로 돌리면 out of memory라는 구문이 나오는데 아직 해결하지 못했다..