# Hyperparameter 

- 여러 CNN 구조
- Dropout
- BatchNormalization
- Optimization
- Loss Function
- Learning rate
- Data augmentation
- epoch

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


### Data Augmentation

Data를 load할 때 다양한 augmentation을 적용

In [3]:
# Augmentation pipeline applied to every loaded image; steps run in the order listed.
transform = transforms.Compose([
    # Crop a random region covering 20%-100% of the image area and resize it to 32x32.
    transforms.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
    # Mirror the image left-right with the default probability of 0.5.
    transforms.RandomHorizontalFlip(p=0.5),
    # Convert the image to grayscale with probability 0.2.
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    # Normalize each of the R, G, B channels with mean 0.5 and std 0.5.
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

### Batch Size

Batch Size : 128

In [4]:
# Training split: the augmentation pipeline `transform` already ends with
# ToTensor(), so no separate tensor conversion is needed here.
train_data = datasets.CIFAR10(
    root="../data",
    train=True,
    download=True,
    transform=transform,
)

# Evaluation must be deterministic: the test split only gets tensor conversion
# and the (0.5, 0.5) per-channel normalization, never the random
# crop / flip / grayscale augmentations used for training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
test_data = datasets.CIFAR10(
    root="../data",
    train=False,
    download=True,
    transform=test_transform,
)

# Data loaders with mini-batches of 128 samples; only the training set is shuffled.
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


Batch Size : 256

In [5]:
# Training split, loaded with the random augmentation pipeline `transform`.
train_data = datasets.CIFAR10(
    root="../data",
    train=True,
    download=True,
    transform=transform,
)

# Evaluation must be deterministic: the test split only gets tensor conversion
# and the (0.5, 0.5) per-channel normalization, never the random
# crop / flip / grayscale augmentations used for training.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
test_data = datasets.CIFAR10(
    root="../data",
    train=False,
    download=True,
    transform=test_transform,
)

# Data loaders with mini-batches of 256 samples; only the training set is shuffled.
train_loader = DataLoader(train_data, batch_size=256, shuffle=True)
test_loader = DataLoader(test_data, batch_size=256, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


### 다양한 모델 구조

layer 수, kernel size, stride, padding

* 일반적으로 사용하는 Hyperparameter 구조

- Layer 5개, kernel size 3, stride 1, padding 1
- Channel 3 - 32 - 64 - 128 - 256
- 일반적인 구조로 주로 사용

In [6]:
class CNN_Model(nn.Module):
    """Four conv blocks (3 -> 32 -> 64 -> 128 -> 256 channels) plus a linear classifier.

    Every block is Conv2d(kernel 3, stride 1, padding 1) -> ReLU -> 2x2 max-pool,
    so the convolution keeps the spatial size and the pooling halves it.
    The classifier is sized for 32x32 inputs (32 -> 16 -> 8 -> 4 -> 2).

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        same_conv = dict(kernel_size=3, stride=1, padding=1)
        halve = dict(kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(3, 32, **same_conv)
        self.relu = nn.ReLU()  # stateless, so one instance serves every block
        self.maxpool1 = nn.MaxPool2d(**halve)

        self.conv2 = nn.Conv2d(32, 64, **same_conv)
        self.maxpool2 = nn.MaxPool2d(**halve)

        self.conv3 = nn.Conv2d(64, 128, **same_conv)
        self.maxpool3 = nn.MaxPool2d(**halve)

        self.conv4 = nn.Conv2d(128, 256, **same_conv)
        self.maxpool4 = nn.MaxPool2d(**halve)

        # 256 channels x 2 x 2 spatial positions remain after the fourth pooling.
        self.mlp = nn.Linear(256 * 2 * 2, num_classes)

    def forward(self, x):
        """Return logits of shape [batch, num_classes] for a [batch, 3, 32, 32] input."""
        out = self.maxpool1(self.relu(self.conv1(x)))    # [batch, 32, 16, 16]
        out = self.maxpool2(self.relu(self.conv2(out)))  # [batch, 64, 8, 8]
        out = self.maxpool3(self.relu(self.conv3(out)))  # [batch, 128, 4, 4]
        out = self.maxpool4(self.relu(self.conv4(out)))  # [batch, 256, 2, 2]

        # Flatten everything except the batch dimension for the final classifier.
        flat = out.view(out.shape[0], -1)
        return self.mlp(flat)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_Model(num_classes=10)
model = model.to(device)

- Layer 3개, kernel size, stride, padding 모두 다르게
- Channel 3 - 16 - 64 - 128
- 원하는대로 바꿀 수 있다!

In [7]:
class CNN_Model(nn.Module):
    """Three conv layers (3 -> 16 -> 64 -> 128 channels) with differing kernel/stride/padding.

    Shows that kernel size, stride and padding can be chosen freely per layer.
    The classifier is sized for 32x32 inputs.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=7, stride=2, padding=1)
        self.relu = nn.ReLU()  # stateless, so one instance serves every layer

        self.conv2 = nn.Conv2d(16, 64, kernel_size=5, stride=2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # in_features must equal the flattened conv output (128 * 1 * 1 for 32x32
        # inputs); placing nn.AdaptiveAvgPool2d((1, 1)) before the flatten would
        # avoid having to work this number out by hand.
        self.mlp = nn.Linear(128 * 1 * 1, num_classes)

    def forward(self, x):
        """Return logits of shape [batch, num_classes] for a [batch, 3, 32, 32] input."""
        out = self.relu(self.conv1(x))                   # [batch, 16, 14, 14]
        out = self.maxpool2(self.relu(self.conv2(out)))  # [batch, 64, 2, 2]
        out = self.maxpool3(self.relu(self.conv3(out)))  # [batch, 128, 1, 1]

        # Flatten everything except the batch dimension for the final classifier.
        flat = out.view(out.shape[0], -1)
        return self.mlp(flat)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_Model(num_classes=10)
model = model.to(device)

### Activation

- ReLU
- LeakyReLU
- Sigmoid
- Tanh

이 외에도 다양한 activation 함수를 자유롭게 사용할 수 있으며, 일반적으로 ReLU를 많이 사용한다.

In [8]:
class CNN_Model(nn.Module):
    """Four-block CNN whose non-linearity can be chosen at construction time.

    Every block is Conv2d(kernel 3, stride 1, padding 1) -> activation -> 2x2
    max-pool, with channels 3 -> 32 -> 64 -> 128 -> 256. The classifier is
    sized for 32x32 inputs (32 -> 16 -> 8 -> 4 -> 2).

    Args:
        num_classes: Number of logits produced by the final linear layer.
        activation: Module applied after every convolution, e.g. ``nn.ReLU()``,
            ``nn.LeakyReLU()``, ``nn.Sigmoid()`` or ``nn.Tanh()``. Defaults to
            ``nn.ReLU()`` when omitted. The same instance is reused by all
            four blocks.
    """

    def __init__(self, num_classes, activation=None):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        # A single activation is shared by all blocks, so swapping the
        # non-linearity is one argument instead of four edited lines.
        self.nonlinear = nn.ReLU() if activation is None else activation
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # 256 channels x 2 x 2 spatial positions remain after the fourth pooling.
        self.mlp = nn.Linear(256 * 2 * 2, num_classes)

    def forward(self, x):
        """Return logits of shape [batch, num_classes] for a [batch, 3, 32, 32] input."""
        out = self.conv1(x)
        out = self.nonlinear(out)
        out = self.maxpool1(out)  # [batch, 32, 16, 16]

        out = self.conv2(out)
        out = self.nonlinear(out)
        out = self.maxpool2(out)  # [batch, 64, 8, 8]

        out = self.conv3(out)
        out = self.nonlinear(out)
        out = self.maxpool3(out)  # [batch, 128, 4, 4]

        out = self.conv4(out)
        out = self.nonlinear(out)
        out = self.maxpool4(out)  # [batch, 256, 2, 2]

        # Flatten everything except the batch dimension for the final classifier.
        out = out.view(out.size(0), -1)
        out = self.mlp(out)

        return out


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_Model(num_classes=10).to(device)

### Dropout / Batch Normalization

- 원하는 위치에 사용 
- 일반적으로 Convolution Layer와 NonLinear 함수 사이에 사용한다

In [9]:
class CNN_Model(nn.Module):
    """Four-block CNN demonstrating BatchNorm2d and Dropout placement.

    Blocks 1 and 2 apply batch normalization between the convolution and the
    ReLU; block 3 applies dropout (p=0.5) in that position; block 4 is a plain
    conv -> ReLU -> pool block. Channels go 3 -> 32 -> 64 -> 128 -> 256 and the
    classifier is sized for 32x32 inputs (32 -> 16 -> 8 -> 4 -> 2).

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        same_conv = dict(kernel_size=3, stride=1, padding=1)
        halve = dict(kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(3, 32, **same_conv)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()  # stateless, so one instance serves every block
        self.maxpool1 = nn.MaxPool2d(**halve)

        self.conv2 = nn.Conv2d(32, 64, **same_conv)
        self.bn2 = nn.BatchNorm2d(64)
        self.maxpool2 = nn.MaxPool2d(**halve)

        self.conv3 = nn.Conv2d(64, 128, **same_conv)
        self.dropout = nn.Dropout(p=0.5)
        self.maxpool3 = nn.MaxPool2d(**halve)

        self.conv4 = nn.Conv2d(128, 256, **same_conv)
        self.maxpool4 = nn.MaxPool2d(**halve)

        # 256 channels x 2 x 2 spatial positions remain after the fourth pooling.
        self.mlp = nn.Linear(256 * 2 * 2, num_classes)

    def forward(self, x):
        """Return logits of shape [batch, num_classes] for a [batch, 3, 32, 32] input."""
        out = self.maxpool1(self.relu(self.bn1(self.conv1(x))))        # [batch, 32, 16, 16]
        out = self.maxpool2(self.relu(self.bn2(self.conv2(out))))      # [batch, 64, 8, 8]
        out = self.maxpool3(self.relu(self.dropout(self.conv3(out))))  # [batch, 128, 4, 4]
        out = self.maxpool4(self.relu(self.conv4(out)))                # [batch, 256, 2, 2]

        # Flatten everything except the batch dimension for the final classifier.
        flat = out.view(out.shape[0], -1)
        return self.mlp(flat)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN_Model(num_classes=10)
model = model.to(device)

In [10]:
# Cross-entropy criterion plus the Adam optimizer instance that updates the model's parameters.
CELoss = nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [11]:
# Train the neural network.
total_epochs = 3
print('number of iteration :', len(train_loader))
model.train()  # make sure Dropout / BatchNorm layers run in training mode
# epoch: one full pass over the training data
for epoch in range(total_epochs):
    # iteration: one optimization step on a single mini-batch
    for images, labels in train_loader:
        # images: [mini-batch, 3, 32, 32] (CIFAR-10), labels: [mini-batch]
        # Both tensors must live on the same device as the model, otherwise
        # the forward pass fails as soon as the model sits on the GPU.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        ce_loss = CELoss(outputs, labels)

        # Backward and optimize
        adam_optimizer.zero_grad()  # clear gradients left over from the previous step
        ce_loss.backward()  # back-propagation
        adam_optimizer.step()  # apply the parameter update

    # Reports the loss of the last mini-batch of the epoch, not an epoch average.
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, ce_loss.item()))

number of iteration : 196
Epoch [1/3], Loss: 1.5140
Epoch [2/3], Loss: 1.5127
Epoch [3/3], Loss: 1.1263


### Loss Function

- 원하는 목적 및 tasks에 따라 다양한 Loss Function 적용 가능
- 일반적으로 분류 문제는 CrossEntropyLoss, 회귀 문제에는 MSELoss를 주로 사용

In [12]:
# Cross-entropy criterion (multi-class classification); 'mean' is the default batch reduction.
Loss = nn.CrossEntropyLoss(reduction='mean')

In [14]:
# Mean absolute error criterion; 'mean' is the default reduction.
Loss = nn.L1Loss(reduction='mean')

In [15]:
# Mean squared error criterion (regression); 'mean' is the default reduction.
Loss = nn.MSELoss(reduction='mean')

In [16]:
# Negative log-likelihood criterion; expects log-probabilities (e.g. log_softmax output).
# The class must be instantiated: without the parentheses `Loss` would be the
# class itself and `Loss(outputs, labels)` would not compute a loss value.
Loss = nn.NLLLoss()

In [17]:
# Binary cross-entropy criterion on probabilities in [0, 1]; 'mean' is the default reduction.
Loss = nn.BCELoss(reduction='mean')

### Optimization

- 여러 optimization을 사용할 수 있음
- 일반적으로 SGD with momentum과 Adam을 주로 많이 사용

In [18]:
# Each assignment below replaces the previous one; the statements are
# alternatives, and only the last optimizer (Adam) stays bound to `optimizer`.

# Plain stochastic gradient descent
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

# Stochastic gradient descent with momentum
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2, momentum=0.9)

# Adagrad
optimizer = torch.optim.Adagrad(params=model.parameters(), lr=1e-2)

# RMSprop
optimizer = torch.optim.RMSprop(params=model.parameters(), lr=1e-2)

# Adam
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)