## 16. CNN Batch Norm

- 8번의 설명이랑 유사
- CNN에서는 **convolution 연산 후, 활성화 함수 적용 전에** BatchNorm을 적용
⇒ 정규화는 **채널별로** 수행하며, 각 채널의 평균·분산은 미니배치 전체(N)와 공간 차원(H, W)에 걸쳐 계산함

In [1]:
import torch

# Select the compute device: use CUDA when a GPU is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [2]:
import torch
import torch.nn as nn

# Build a single-image, single-channel 4x4 input holding the values 1..16 in
# row-major order (shape: batch=1, channel=1, height=4, width=4).
input_tensor = torch.arange(1., 17.).reshape(1, 1, 4, 4)

# A 2x2 max-pooling layer; the stride defaults to the kernel size, so each
# spatial dimension is halved.
maxpool_layer = nn.MaxPool2d(kernel_size=2)
output_tensor = maxpool_layer(input_tensor)

print(f'Input Tensor shape: {input_tensor.shape}')
print(f'Output Tensor shape: {output_tensor.shape}')

Input Tensor shape: torch.Size([1, 1, 4, 4])
Output Tensor shape: torch.Size([1, 1, 2, 2])


In [3]:
import torch
from torchvision import datasets, transforms

# Image preprocessing: convert each image to a tensor, then normalize each of
# the three channels with mean 0.5 and std 0.5.
# The pipeline is bound to `transform` (singular) so that it does not shadow
# the `torchvision.transforms` module imported above; rebinding `transforms`
# would break any later `transforms.<Something>` call.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10: 60,000 32x32 color images spread over 10 classes
# (the split sizes are printed below).
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
)

test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Evaluation batches keep a fixed order (no shuffling).
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
)

print("Train dataset size:", len(train_dataset))
print("Validation dataset size:", len(test_dataset))



Train dataset size: 50000
Validation dataset size: 10000


In [7]:
class ModernGAPCNN(nn.Module):
    """Small CNN classifier using BatchNorm and global average pooling.

    The feature extractor is three Conv2d -> BatchNorm2d -> ReLU stages.
    Down-sampling is done by stride-2 convolutions instead of pooling layers,
    and a global average pool collapses each channel to one value before the
    final linear classifier.

    Shape comments below assume a 3 x 32 x 32 input (e.g. CIFAR-10) and use
    out = floor((in + 2 * padding - kernel_size) / stride) + 1.

    Args:
        num_classes: Number of logits produced by the classifier.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=1),   # 16 x 30 x 30
            # Batch normalization is applied per channel, after the
            # convolution and before the activation.
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            # stride=2 shrinks the feature map (used in place of pooling).
            nn.Conv2d(16, 32, kernel_size=5, stride=2, padding=1),  # 32 x 14 x 14
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=1),  # 64 x 6 x 6
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )

        # Global average pooling: average every channel's feature map down to
        # a single value.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))  # 64 x 1 x 1
        self.classifier = nn.Linear(64, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.feature_extractor(x)
        x = self.global_avg_pool(x)
        # Drop the trailing 1 x 1 spatial dims: (batch, 64, 1, 1) -> (batch, 64).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [None]:
import torch.optim as optim

# Build the model and move its parameters to the selected device.
net = ModernGAPCNN(10)
net.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(10):
    # ---- Training pass ----
    # train() makes BatchNorm use the statistics of the current batch.
    net.train()
    for data, label in train_loader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        output = net(data)
        train_loss = loss_fn(output, label)
        train_loss.backward()
        optimizer.step()

    # ---- Evaluation pass ----
    # eval() makes BatchNorm use its running statistics instead.
    net.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)
            output = net(data)
            # CrossEntropyLoss averages over the batch by default, so weight
            # the batch mean by the batch size; dividing the total by the
            # number of samples below then gives the true per-sample mean.
            val_loss += loss_fn(output, label).item() * label.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(label.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    val_loss /= num_samples
    accuracy = 100. * correct / num_samples

    # NOTE: "Loss" is the loss of the last training batch of the epoch,
    # not an average over the epoch.
    print(f'Epoch {epoch+1}, Loss: {train_loss.item():.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%')




