<a href="https://colab.research.google.com/github/Siu0901/AI_study/blob/main/%EB%8D%B0%EC%9D%B4%ED%84%B0_%EC%A6%9D%EA%B0%95_%EC%A0%81%EC%9A%A9%ED%95%9C_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn.functional as F
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset
import cv2

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

batch_size = 64
learning_rate = 0.0001
num_epochs = 40

In [3]:
train_mean = [0.485, 0.456, 0.406]  # R, G, B
train_std = [0.229, 0.224, 0.225]   # R, G, B

In [4]:
train_transform = A.Compose([
    A.Resize(256, 256),
    A.RandomCrop(224, 224),
    A.HorizontalFlip(),
    A.Normalize(mean=train_mean, std=train_std),
    A.RandomBrightnessContrast(p=0.5, brightness_limit=0.8, contrast_limit=0.8),
    ToTensorV2()
])

In [5]:
test_transform = A.Compose([
    A.Resize(256, 256),
    A.CenterCrop(224, 224),
    A.Normalize(mean=train_mean, std=train_std),
    ToTensorV2()
])

In [6]:
class Transform():
  def __init__(self, transform):
    self.transform = transform

  def __call__(self, img):
    return self.transform(image=np.array(img))['image']

In [7]:
train_dataset = torchvision.datasets.STL10(root='./data', split='train', transform=Transform(train_transform), download=True)
test_dataset = torchvision.datasets.STL10(root='./data', split='test', transform=Transform(test_transform), download=True)

100%|██████████| 2.64G/2.64G [00:54<00:00, 48.2MB/s]


In [8]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [9]:
class VGG16(nn.Module):
  def __init__(self, n_class):
    super(VGG16, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3,64,3, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64,64,3, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(2,2),

        nn.Conv2d(64,128,3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128,128,3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(2,2),

        nn.Conv2d(128,256,3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256,256,3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256,256,3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(2,2),

        nn.Conv2d(256,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(512,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(512,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(2,2),

        nn.Conv2d(512,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(512,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(512,512,3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(2,2)
    )
    self.full_connected = nn.Sequential(
        nn.Flatten(),
        nn.Linear(7*7*512,4096),
        nn.ReLU(),
        nn.Dropout(), # vgg는 보통 과적합 방지하려고 이렇게 뉴런 끔
        nn.Linear(4096,1000),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(1000,n_class)
    )

  def forward(self, x):
    x = self.features(x)
    x = self.full_connected(x)
    return x

In [10]:
model = VGG16(n_class=10).to(device)

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
for epoch in range(num_epochs):
  model.train()
  running_train_loss = 0.0

  for idx, (img, label) in enumerate(train_loader):
    image = img.to(device)
    label = label.to(device)

    optimizer.zero_grad()
    output = model(image)
    loss = criterion(output, label)
    loss.backward()
    optimizer.step()

    running_train_loss += loss.item()

    if (idx+1) % 50 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{idx+1}/{len(train_loader)}], Train Loss: {running_train_loss/50:.4f}')
        running_train_loss = 0.0

  model.eval()
  t_loss = 0.0
  correct = 0
  total = 0

  with torch.no_grad():
    for t_images, t_labels in test_loader:
        t_images = t_images.to(device)
        t_labels = t_labels.to(device)

        t_output = model(t_images)
        t_batch_loss = criterion(t_output, t_labels)
        t_loss += t_batch_loss.item()

        _, t_predicted = torch.max(t_output.data, 1)
        total += t_labels.size(0)
        correct += (t_predicted == t_labels).sum().item()

    avg_test_loss = t_loss / len(test_loader)
    test_accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
    print('-' * 60)

Epoch [1/40], Step [50/79], Train Loss: 1.6445
Epoch [1/40], Test Loss: 1.5780, Test Accuracy: 40.14%
------------------------------------------------------------
Epoch [2/40], Step [50/79], Train Loss: 1.6213
Epoch [2/40], Test Loss: 1.5017, Test Accuracy: 43.35%
------------------------------------------------------------
Epoch [3/40], Step [50/79], Train Loss: 1.5896
Epoch [3/40], Test Loss: 1.4603, Test Accuracy: 44.36%
------------------------------------------------------------
Epoch [4/40], Step [50/79], Train Loss: 1.5190
Epoch [4/40], Test Loss: 1.4423, Test Accuracy: 44.92%
------------------------------------------------------------
Epoch [5/40], Step [50/79], Train Loss: 1.4563
Epoch [5/40], Test Loss: 1.5204, Test Accuracy: 46.02%
------------------------------------------------------------
Epoch [6/40], Step [50/79], Train Loss: 1.3958
Epoch [6/40], Test Loss: 1.3962, Test Accuracy: 48.71%
------------------------------------------------------------
Epoch [7/40], Step [50

In [14]:
model.eval()
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)

    _, predicted = torch.max(outputs.data, 1)

    total += labels.size(0)
    correct += (predicted == labels).sum().item()
  print(f"테스트 데이터셋 정확도: {100 * correct / total}%")

테스트 데이터셋 정확도: 73.175%


 이전에 데이터를 정제만 해서 학습 시켰을 땐 정확도가 약 68% 나왔음.

 데이터를 방향 전환하고 밝기를 조절 하는 등 증강 기법을 쓰니 정확도가 73%,
 약 5% 오른 것을 확인할 수 있었음.

 결론: 데이터 증강은 필수다!