In [1]:
import torch
import torch.nn as nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyperparameters, including the training-time preprocessing / augmentation pipeline.
hyperparams = {
    "batch_size": 4,
    "learning_rate": 0.001,
    "epochs": 10,
    "transform": transforms.Compose([
        transforms.RandomHorizontalFlip(),        # horizontal flip (default probability 0.5)
        transforms.RandomRotation(15),            # random rotation within ±15 degrees
        transforms.Resize(256),                   # resize so the shorter side is 256 px
        transforms.CenterCrop(224),               # keep the central 224x224 region
        transforms.ToTensor(),                    # PIL image -> float tensor in [0, 1]
        # Use the ImageNet statistics that torchvision's IMAGENET1K_V1 weights were
        # trained with. The previous mean (123/255, 117/255, 104/255) belongs to the
        # Caffe-style preprocessing and did not match the std used here.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
}

In [3]:
from torch.utils.data import random_split
from torch.utils.data import Subset
from torchvision.datasets import ImageFolder

from sklearn.model_selection import train_test_split
import numpy as np

DATA_ROOT = '../Project/dataset-resized'
SPLIT_SEED = 42  # fixed so train/valid/test membership is identical on every run

# Transforms that must only be applied while training; extend this tuple if more
# random augmentations are added to hyperparams['transform'].
_TRAIN_ONLY_TRANSFORMS = (transforms.RandomHorizontalFlip, transforms.RandomRotation)

# Training view of the data: full pipeline, including random augmentation.
dataset = ImageFolder(DATA_ROOT, transform=hyperparams['transform'])

# Evaluation view of the same files: identical pipeline minus the random steps,
# so validation / test metrics are deterministic and not computed on augmented images.
eval_transform = transforms.Compose([
    step for step in hyperparams['transform'].transforms
    if not isinstance(step, _TRAIN_ONLY_TRANSFORMS)
])
eval_dataset = ImageFolder(DATA_ROOT, transform=eval_transform)

# Both views must enumerate the files in the same order, otherwise the index
# arrays computed below would point at different images in each view.
assert eval_dataset.samples == dataset.samples, "train / eval views are not index-aligned"

# Stratified 80 / 10 / 10 split over sample indices, keyed on the class labels.
targets = np.array(dataset.targets)  # class label of every sample, from ImageFolder
train_idx, temp_idx = train_test_split(
    np.arange(len(targets)), test_size=0.2, stratify=targets, random_state=SPLIT_SEED
)
valid_idx, test_idx = train_test_split(
    temp_idx, test_size=0.5, stratify=targets[temp_idx], random_state=SPLIT_SEED
)

# Build the three subsets; only the training subset sees augmentation.
train_dataset = Subset(dataset, train_idx)
valid_dataset = Subset(eval_dataset, valid_idx)
test_dataset = Subset(eval_dataset, test_idx)

print(f"Train: {len(train_dataset)}, Valid: {len(valid_dataset)}, Test: {len(test_dataset)}")

Train: 2021, Valid: 253, Test: 253


In [4]:
# Build the data loaders.
# Training: shuffle every epoch, and drop the final incomplete batch so every
# optimisation step sees exactly batch_size samples.
train_dataloader = DataLoader(train_dataset, batch_size=hyperparams['batch_size'], shuffle=True, drop_last=True)
# Evaluation: keep every sample (no drop_last) and a fixed order (no shuffle), so
# metrics cover the whole split; the final batch may be smaller than batch_size.
test_dataloader = DataLoader(test_dataset, batch_size=hyperparams['batch_size'], shuffle=False, drop_last=False)
valid_dataloader = DataLoader(valid_dataset, batch_size=hyperparams['batch_size'], shuffle=False, drop_last=False)

In [5]:
# Load VGG16 with its ImageNet-pretrained (IMAGENET1K_V1) weights.
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

In [6]:
# Keep handles to the two halves of the network:
#   FeatureLayers    - the `features` sub-module of the model
#   ClassifierLayers - the `classifier` sub-module of the model
FeatureLayers, ClassifierLayers = model.features, model.classifier

In [7]:
# Inspect both sub-modules layer by layer before fine-tuning.
print(FeatureLayers, ClassifierLayers, sep="\n")

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [8]:
# Fine-tuning setup for VGG16.

# Number of target classes, taken from the dataset's class list.
num_classes = len(dataset.classes)

# Swap the last classifier layer for a new linear layer with one output per class.
model.classifier[6] = nn.Linear(in_features=4096, out_features=num_classes)

In [9]:
# Training setup: choose the device, then build the loss and the optimizer.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Move the model to the chosen device; the loss module is moved as well.
model = model.to(device)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# SGD over all model parameters, using the configured learning rate.
optimizer = optim.SGD(params=model.parameters(), lr=hyperparams['learning_rate'])

In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch
import matplotlib.pyplot as plt


def _classification_metrics(y_true, y_pred):
    """Compute the per-epoch classification metrics.

    Args:
        y_true: sequence of ground-truth class indices.
        y_pred: sequence of predicted class indices, same length as ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, confusion_matrix)``; precision,
        recall and F1 are support-weighted averages over the classes.
    """
    # zero_division=0 keeps the value scikit-learn already falls back to for a
    # class that is never predicted, without emitting a warning every epoch.
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average="weighted", zero_division=0),
        recall_score(y_true, y_pred, average="weighted", zero_division=0),
        f1_score(y_true, y_pred, average="weighted", zero_division=0),
        confusion_matrix(y_true, y_pred),
    )


def _run_epoch(net, loader, loss_fn, run_device, opt=None):
    """Run one full pass over ``loader`` and collect loss and predictions.

    The caller is responsible for putting ``net`` in train / eval mode and for
    wrapping evaluation passes in ``torch.no_grad()``.

    Args:
        net: the model to run.
        loader: iterable yielding ``(images, labels)`` batches.
        loss_fn: loss module; assumed to return the mean loss of the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        run_device: device the batches are moved to before the forward pass.
        opt: optimizer; when given, a backward pass and an optimizer step are
            performed for every batch. When ``None`` the pass is forward-only.

    Returns:
        Tuple ``(mean_loss, predictions, labels)`` where ``mean_loss`` is the
        average loss per sample and the two lists hold one entry per sample.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    loss_sum, sample_count = 0.0, 0
    epoch_preds, epoch_labels = [], []

    for images, labels in loader:
        images, labels = images.to(run_device), labels.to(run_device)

        output = net(images)
        loss = loss_fn(output, labels)

        if opt is not None:
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Weight each batch by its size so a smaller final batch does not skew
        # the epoch average; identical to a mean of batch means when all
        # batches have the same size.
        batch_samples = labels.size(0)
        loss_sum += loss.item() * batch_samples
        sample_count += batch_samples

        epoch_preds.extend(torch.argmax(output, dim=1).cpu().numpy())
        epoch_labels.extend(labels.cpu().numpy())

    if sample_count == 0:
        raise ValueError("The dataloader yielded no batches; cannot compute an epoch average.")

    return loss_sum / sample_count, epoch_preds, epoch_labels


# Early-stopping state
best_valid_loss = float('inf')
patience = 4
early_stop_counter = 0
save_path = "VGG16_best_model.pth"

# Per-epoch history used for the plots below
train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []
train_f1s, valid_f1s = [], []

for epoch in range(hyperparams['epochs']):
    # Training
    model.train()
    cost, all_preds, all_labels = _run_epoch(model, train_dataloader, criterion, device, opt=optimizer)
    (train_accuracy, train_precision, train_recall,
     train_f1, train_conf_matrix) = _classification_metrics(all_labels, all_preds)

    print(f"📌 Train - Epoch {epoch+1} | Cost: {cost:.3f}, Accuracy: {train_accuracy:.3f}, Precision: {train_precision:.3f}, Recall: {train_recall:.3f}, F1-score: {train_f1:.3f}")
    print(f"Train Confusion Matrix:\n{train_conf_matrix}")

    # Validation (forward-only, no gradients tracked)
    model.eval()
    with torch.no_grad():
        valid_loss, valid_preds, valid_labels = _run_epoch(model, valid_dataloader, criterion, device)
    (valid_accuracy, valid_precision, valid_recall,
     valid_f1, valid_conf_matrix) = _classification_metrics(valid_labels, valid_preds)

    print(f"🔎 Validation - Epoch {epoch+1} | Loss: {valid_loss:.3f}, Accuracy: {valid_accuracy:.3f}, Precision: {valid_precision:.3f}, Recall: {valid_recall:.3f}, F1-score: {valid_f1:.3f}")
    print(f"Validation Confusion Matrix:\n{valid_conf_matrix}")

    # Record this epoch's results
    train_losses.append(cost)
    valid_losses.append(valid_loss)
    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)
    train_f1s.append(train_f1)
    valid_f1s.append(valid_f1)

    # Early stopping: checkpoint on every improvement of the validation loss,
    # stop after `patience` consecutive epochs without one.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), save_path)
        print("✅ 모델이 개선되었으므로 저장되었습니다!")
    else:
        early_stop_counter += 1
        print(f"⚠️ {early_stop_counter}/{patience} Epoch 동안 개선되지 않았습니다.")

    if early_stop_counter >= patience:
        print("⏳ 조기 종료를 수행합니다.")
        break

# Visualise the training history: one panel per metric, train vs. validation.
epochs = range(1, len(train_losses) + 1)
plt.figure(figsize=(16, 5))

# (title, y-axis label, train series, train legend, valid series, valid legend)
panels = [
    ('📉 Loss per Epoch', 'Loss', train_losses, 'Train Loss', valid_losses, 'Valid Loss'),
    ('📈 Accuracy per Epoch', 'Accuracy', train_accuracies, 'Train Accuracy', valid_accuracies, 'Valid Accuracy'),
    ('📊 F1-score per Epoch', 'F1-score', train_f1s, 'Train F1', valid_f1s, 'Valid F1'),
]

for position, (title, ylabel, train_values, train_label, valid_values, valid_label) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    plt.plot(epochs, train_values, 'b-o', label=train_label)
    plt.plot(epochs, valid_values, 'r-o', label=valid_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()

plt.tight_layout()
plt.show()

KeyboardInterrupt: 