In [2]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, ConcatDataset, Subset
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
import numpy as np
import matplotlib.pyplot as plt
from torchvision.transforms.v2 import CutMix

# 1. 장치 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2. 데이터 전처리 설정
# 사용자 데이터셋 기본 전처리
basic_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

# CIFAR-10 데이터에 사용할 두 가지 증강 기법
augment1 = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

augment2 = transforms.Compose([
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
])

# 3. 사용자 데이터셋 불러오기
trainset_path = "C:/Users/jongcheol/OneDrive/바탕 화면/Semester2/train_data"
user_dataset = datasets.ImageFolder(root=trainset_path, transform=basic_transform)

# 4. CIFAR-10 데이터셋 불러오기 (증강1과 증강2 적용)
cifar10_dataset1 = datasets.CIFAR10(root="C:/Users/jongcheol/OneDrive/바탕 화면/Semester2/CIFAR10", train=True, download=False, transform=augment1)
cifar10_dataset2 = datasets.CIFAR10(root="C:/Users/jongcheol/OneDrive/바탕 화면/Semester2/CIFAR10", train=True, download=False, transform=augment2)

# 5. 사용자 데이터셋과 CIFAR-10 데이터셋 통합
combined_dataset = ConcatDataset([user_dataset, cifar10_dataset1, cifar10_dataset2])
combined_labels = np.concatenate((np.array([label for _, label in user_dataset.samples]),
                                  np.array(cifar10_dataset1.targets),
                                  np.array(cifar10_dataset2.targets)))

In [11]:
print(len(combined_dataset))
print(combined_labels.shape)

104000
(104000,)


In [14]:

# 6. K-Fold Cross Validation 설정
k = 5
kf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
fold_accuracies = []
train_accuracies, val_accuracies = [], []

# CutMix 추가를 위해 transforms.Compose에 포함
cutmix_transform = CutMix(num_classes=10, alpha=0.3)

def collate_fn(batch):
    # 기본 DataLoader의 배치와 동일하게 (데이터, 라벨) 형식으로 변환
    images, labels = zip(*batch)
    images = torch.stack(images)
    labels = torch.tensor(labels)
    return cutmix_transform((images, labels))

# 7. Cross Validation 학습 및 평가
num_epochs = 12
for fold, (train_idx, val_idx) in enumerate(kf.split(np.zeros(len(combined_labels)), combined_labels)):
    print(f"\n=== Fold {fold + 1} 시작 ===")

    # Fold별 데이터셋 분리
    train_subset = Subset(combined_dataset, train_idx)
    val_subset = Subset(combined_dataset, val_idx)

    # CutMix 적용된 데이터로더
    train_loader = DataLoader(train_subset, batch_size=16, shuffle=True, num_workers=1, collate_fn=collate_fn)
    val_loader = DataLoader(val_subset, batch_size=16, shuffle=False, num_workers=1)

    # EfficientNet-B0 모델 설정
    model = models.efficientnet_b0(pretrained=False)
    num_classes = 15
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    model = model.to(device)

    # 손실 함수 및 옵티마이저 설정
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

    # Fold 학습 및 검증
    fold_train_acc, fold_val_acc = [], []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()

        train_acc = 100. * correct_train / total_train
        fold_train_acc.append(train_acc)

        model.eval()
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = outputs.max(1)
                total_val += labels.size(0)
                correct_val += predicted.eq(labels).sum().item()

        val_acc = 100. * correct_val / total_val
        fold_val_acc.append(val_acc)
        scheduler.step()

        print(f"Fold [{fold + 1}], Epoch [{epoch+1}/{num_epochs}], "
              f"Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")

    train_accuracies.append(fold_train_acc)
    val_accuracies.append(fold_val_acc)
    fold_accuracies.append(val_acc)

# 8. 최종 K-Fold 평균 정확도 출력
print("\n=== 최종 K-Fold 평균 정확도 ===")
print(f"Average {k}-Fold Accuracy: {np.mean(fold_accuracies):.2f}%")

# 9. 정확도 시각화
epochs_range = range(1, num_epochs + 1)
plt.figure(figsize=(14, 5))
for i in range(k):
    plt.plot(epochs_range, train_accuracies[i], label=f'Fold {i+1} Train Acc')
    plt.plot(epochs_range, val_accuracies[i], label=f'Fold {i+1} Val Acc', linestyle='--')

plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Train and Validation Accuracy per Fold')
plt.legend(loc='best')
plt.show()



=== Fold 1 시작 ===




RuntimeError: DataLoader worker (pid(s) 33996) exited unexpectedly