In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, ConcatDataset, Subset
from torchvision import transforms, datasets, models
import numpy as np
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# 1. Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 2. Preprocessing pipelines
# Per-channel statistics shared by every pipeline below.
# NOTE(review): these look like the commonly quoted CIFAR-10 statistics - confirm
# they are also appropriate for the user-supplied images.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.247, 0.243, 0.261)

# Deterministic pipeline: resize to 224x224, convert to a tensor, normalise.
_basic_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
basic_transform = transforms.Compose(_basic_steps)

# Light augmentation: random crop covering 80-100% of the image (resized to
# 224) followed by a horizontal flip with probability 0.5.
_augment1_steps = [
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
augment1 = transforms.Compose(_augment1_steps)

# Heavier augmentation: resize, rotation by up to 45 degrees, horizontal flip
# with probability 0.7, and brightness/contrast jitter.
_augment2_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomRotation(45),
    transforms.RandomHorizontalFlip(p=0.7),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
augment2 = transforms.Compose(_augment2_steps)

# 3. Load the datasets and merge them
trainset_path = "C:/Users/jongcheol/OneDrive/바탕 화면/Semester2/train_data"
user_dataset = datasets.ImageFolder(root=trainset_path, transform=basic_transform)

# The three CIFAR-10 datasets read the same training split from the same root
# and differ only in the transform applied to each image.
_cifar_root = "C:/Users/jongcheol/OneDrive/바탕 화면/Semester2/CIFAR10"
cifar10, cifar10_dataset1, cifar10_dataset2 = (
    datasets.CIFAR10(root=_cifar_root, train=True, download=False, transform=pipeline)
    for pipeline in (basic_transform, augment1, augment2)
)

# Order matters: combined_labels below is concatenated in the same order so
# that combined_labels[i] is the label of combined_dataset[i].
combined_dataset = ConcatDataset([user_dataset, cifar10, cifar10_dataset1, cifar10_dataset2])

# NOTE(review): assumes the ImageFolder class indices line up with the
# CIFAR-10 class indices - verify against the folder names in train_data.
_user_labels = [class_index for _, class_index in user_dataset.samples]
combined_labels = np.concatenate([
    np.array(label_list)
    for label_list in (
        _user_labels,
        cifar10.targets,
        cifar10_dataset1.targets,
        cifar10_dataset2.targets,
    )
])

# 4. CutMix helpers
def cutmix_data(input, target, beta=1.0):
    """Apply CutMix to a batch by pasting one random box from shuffled samples.

    Args:
        input: Batch tensor indexed as (sample, channel, dim2, dim3). It is
            modified in place and the same tensor is returned.
        target: Label tensor indexed by sample along its first dimension.
            Expected to live on a device that can be indexed with an index
            tensor on ``input.device``.
        beta: Value used for both shape parameters of the Beta distribution
            from which the initial mixing ratio is drawn.

    Returns:
        Tuple ``(input, target_a, target_b, lam)`` where ``target_a`` is the
        original labels, ``target_b`` the labels of the samples the box was
        taken from, and ``lam`` (a Python float) the fraction of each image
        that still belongs to ``target_a``.
    """
    lam = np.random.beta(beta, beta)
    # Place the permutation on the batch's own device so this helper does not
    # depend on a module-level `device` variable.
    rand_index = torch.randperm(input.size(0)).to(input.device)
    target_a = target
    target_b = target[rand_index]
    bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
    # Indexing with `rand_index` on the right-hand side produces a copy, so
    # the in-place write cannot read pixels it has already overwritten.
    input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
    # The box may have been clipped at the border, so recompute the ratio from
    # the area that was actually replaced.
    box_area = (bbx2 - bbx1) * (bby2 - bby1)
    lam = float(1 - box_area / (input.size(-1) * input.size(-2)))
    return input, target_a, target_b, lam

def rand_bbox(size, lam):
    """Sample a random box whose area is roughly ``1 - lam`` of the image.

    Args:
        size: Shape of a batch; only ``size[2]`` and ``size[3]`` are read.
        lam: Mixing ratio; the box side is ``sqrt(1 - lam)`` times each of
            the two extents.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: lower and upper bounds along
        ``size[2]`` (the ``x`` values) and ``size[3]`` (the ``y`` values),
        each clipped to ``[0, extent]``.
    """
    extent_x, extent_y = size[2], size[3]
    side_ratio = np.sqrt(1. - lam)
    half_x = int(extent_x * side_ratio) // 2
    half_y = int(extent_y * side_ratio) // 2

    # Draw the box centre (x first, then y) uniformly over the image.
    mid_x = np.random.randint(extent_x)
    mid_y = np.random.randint(extent_y)

    # Clipping keeps the box inside the image when the centre is near an edge.
    return (
        np.clip(mid_x - half_x, 0, extent_x),
        np.clip(mid_y - half_y, 0, extent_y),
        np.clip(mid_x + half_x, 0, extent_x),
        np.clip(mid_y + half_y, 0, extent_y),
    )

# 5. K-fold cross-validation settings
k = 5
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
num_epochs = 30
batch_size = 32
cutmix_prob = 0.5
train_accuracies, val_accuracies = [], []

# combined_dataset is laid out as
#   [user images | CIFAR-10 (basic) | CIFAR-10 (augment1) | CIFAR-10 (augment2)]
# and the three CIFAR-10 segments are views of the same images. Splitting over
# the whole thing would put augmented views of a validation image into the
# training fold (leakage) and randomly augmented samples into validation.
# Folds are therefore drawn from the un-augmented part only; the augmented
# views are added back for training images alone.
num_user = len(user_dataset)
num_cifar = len(cifar10)
num_base = num_user + num_cifar
base_labels = combined_labels[:num_base]


def _train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over `loader` and return accuracy in percent."""
    model.train()
    correct, seen = 0, 0
    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Apply CutMix to roughly `cutmix_prob` of the batches.
        if np.random.rand() < cutmix_prob:
            inputs, target_a, target_b, lam = cutmix_data(inputs, targets)
            outputs = model(inputs)
            loss = lam * criterion(outputs, target_a) + (1 - lam) * criterion(outputs, target_b)
        else:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy is scored against the original labels even for CutMix
        # batches, so the training figure is pessimistic on mixed inputs.
        preds = outputs.argmax(dim=1)
        correct += (preds == targets).sum().item()
        seen += targets.size(0)
    return 100 * correct / seen


def _evaluate(model, loader):
    """Return the accuracy in percent of `model` on `loader` without gradients."""
    model.eval()
    correct, seen = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            preds = model(inputs).argmax(dim=1)
            correct += (preds == targets).sum().item()
            seen += targets.size(0)
    return 100 * correct / seen


# 6. Training and validation loop
for fold, (train_base_idx, val_idx) in enumerate(skf.split(np.zeros(num_base), base_labels)):
    print(f"\n=== Fold {fold + 1}/{k} ===")

    # The augmented views of CIFAR image i sit num_cifar and 2 * num_cifar
    # positions after its un-augmented entry in combined_dataset.
    cifar_train_idx = train_base_idx[train_base_idx >= num_user]
    train_idx = np.concatenate((
        train_base_idx,
        cifar_train_idx + num_cifar,
        cifar_train_idx + 2 * num_cifar,
    ))

    train_subset = Subset(combined_dataset, train_idx.tolist())
    val_subset = Subset(combined_dataset, val_idx.tolist())
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # A fresh, randomly initialised network per fold. Called without the
    # `pretrained` keyword, which newer torchvision releases deprecate.
    model = models.efficientnet_b0()
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_acc_history, val_acc_history = [], []

    for epoch in range(num_epochs):
        train_acc = _train_one_epoch(model, train_loader, criterion, optimizer)
        train_acc_history.append(train_acc)

        # Validation
        val_acc = _evaluate(model, val_loader)
        val_acc_history.append(val_acc)

        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")

    train_accuracies.append(train_acc_history)
    val_accuracies.append(val_acc_history)

# 7. Plot training and validation accuracy
plt.figure(figsize=(14, 5))
# Iterate over the folds that were actually recorded rather than range(k), so
# the plot still works when training stopped before all k folds finished.
for fold_number, (train_hist, val_hist) in enumerate(zip(train_accuracies, val_accuracies), start=1):
    # Derive the x-axis from the history itself so x and y always match.
    epochs_range = range(1, len(train_hist) + 1)
    plt.plot(epochs_range, train_hist, label=f'Fold {fold_number} Train Acc')
    plt.plot(epochs_range, val_hist, label=f'Fold {fold_number} Val Acc', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Train and Validation Accuracy per Fold')
if train_accuracies:
    # A legend with no plotted lines only produces a warning.
    plt.legend()
plt.show()



=== Fold 1/5 ===




KeyboardInterrupt: 