In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import datasets, transforms
import numpy as np
import random
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.transforms as transforms
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

In [None]:
# Random seed setup
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator; when CUDA is available the CUDA generators are seeded as
    well. cuDNN is additionally switched to deterministic mode with
    benchmarking disabled.

    Args:
        seed: Value passed unchanged to each seeding function.
    """
    # CPU-side generators: Python, NumPy, then PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # GPU generators (current device first, then every device).
    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    # Ask cuDNN for deterministic algorithms and turn off its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
# Apply the random seed: every generator handled by set_seed() is seeded with 42.
set_seed(42)

In [2]:
# CIFAR-100 superclass definitions.
# Maps each of the 20 superclass names to a list of five integer class indices.
# evaluate_model() counts a prediction as superclass-correct when the predicted
# index appears in the same list as the ground-truth index.
# NOTE(review): the indices are presumably the CIFAR-100 fine-label ids as
# exposed by torchvision — verify against the dataset's class ordering.
superclasses = {
    'aquatic_mammals': [4, 30, 55, 72, 95],
    'fish': [1, 32, 67, 73, 91],
    'flowers': [54, 62, 70, 82, 92],
    'food_containers': [9, 10, 16, 28, 61],
    'fruit_and_vegetables': [0, 51, 53, 57, 83],
    'household_electrical_devices': [22, 39, 40, 86, 87],
    'household_furniture': [5, 20, 25, 84, 94],
    'insects': [6, 7, 14, 18, 24],
    'large_carnivores': [3, 42, 43, 88, 97],
    'large_man-made_outdoor_things': [12, 17, 37, 68, 76],
    'large_natural_outdoor_scenes': [23, 33, 49, 60, 71],
    'large_omnivores_and_herbivores': [15, 19, 21, 31, 38],
    'medium_mammals': [34, 63, 64, 66, 75],
    'non-insect_invertebrates': [26, 45, 77, 79, 99],
    'people': [2, 11, 35, 46, 98],
    'reptiles': [27, 29, 44, 78, 93],
    'small_mammals': [36, 50, 65, 74, 80],
    'trees': [47, 52, 56, 59, 96],
    'vehicles_1': [8, 13, 48, 58, 90],
    'vehicles_2': [41, 69, 81, 85, 89]
}

In [3]:
# Early stopping implementation
class EarlyStopping:
    """Track a loss-like metric and flag when it has stopped improving.

    Each call negates the supplied value, so a *lower* input is treated as
    better. A call counts as an improvement only when the negated value is
    at least ``best_score + min_delta``; otherwise a counter is incremented,
    and once it reaches ``patience`` the ``early_stop`` flag is set.

    Attributes:
        patience: Number of consecutive non-improving calls that triggers
            ``early_stop``.
        min_delta: Margin by which the best score must be beaten.
        counter: Consecutive non-improving calls seen so far.
        best_score: Best negated value seen (``None`` before the first call).
        early_stop: Becomes ``True`` once patience is exhausted; it is never
            reset by this class.
    """

    def __init__(self, patience=10, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        """Record one new measurement of the monitored value."""
        score = -val_loss

        # The first measurement simply becomes the baseline.
        if self.best_score is None:
            self.best_score = score
            return

        # Not better than the baseline by the required margin: count a strike.
        if score < self.best_score + self.min_delta:
            self.counter += 1
            self.early_stop = self.early_stop or self.counter >= self.patience
            return

        # Genuine improvement: new baseline, strikes cleared.
        self.best_score, self.counter = score, 0

In [4]:
# DenseNet block definition (with Dropout)
class DenseBlock(nn.Module):
    """Stack of BN-ReLU-Conv3x3-Dropout layers with dense concatenation.

    Layer ``i`` receives ``in_channels + i * growth_rate`` channels and emits
    ``growth_rate`` new channels, which are concatenated onto its input along
    dimension 1. The block output therefore has
    ``in_channels + num_layers * growth_rate`` channels.

    Args:
        in_channels: Channel count of the block input.
        growth_rate: Channels produced by each layer.
        num_layers: Number of layers in the block.
        dropout_rate: Dropout probability applied after every convolution.
    """

    def __init__(self, in_channels, growth_rate, num_layers, dropout_rate=0.2):
        super().__init__()
        # Input width grows by growth_rate for every preceding layer.
        layer_inputs = (in_channels + idx * growth_rate for idx in range(num_layers))
        self.net = nn.Sequential(
            *[self._make_layer(width, growth_rate, dropout_rate) for width in layer_inputs]
        )

    def _make_layer(self, in_channels, growth_rate, dropout_rate):
        """Build one BN -> ReLU -> 3x3 conv (no bias) -> Dropout unit."""
        norm = nn.BatchNorm2d(in_channels)
        activation = nn.ReLU(inplace=True)
        conv = nn.Conv2d(in_channels, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)
        drop = nn.Dropout(p=dropout_rate)  # Dropout added
        return nn.Sequential(norm, activation, conv, drop)

    def forward(self, x):
        """Run every layer, appending its output to the running feature map."""
        features = x
        for layer in self.net:
            features = torch.cat([features, layer(features)], 1)
        return features


In [5]:
# Transition layer definition
class TransitionLayer(nn.Module):
    """BN -> ReLU -> 1x1 conv -> 2x2 average pool.

    The 1x1 convolution (no bias) maps ``in_channels`` to ``out_channels``
    and the average pool (kernel 2, stride 2) halves both spatial dimensions.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count after the 1x1 convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.AvgPool2d(2, stride=2),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the transition pipeline to ``x``."""
        reduced = self.net(x)
        return reduced

In [6]:
# Model revision: DenseNet with increased capacity
class DenseNet(nn.Module):
    """DenseNet classifier assembled from DenseBlock and TransitionLayer.

    A 3x3 stem convolution maps the 3-channel input to ``2 * growth_rate``
    channels. It is followed by ``num_blocks`` dense blocks, with a
    transition layer after every block except the last. The head applies
    BatchNorm, ReLU, a mean over dimensions 2 and 3, dropout and a linear
    layer.

    Args:
        growth_rate: Channels added by each dense layer.
        num_blocks: Number of dense blocks.
        num_layers_per_block: Layers inside each dense block.
        reduction: Factor applied to the channel count by each transition
            (the result is truncated with ``int``).
        num_classes: Size of the output logits.
        dropout_rate: Dropout probability used inside the dense blocks and
            before the final linear layer.
    """

    def __init__(self, growth_rate=48, num_blocks=5, num_layers_per_block=16, reduction=0.5, num_classes=100, dropout_rate=0.05):
        super().__init__()
        width = 2 * growth_rate  # initial channel count

        # Stem convolution
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, width, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
        )

        # Dense blocks interleaved with transition layers
        self.blocks = nn.ModuleList()
        final_idx = num_blocks - 1
        for block_idx in range(num_blocks):
            self.blocks.append(DenseBlock(width, growth_rate, num_layers_per_block, dropout_rate))
            width += growth_rate * num_layers_per_block
            if block_idx == final_idx:
                # No transition after the last dense block.
                continue
            compressed = int(width * reduction)
            self.blocks.append(TransitionLayer(width, compressed))
            width = compressed

        # Final BatchNorm, classifier and dropout
        self.bn = nn.BatchNorm2d(width)
        self.fc = nn.Linear(width, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        out = self.conv1(x)
        for stage in self.blocks:
            out = stage(out)
        out = torch.relu(self.bn(out))
        out = out.mean(dim=[2, 3])  # global average pooling
        return self.fc(self.dropout(out))

In [7]:
# DenseNet factory function
def densenet_cifar(num_classes=100):
    """Build a DenseNet with growth rate 32 and three blocks of six layers.

    Args:
        num_classes: Number of output classes passed on to ``DenseNet``.

    Returns:
        A freshly constructed ``DenseNet`` using reduction 0.5 and the
        class's default dropout rate.
    """
    architecture = dict(growth_rate=32, num_blocks=3, num_layers_per_block=6, reduction=0.5)
    return DenseNet(num_classes=num_classes, **architecture)

In [8]:
# Data augmentation (Cutout; MixUp helpers follow)
class Cutout(object):
    """Zero out randomly placed square patches of an image tensor.

    The image is indexed as ``(C, H, W)``: ``size(1)`` is used as the height
    and ``size(2)`` as the width. The same 2-D mask is applied to every
    channel. Holes are centred on a uniformly drawn pixel and clipped at the
    image border, so a hole can be smaller than ``length x length`` (and has
    side ``2 * (length // 2)`` at most).

    Hole positions are drawn from PyTorch's RNG rather than NumPy's global
    RNG. PyTorch documents a distinct per-worker seed for its own generator
    in multi-process ``DataLoader`` workers, whereas seeds of other libraries
    may be duplicated across workers, which would make the workers cut
    identical holes.

    Args:
        n_holes: Number of patches to cut from each image.
        length: Nominal side length of each square patch, in pixels.
    """

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Return a copy of ``img`` with ``n_holes`` patches set to zero."""
        h, w = img.size(1), img.size(2)

        # Build the mask next to the image (same device); keep the image's
        # dtype when it is floating point, otherwise fall back to float32 so
        # integer inputs are promoted exactly as a float mask would do.
        mask_dtype = img.dtype if img.is_floating_point() else torch.float32
        mask = torch.ones((h, w), dtype=mask_dtype, device=img.device)

        half = self.length // 2
        for _ in range(self.n_holes):
            # Hole centre, drawn on the CPU generator seeded per worker.
            y = int(torch.randint(h, (1,)).item())
            x = int(torch.randint(w, (1,)).item())

            # Clip the square to the image bounds.
            y1, y2 = max(y - half, 0), min(y + half, h)
            x1, x2 = max(x - half, 0), min(x + half, w)

            mask[y1:y2, x1:x2] = 0

        # Broadcast the (H, W) mask over the channel dimension.
        return img * mask.expand_as(img)

# Data augmentation (MixUp disabled, or alpha adjusted)
# MixUp is left out of the initial training runs
def mixup_data(x, y, alpha=0.4):  # alpha lowered to 0.4
    """Blend each sample of a batch with another sample of the same batch.

    Args:
        x: Batch tensor; dimension 0 is the batch dimension.
        y: Targets indexed along dimension 0 like ``x``.
        alpha: Parameter of the symmetric Beta distribution used to draw the
            mixing weight. With ``alpha <= 0`` the weight is fixed to 1, so
            the batch is returned unmixed.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is
        ``lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y``, ``y_b`` is
        ``y[perm]`` and ``lam`` is the mixing weight.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    # Random pairing of samples within the batch.
    permutation = torch.randperm(x.shape[0]).to(x.device)

    blended = lam * x + (1 - lam) * x[permutation, :]
    return blended, y, y[permutation], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the losses against both MixUp targets, weighted by ``lam``.

    Args:
        criterion: Callable invoked as ``criterion(pred, target)``.
        pred: Predictions passed to ``criterion``.
        y_a: First set of targets (weight ``lam``).
        y_b: Second set of targets (weight ``1 - lam``).
        lam: Mixing weight.

    Returns:
        ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [9]:
# Data augmentation revised (Cutout size and Random Erasing probability reduced)
# Data augmentation - AutoAugment added
# Training pipeline: geometric augmentation and AutoAugment on the PIL image,
# then tensor conversion and normalisation, then the tensor-space erasers.
transform_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    AutoAugment(policy=AutoAugmentPolicy.CIFAR10),  # AutoAugment added
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.5071, 0.4867, 0.4408),
        std=(0.2675, 0.2565, 0.2761),
    ),
    Cutout(n_holes=1, length=8),
    transforms.RandomErasing(p=0.1),
])

# Evaluation pipeline: tensor conversion and the same normalisation only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.5071, 0.4867, 0.4408),
        std=(0.2675, 0.2565, 0.2761),
    ),
])


In [10]:
# Load the datasets
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

train_dataset = datasets.CIFAR100(
    root='./data',
    train=True,
    transform=transform_train,
    download=True,
)
test_dataset = datasets.CIFAR100(
    root='./data',
    train=False,
    transform=transform_test,
    download=True,
)

# Only the training loader shuffles; both use batches of 128 and 4 workers.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=128,
    num_workers=4,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=128,
    num_workers=4,
    shuffle=False,
)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
# Model revision: dropout rate raised slightly, learning rate adjusted
# All other DenseNet arguments keep the class defaults.
model = DenseNet(dropout_rate=0.2, num_classes=100)
model = model.to(device)

In [12]:
# Optimizer and loss function (learning rate and weight_decay adjusted)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=1e-4,
    weight_decay=5e-5,  # learning rate and weight_decay adjusted
)

In [13]:
# Learning-rate scheduler setup - CosineAnnealingWarmRestarts
# NOTE(review): in the visible code train_and_evaluate() only calls
# scheduler_plateau.step(...); `scheduler` is never stepped, so the cosine
# schedule appears to have no effect after construction — confirm intent.
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-5)
# ReduceLROnPlateau added to lower the learning rate dynamically.
# mode='max' because the training loop feeds it the Top-1 accuracy.
scheduler_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)

In [14]:
# Early stopping added
# The training loop calls this with the negated Top-1 accuracy (in percent),
# so min_delta=0.01 is a margin of 0.01 percentage points.
early_stopping = EarlyStopping(patience=10, min_delta=0.01)

In [15]:
# Model evaluation function (returns Top-1, Top-5 and mean superclass accuracy)
def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader``.

    Relies on the module-level ``device`` and ``superclasses`` mapping. The
    model is switched to eval mode and is not switched back.

    A prediction is superclass-correct when the predicted index belongs to
    the same ``superclasses`` entry as the label. Labels that appear in no
    entry are ignored for the superclass metric.

    Args:
        model: Callable module mapping an image batch to per-class scores
            with at least five classes along dimension 1.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Tuple ``(top1_acc, top5_acc, average_superclass_acc)`` in percent.
        The last value is the unweighted mean over all superclasses, where a
        superclass with no samples contributes 0. If the loader yields no
        samples, the Top-1 and Top-5 accuracies are reported as 0.0.
    """
    model.eval()

    # Precompute label -> superclass names once instead of scanning every
    # superclass for every sample.
    members = {name: set(indices) for name, indices in superclasses.items()}
    fine_to_super = {}
    for name, indices in superclasses.items():
        for fine_idx in indices:
            fine_to_super.setdefault(fine_idx, []).append(name)

    correct1 = 0
    correct5 = 0
    total = 0
    superclass_correct = dict.fromkeys(superclasses, 0)
    superclass_total = dict.fromkeys(superclasses, 0)

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # Top-1 accuracy
            _, predicted = torch.max(outputs, 1)
            correct1 += (predicted == labels).sum().item()

            # Top-5 accuracy: one vectorized comparison per batch rather
            # than a per-sample membership test on device tensors.
            top5_pred = torch.topk(outputs, 5, dim=1).indices
            correct5 += (top5_pred == labels.unsqueeze(1)).any(dim=1).sum().item()

            # Superclass accuracy: a single host transfer per batch.
            for label, pred in zip(labels.tolist(), predicted.tolist()):
                for name in fine_to_super.get(label, ()):
                    superclass_total[name] += 1
                    if pred in members[name]:
                        superclass_correct[name] += 1

            total += labels.size(0)

    # Guard against an empty loader instead of dividing by zero.
    top1_acc = correct1 / total * 100 if total > 0 else 0.0
    top5_acc = correct5 / total * 100 if total > 0 else 0.0

    # Mean of the per-superclass accuracies
    superclass_acc = {
        name: (superclass_correct[name] / superclass_total[name]) * 100 if superclass_total[name] > 0 else 0
        for name in superclasses.keys()
    }
    average_superclass_acc = sum(superclass_acc.values()) / len(superclass_acc)

    return top1_acc, top5_acc, average_superclass_acc

In [16]:
# Training and evaluation loop (MixUp not used)
def train_and_evaluate(epochs):
    """Train the module-level ``model`` and evaluate it after every epoch.

    Uses the module-level ``model``, ``train_loader``, ``test_loader``,
    ``optimizer``, ``criterion``, ``scheduler_plateau``, ``early_stopping``
    and ``device``. After each epoch the Top-1 accuracy drives both the
    plateau scheduler and early stopping.

    NOTE(review): evaluation runs on ``test_loader``, so the "validation"
    accuracy that steers the scheduler and early stopping is measured on the
    test split — confirm this is intended.
    NOTE(review): only ``scheduler_plateau`` is stepped here; the
    module-level ``scheduler`` is not used by this loop.

    Args:
        epochs: Maximum number of epochs to run.

    Returns:
        Tuple of per-epoch lists ``(epoch_list, train_acc_list,
        val_acc_list, top1_acc_list, top5_acc_list,
        avg_superclass_acc_list)``. ``val_acc_list`` and ``top1_acc_list``
        hold the same values.
    """
    epoch_list = []
    train_acc_list = []
    val_acc_list = []
    top1_acc_list = []
    top5_acc_list = []
    avg_superclass_acc_list = []  # mean superclass accuracy per epoch

    for epoch in range(epochs):
        model.train()
        running_loss = 0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Plain supervised step (MixUp removed)
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_acc = 100 * correct / total

        # Validation / accuracy evaluation
        top1_acc, top5_acc, average_superclass_acc = evaluate_model(model, test_loader)

        # Record the epoch's metrics
        epoch_list.append(epoch + 1)
        train_acc_list.append(train_acc)
        val_acc_list.append(top1_acc)  # Top-1 accuracy doubles as validation accuracy
        top1_acc_list.append(top1_acc)
        top5_acc_list.append(top5_acc)
        avg_superclass_acc_list.append(average_superclass_acc)

        # Learning-rate scheduler step (plateau on Top-1 accuracy)
        scheduler_plateau.step(top1_acc)

        # Report the epoch before the early-stopping check so the metrics of
        # the epoch that triggers the stop are not lost.
        total_score = top1_acc + top5_acc + average_superclass_acc
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_acc:.2f}%, Val Accuracy (Top-1): {top1_acc:.2f}%")
        print(f"Top-1 Accuracy: {top1_acc:.2f}% | Top-5 Accuracy: {top5_acc:.2f}% | Avg Superclass Accuracy: {average_superclass_acc:.2f}% | Total Score: {total_score:.2f}")

        # Early stopping check. EarlyStopping negates its input, so passing
        # -top1_acc makes a higher accuracy count as an improvement.
        early_stopping(-top1_acc)
        if early_stopping.early_stop:
            print(f"Early stopping at epoch {epoch+1}")
            break

    return epoch_list, train_acc_list, val_acc_list, top1_acc_list, top5_acc_list, avg_superclass_acc_list


In [17]:
# 시각화 코드 추가
import matplotlib.pyplot as plt

# Accuracy visualisation (Train, Val, Top-1, Top-5)
def plot_accuracies(epoch_list, train_acc_list, val_acc_list, top1_acc_list, top5_acc_list):
    """Draw the four accuracy curves against the epoch number and show them.

    Args:
        epoch_list: X-axis values shared by every curve.
        train_acc_list: Training accuracy per epoch.
        val_acc_list: Validation (Top-1) accuracy per epoch.
        top1_acc_list: Top-1 accuracy per epoch.
        top5_acc_list: Top-5 accuracy per epoch.
    """
    plt.figure(figsize=(10,5))

    curves = (
        (train_acc_list, 'Train Accuracy'),
        (val_acc_list, 'Validation Accuracy (Top-1)'),
        (top1_acc_list, 'Top-1 Accuracy'),
        (top5_acc_list, 'Top-5 Accuracy'),
    )
    for values, legend_label in curves:
        plt.plot(epoch_list, values, label=legend_label)

    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.title('Accuracy Metrics Over Epochs')
    plt.show()

# Superclass accuracy visualisation
def plot_superclass_accuracies(superclass_acc_list):
    """Show a bar chart of the per-superclass accuracy of the last entry.

    Bar labels are the keys of the first entry; bar heights are looked up
    under those keys in the last entry.

    Args:
        superclass_acc_list: Non-empty sequence of mappings from superclass
            name to accuracy. NOTE(review): nothing in the visible code
            builds such a list (training only collects the averaged value)
            — confirm where the input is expected to come from.
    """
    first_entry, final_entry = superclass_acc_list[0], superclass_acc_list[-1]
    superclass_names = list(first_entry.keys())
    bar_heights = [final_entry[name] for name in superclass_names]

    plt.figure(figsize=(12,6))
    plt.bar(superclass_names, bar_heights)
    plt.xticks(rotation=90)
    plt.ylabel('Accuracy (%)')
    plt.title('Superclass Accuracy at Final Epoch')
    plt.show()

In [None]:
# Run training
# Upper bound on the number of epochs; early stopping inside
# train_and_evaluate() may end the run sooner.
epochs = 200
# The returned per-epoch lists feed the plotting cells below.
epoch_list, train_acc_list, val_acc_list, top1_acc_list, top5_acc_list, avg_superclass_acc_list = train_and_evaluate(epochs)

Epoch [1/200] - Loss: 4.0798, Train Accuracy: 7.97%, Val Accuracy (Top-1): 13.46%
Top-1 Accuracy: 13.46% | Top-5 Accuracy: 38.42% | Avg Superclass Accuracy: 25.51% | Total Score: 77.39
Epoch [2/200] - Loss: 3.5384, Train Accuracy: 16.36%, Val Accuracy (Top-1): 24.71%
Top-1 Accuracy: 24.71% | Top-5 Accuracy: 55.27% | Avg Superclass Accuracy: 38.88% | Total Score: 118.86
Epoch [3/200] - Loss: 3.1382, Train Accuracy: 23.39%, Val Accuracy (Top-1): 27.64%
Top-1 Accuracy: 27.64% | Top-5 Accuracy: 59.34% | Avg Superclass Accuracy: 40.50% | Total Score: 127.48
Epoch [4/200] - Loss: 2.8252, Train Accuracy: 29.44%, Val Accuracy (Top-1): 33.26%
Top-1 Accuracy: 33.26% | Top-5 Accuracy: 64.74% | Avg Superclass Accuracy: 47.87% | Total Score: 145.87
Epoch [5/200] - Loss: 2.5823, Train Accuracy: 34.21%, Val Accuracy (Top-1): 28.45%
Top-1 Accuracy: 28.45% | Top-5 Accuracy: 55.55% | Avg Superclass Accuracy: 41.64% | Total Score: 125.64
Epoch [6/200] - Loss: 2.3981, Train Accuracy: 37.62%, Val Accuracy 

In [40]:
# Accuracy visualisation
# Must run after the training cell: the lists passed here are only defined
# by the train_and_evaluate(...) call.
plot_accuracies(epoch_list, train_acc_list, val_acc_list, top1_acc_list, top5_acc_list)

NameError: name 'epoch_list' is not defined

In [None]:
# Average superclass accuracy visualisation
def plot_avg_superclass_accuracies(avg_superclass_acc_list):
    """Plot the average superclass accuracy against a 1-based epoch index.

    Args:
        avg_superclass_acc_list: Sequence with one accuracy value per epoch.
    """
    plt.figure(figsize=(10,5))

    n_epochs = len(avg_superclass_acc_list)
    epoch_axis = range(1, n_epochs + 1)
    plt.plot(epoch_axis, avg_superclass_acc_list, label='Avg Superclass Accuracy')

    plt.xlabel('Epochs')
    plt.ylabel('Avg Superclass Accuracy (%)')
    plt.legend()
    plt.title('Average Superclass Accuracy Over Epochs')
    plt.show()

# Plot the per-epoch average superclass accuracy returned by the training run.
plot_avg_superclass_accuracies(avg_superclass_acc_list)