<a href="https://colab.research.google.com/github/PRESSANDPULL/hanghae99/blob/main/advanded_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [심화과제] CIFAR10

기본과제와 마찬가지로 개발 관련 기본 지식이 부족하여 학습하는 속도가 과제를 따라갈 수 없어서 과제는 우선 ChatGPT를 활용하였습니다..

##	1.	Dataset 변경:
torchvision.datasets.MNIST를 torchvision.datasets.CIFAR10으로 변경.
CIFAR10 데이터셋의 입력 shape은 **[3, 32, 32]**이므로, 이를 모델의 input_dim으로 반영.
##	2.	Activation 함수 변경:
모델에서 nn.ReLU를 nn.LeakyReLU로 변경.

In [2]:
import torch
import torch.nn as nn  # nn 모듈 import
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# CIFAR10 데이터셋 로드
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# DataLoader 생성
batch_size = 256
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

# Activation 함수 변경
class Model(nn.Module):
    def __init__(self, input_dim, n_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, n_dim)
        self.layer2 = nn.Linear(n_dim, n_dim)
        self.layer3 = nn.Linear(n_dim, 10)  # CIFAR10은 10개의 클래스
        self.act = nn.LeakyReLU()  # Leaky ReLU 적용

    def forward(self, x):
        x = torch.flatten(x, start_dim=1)
        x = self.act(self.layer1(x))
        x = self.act(self.layer2(x))
        x = self.layer3(x)
        return x

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:18<00:00, 9.01MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## 2. CIFAR10 입력 Shape 확인

In [3]:
# CIFAR10 입력 shape 확인
print("Trainset size:", len(trainset))
print("Image shape:", trainset[0][0].shape)  # (3, 32, 32)

# input_dim 설정
input_dim = 3 * 32 * 32
model = Model(input_dim, 1024)

Trainset size: 50000
Image shape: torch.Size([3, 32, 32])


## 3. SGD와 Adam 성능 비교

In [None]:
from torch.optim import SGD, Adam  # SGD와 Adam 모두 import
import matplotlib.pyplot as plt

# 손실 함수 정의
criterion = nn.CrossEntropyLoss()

# accuracy 함수 정의
def accuracy(model, dataloader):
    model.eval()  # 평가 모드
    correct = 0
    total = 0

    with torch.no_grad():  # 기울기 계산 비활성화
        for inputs, labels in dataloader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')  # 데이터 이동
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)  # 가장 높은 확률의 클래스 선택
            correct += (preds == labels).sum().item()  # 올바르게 예측한 샘플 수
            total += labels.size(0)  # 전체 샘플 수

    return correct / total  # 정확도 계산

# SGD와 Adam Optimizer 정의
optimizer_sgd = SGD(model.parameters(), lr=0.001)
optimizer_adam = Adam(model.parameters(), lr=0.001)

# SGD와 Adam 각각의 학습 정확도 저장
train_accs_sgd = []
train_accs_adam = []

# 모델을 GPU로 이동
model = model.to('cuda')

# 학습 루프
for epoch in range(50):  # n_epochs = 50
    print(f"Epoch {epoch + 1}/50")  # 에포크 시작 출력

    # SGD 학습
    model.train()
    running_loss_sgd = 0.0  # SGD 학습 손실 저장
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer_sgd.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_sgd.step()
        running_loss_sgd += loss.item()

        # 10번째 배치마다 출력
        if (batch_idx + 1) % 10 == 0:
            print(f"  [SGD] Batch {batch_idx + 1}/{len(trainloader)}, Loss: {loss.item():.4f}")

    train_acc_sgd = accuracy(model, trainloader)
    train_accs_sgd.append(train_acc_sgd)
    print(f"  [SGD] Epoch {epoch + 1} Loss: {running_loss_sgd / len(trainloader):.4f}, Train Accuracy: {train_acc_sgd:.4f}")

    # Adam 학습
    model.train()
    running_loss_adam = 0.0  # Adam 학습 손실 저장
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer_adam.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_adam.step()
        running_loss_adam += loss.item()

        # 10번째 배치마다 출력
        if (batch_idx + 1) % 10 == 0:
            print(f"  [Adam] Batch {batch_idx + 1}/{len(trainloader)}, Loss: {loss.item():.4f}")

    train_acc_adam = accuracy(model, trainloader)
    train_accs_adam.append(train_acc_adam)
    print(f"  [Adam] Epoch {epoch + 1} Loss: {running_loss_adam / len(trainloader):.4f}, Train Accuracy: {train_acc_adam:.4f}")

# SGD와 Adam 성능 비교 그래프
plt.plot(range(50), train_accs_sgd, label="SGD")
plt.plot(range(50), train_accs_adam, label="Adam")
plt.xlabel("Epoch")
plt.ylabel("Train Accuracy")
plt.title("SGD vs Adam: Train Accuracy")
plt.legend()
plt.show()

Epoch 1/50
  [SGD] Batch 10/196, Loss: 0.7124
  [SGD] Batch 20/196, Loss: 0.7665
  [SGD] Batch 30/196, Loss: 0.6415
  [SGD] Batch 40/196, Loss: 0.6817
  [SGD] Batch 50/196, Loss: 0.6786
  [SGD] Batch 60/196, Loss: 0.6019
  [SGD] Batch 70/196, Loss: 0.7321
  [SGD] Batch 80/196, Loss: 0.6753
  [SGD] Batch 90/196, Loss: 0.6989
  [SGD] Batch 100/196, Loss: 0.7100
  [SGD] Batch 110/196, Loss: 0.6562
  [SGD] Batch 120/196, Loss: 0.5955
  [SGD] Batch 130/196, Loss: 0.6661
  [SGD] Batch 140/196, Loss: 0.6245
  [SGD] Batch 150/196, Loss: 0.6903
  [SGD] Batch 160/196, Loss: 0.6102
  [SGD] Batch 170/196, Loss: 0.6907
  [SGD] Batch 180/196, Loss: 0.6991
  [SGD] Batch 190/196, Loss: 0.7306
  [SGD] Epoch 1 Loss: 0.6716, Train Accuracy: 0.7809
  [Adam] Batch 10/196, Loss: 0.7690
  [Adam] Batch 20/196, Loss: 0.8396
  [Adam] Batch 30/196, Loss: 0.7708
  [Adam] Batch 40/196, Loss: 0.6537
  [Adam] Batch 50/196, Loss: 0.7021
  [Adam] Batch 60/196, Loss: 0.6691
  [Adam] Batch 70/196, Loss: 0.7141
  [Adam] 

## 4. Leaky ReLU와 Sigmoid 성능 비교

In [None]:
# Sigmoid를 사용하는 모델 클래스 정의
class SigmoidModel(Model):
    def __init__(self, input_dim, n_dim):
        super().__init__(input_dim, n_dim)
        self.act = nn.Sigmoid()  # Sigmoid 적용

# Sigmoid 모델 학습
sigmoid_model = SigmoidModel(input_dim, 1024).to('cuda')
optimizer_sigmoid = Adam(sigmoid_model.parameters(), lr=0.001)
train_accs_sigmoid = []

for epoch in range(50):
    print(f"Epoch {epoch + 1}/50 [Sigmoid Model]")  # 에포크 출력
    sigmoid_model.train()
    running_loss_sigmoid = 0.0  # 에포크 손실 저장

    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer_sigmoid.zero_grad()
        outputs = sigmoid_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_sigmoid.step()
        running_loss_sigmoid += loss.item()

        # 10번째 배치마다 출력
        if (batch_idx + 1) % 10 == 0:
            print(f"  Batch {batch_idx + 1}/{len(trainloader)}, Loss: {loss.item():.4f}")

    train_acc = accuracy(sigmoid_model, trainloader)
    train_accs_sigmoid.append(train_acc)

    # 에포크별 손실과 정확도 출력
    print(f"  Epoch {epoch + 1} Loss: {running_loss_sigmoid / len(trainloader):.4f}, Train Accuracy: {train_acc:.4f}")

# Leaky ReLU와 Sigmoid 성능 비교 그래프
plt.plot(range(50), train_accs_adam, label="Leaky ReLU (Adam)")
plt.plot(range(50), train_accs_sigmoid, label="Sigmoid (Adam)")
plt.xlabel("Epoch")
plt.ylabel("Train Accuracy")
plt.title("Leaky ReLU vs Sigmoid: Train Accuracy")
plt.legend()
plt.show()

## 5. Dropout 적용 후 Generalization Error 확인

In [None]:
# Dropout이 포함된 모델 정의
class DropoutModel(Model):
    def __init__(self, input_dim, n_dim):
        super().__init__(input_dim, n_dim)
        self.dropout = nn.Dropout(0.1)  # Dropout 확률 0.1

    def forward(self, x):
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(self.act(self.layer1(x)))
        x = self.dropout(self.act(self.layer2(x)))
        x = self.layer3(x)
        return x

# Dropout 모델 학습
dropout_model = DropoutModel(input_dim, 1024).to('cuda')
optimizer_dropout = Adam(dropout_model.parameters(), lr=0.001)
train_accs_dropout = []
test_accs_dropout = []

for epoch in range(50):
    print(f"Epoch {epoch + 1}/50 [Dropout Model]")  # 에포크 시작 출력
    dropout_model.train()
    running_loss_dropout = 0.0  # 에포크 손실 저장

    for batch_idx, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer_dropout.zero_grad()
        outputs = dropout_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_dropout.step()
        running_loss_dropout += loss.item()

        # 10번째 배치마다 손실 출력
        if (batch_idx + 1) % 10 == 0:
            print(f"  Batch {batch_idx + 1}/{len(trainloader)}, Loss: {loss.item():.4f}")

    # 학습 및 테스트 정확도 계산
    train_acc = accuracy(dropout_model, trainloader)
    test_acc = accuracy(dropout_model, testloader)
    train_accs_dropout.append(train_acc)
    test_accs_dropout.append(test_acc)

    # 에포크 손실 및 정확도 출력
    print(f"  Epoch {epoch + 1} Loss: {running_loss_dropout / len(trainloader):.4f}")
    print(f"  Train Accuracy: {train_acc:.4f}, Test Accuracy: {test_acc:.4f}")

# Dropout 적용 전후 성능 비교 그래프
plt.plot(range(50), train_accs_dropout, label="Train Accuracy (Dropout)")
plt.plot(range(50), test_accs_dropout, label="Test Accuracy (Dropout)")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Dropout: Train vs Test Accuracy")
plt.legend()
plt.show()