## PYTORCH MLP 모델

- CIFAR10 데이터셋을 사용하여 간단한 MLP 모델을 훈련 및 평가하는 내용입니다
- 모델은 Fully Connected Layer들로 구성
- 활성화 함수로는 ReLU를 사용
- 훈련 함수와 테스트 함수도 포함

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스

# 데이터 전처리 정의
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 데이터셋 로드
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

def train():
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {running_loss / len(train_loader):.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


100%|██████████| 170M/170M [00:10<00:00, 15.7MB/s] 


Epoch [1/10], Loss: 1.6538
Epoch [2/10], Loss: 1.4355
Epoch [3/10], Loss: 1.3241
Epoch [4/10], Loss: 1.2271
Epoch [5/10], Loss: 1.1404
Epoch [6/10], Loss: 1.0678
Epoch [7/10], Loss: 0.9954
Epoch [8/10], Loss: 0.9235
Epoch [9/10], Loss: 0.8585
Epoch [10/10], Loss: 0.7944
Test Accuracy: 54.05%


## 📌 **하이퍼파라미터 수정**

### Weight Decay (가중치 감쇠)
- `weight_decay=0.0001`으로 **Adam 옵티마이저**에 추가했습니다.

### Dropout Rate (드롭아웃 확률)
- MLP 모델의 각 레이어 사이에 `nn.Dropout(0.5)`를 추가했습니다.

### Batch Normalization (배치 정규화)
- **레이어마다** `nn.BatchNorm1d()`를 적용했습니다.

### Learning Rate Scheduler (학습률 스케줄러)
- `torch.optim.lr_scheduler.StepLR`을 이용하여 **매 에폭 5회마다 학습률을 10%로 감소**시키도록 설정했습니다.

### Activation Function (활성화 함수)
- 마지막 **Hidden Layer**는 `nn.LeakyReLU(0.1)`로 변경했습니다.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0001  # 가중치 감쇠
DROPOUT_RATE = 0.5  # 드롭아웃 확률
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스

# 데이터 전처리 정의
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 데이터셋 로드
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

def train():
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {running_loss / len(train_loader):.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.7861
Epoch [2/10], Loss: 1.6285
Epoch [3/10], Loss: 1.5698
Epoch [4/10], Loss: 1.5375
Epoch [5/10], Loss: 1.5103
Epoch [6/10], Loss: 1.4344
Epoch [7/10], Loss: 1.3983
Epoch [8/10], Loss: 1.3852
Epoch [9/10], Loss: 1.3641
Epoch [10/10], Loss: 1.3533
Test Accuracy: 53.97%


### 📌 레이어 추가 및 하이퍼파라미터 변경

### 🔨 레이어 추가
- 기존의 모델 구조에 **`nn.Linear(256, 256)`** 레이어를 추가하였습니다.  
- 추가된 레이어 이후에도 일관성을 유지하기 위해 **Batch Normalization, ReLU, Dropout**을 추가하였습니다.

```python
nn.Linear(256, 256),  # 추가된 레이어
nn.BatchNorm1d(256),
nn.ReLU(),
nn.Dropout(DROPOUT_RATE),
```

### 🔨 하이퍼파라미터 변경
- **DROPOUT_RATE**: 0.5 ➔ 0.3 (과적합 방지를 위해 조정)
- **WEIGHT_DECAY**: 0.0001 ➔ 1e-5 (모델의 학습 능력 향상을 위해 낮춤)

### 🔨 학습률 스케줄러 변경
- 학습률 감소 비율을 0.1 ➔ 0.5 로 변경하여 너무 급격히 감소하지 않도록 설정

```python
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
```



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# 데이터 전처리 정의
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 데이터셋 로드
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),  # LeakyReLU 대신 ReLU 사용
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        # 모델 저장: 베스트 로스일 경우
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.7386
Best model saved with loss: 1.7386
Epoch [2/10], Loss: 1.5618
Best model saved with loss: 1.5618
Epoch [3/10], Loss: 1.4865
Best model saved with loss: 1.4865
Epoch [4/10], Loss: 1.4331
Best model saved with loss: 1.4331
Epoch [5/10], Loss: 1.3920
Best model saved with loss: 1.3920
Epoch [6/10], Loss: 1.3116
Best model saved with loss: 1.3116
Epoch [7/10], Loss: 1.2709
Best model saved with loss: 1.2709
Epoch [8/10], Loss: 1.2503
Best model saved with loss: 1.2503
Epoch [9/10], Loss: 1.2269
Best model saved with loss: 1.2269
Epoch [10/10], Loss: 1.1999
Best model saved with loss: 1.1999
Test Accuracy: 56.10%


### 📌 **데이터 증강 기법 적용**
- `RandomHorizontalFlip()`: 이미지를 좌우로 무작위로 뒤집음.
- `RandomCrop(32, padding=4)`: 원래 크기로 되돌리기 전에 패딩을 추가하여 임의의 부분을 잘라냄.
- `ColorJitter()`: 밝기, 대비, 채도, 색조를 무작위로 변경.
- `RandomRotation(10)`: 이미지를 최대 10도 범위에서 무작위로 회전.

```python
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
```

- 데이터 증강은 데이터의 수를 늘리는 것이 아니라, 학습 과정에서 기존 데이터를 변형하여 다양한 방식으로 보여주는 것입니다.

### 📌 **데이터 증강의 특징**
- 데이터 수는 그대로 유지됩니다. (`train_dataset`의 크기는 동일합니다.)
- 매번 미니배치가 모델에 전달될 때, 동적으로 변형이 적용됩니다.
- 같은 이미지라 하더라도 매번 다른 형태(회전, 색상 변경, 좌우 반전 등)로 모델에 입력됩니다.
- 증강된 이미지는 메모리에 저장되지 않고, 매번 실시간으로 처리됩니다.

### 📌 **예시**
- 학습 데이터셋의 크기는 여전히 `len(train_dataset) = 50,000` 입니다.
- 하지만 각 에포크마다 모델은 완전히 다른 형태의 변형된 이미지를 보게 됩니다.
- 결과적으로는 데이터의 다양성을 늘려 과적합을 방지하고 모델의 일반화 성능을 향상시킬 수 있습니다.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# 데이터 전처리 정의 (데이터 증강 추가)
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# 데이터셋 로드
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        # 모델 저장: 베스트 로스일 경우
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.9366
Best model saved with loss: 1.9366
Epoch [2/10], Loss: 1.8013
Best model saved with loss: 1.8013
Epoch [3/10], Loss: 1.7497
Best model saved with loss: 1.7497
Epoch [4/10], Loss: 1.7116
Best model saved with loss: 1.7116
Epoch [5/10], Loss: 1.6841
Best model saved with loss: 1.6841
Epoch [6/10], Loss: 1.6448
Best model saved with loss: 1.6448
Epoch [7/10], Loss: 1.6258
Best model saved with loss: 1.6258
Epoch [8/10], Loss: 1.6110
Best model saved with loss: 1.6110
Epoch [9/10], Loss: 1.6040
Best model saved with loss: 1.6040
Epoch [10/10], Loss: 1.5969
Best model saved with loss: 1.5969
Test Accuracy: 50.23%


### 📌 **데이터 증강 강도 변경**

1. 데이터 증강 강도 감소:
- ColorJitter: brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05 로 줄임.
- RandomRotation: 10 ➔ 5 로 줄임.

2. 학습 에폭 증가:
- EPOCHS = 10 ➔ 20 으로 증가.

3. 두 가지 데이터 증강 기법 정의:
- weak_transform: 초기 학습에 사용, 가벼운 데이터 증강 (RandomHorizontalFlip, RandomCrop 만 적용)
- strong_transform: 후반 학습에 사용, 강한 데이터 증강 (ColorJitter, RandomRotation 추가)

4. 에폭의 절반 이후 데이터 증강 기법 교체:

```python
if epoch == EPOCHS // 2:
    train_dataset.transform = strong_transform
    print("Switched to Strong Data Augmentation")
```

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20  # 에폭 수 증가
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# CIFAR-10 데이터셋의 실제 평균과 표준편차 사용
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드 (초기에는 약한 데이터 증강 사용)
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss, train_dataset
    model.train()
    for epoch in range(EPOCHS):
        # 에폭 절반 이후 강한 데이터 증강으로 교체
        if epoch == EPOCHS // 2:
            train_dataset.transform = strong_transform
            print("Switched to Strong Data Augmentation")

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        # 모델 저장: 베스트 로스일 경우
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


### 📌 **CIFAR10 정규화값 변경**

- CIFAR-10의 평균: (0.4914, 0.4822, 0.4465)

- CIFAR-10의 표준편차: (0.2023, 0.1994, 0.2010)

```python
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# CIFAR-10 데이터셋의 실제 평균과 표준편차 사용
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 데이터 전처리 정의 (데이터 증강 추가)
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss
    model.train()
    for epoch in range(EPOCHS):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        # 모델 저장: 베스트 로스일 경우
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    train()
    test()


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.9369
Best model saved with loss: 1.9369
Epoch [2/10], Loss: 1.8046
Best model saved with loss: 1.8046
Epoch [3/10], Loss: 1.7432
Best model saved with loss: 1.7432
Epoch [4/10], Loss: 1.7115
Best model saved with loss: 1.7115
Epoch [5/10], Loss: 1.6846
Best model saved with loss: 1.6846
Epoch [6/10], Loss: 1.6456
Best model saved with loss: 1.6456
Epoch [7/10], Loss: 1.6194
Best model saved with loss: 1.6194
Epoch [8/10], Loss: 1.6148
Best model saved with loss: 1.6148
Epoch [9/10], Loss: 1.6003
Best model saved with loss: 1.6003
Epoch [10/10], Loss: 1.5886
Best model saved with loss: 1.5886
Test Accuracy: 49.81%


###📌 **모델 불러오기 기능 추가**

1. 모델 불러오기 함수 (load_model) 추가:

```python
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')
```
- 학습을 시작하기 전에 저장된 모델(`best_model.pth`)이 존재하면 이를 불러옵니다.
- 모델이 없으면 새로 학습을 시작합니다.

2. 모델 불러오기 코드 추가 (`main()` 함수 내):

```python
if __name__ == "__main__":
    load_model()  # 저장된 모델 불러오기
    train()
    test()


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20  # 에폭 수 증가
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# CIFAR-10 데이터셋의 실제 평균과 표준편차 사용
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드 (초기에는 약한 데이터 증강 사용)
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss, train_dataset
    model.train()
    for epoch in range(EPOCHS):
        # 에폭 절반 이후 강한 데이터 증강으로 교체
        if epoch == EPOCHS // 2:
            train_dataset.transform = strong_transform
            print("Switched to Strong Data Augmentation")

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()  # 학습률 감소

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        # 모델 저장: 베스트 로스일 경우
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    load_model()  # 저장된 모델 불러오기
    train()
    test()


100%|██████████| 170M/170M [00:01<00:00, 86.5MB/s]


No checkpoint found, training from scratch.
Epoch [1/20], Loss: 1.8899
Best model saved with loss: 1.8899
Epoch [2/20], Loss: 1.7429
Best model saved with loss: 1.7429
Epoch [3/20], Loss: 1.6875
Best model saved with loss: 1.6875
Epoch [4/20], Loss: 1.6513
Best model saved with loss: 1.6513
Epoch [5/20], Loss: 1.6220
Best model saved with loss: 1.6220
Epoch [6/20], Loss: 1.5676
Best model saved with loss: 1.5676
Epoch [7/20], Loss: 1.5471
Best model saved with loss: 1.5471
Epoch [8/20], Loss: 1.5373
Best model saved with loss: 1.5373
Epoch [9/20], Loss: 1.5266
Best model saved with loss: 1.5266
Epoch [10/20], Loss: 1.5135
Best model saved with loss: 1.5135
Switched to Strong Data Augmentation
Epoch [11/20], Loss: 1.5187
Epoch [12/20], Loss: 1.5073
Best model saved with loss: 1.5073
Epoch [13/20], Loss: 1.5022
Best model saved with loss: 1.5022
Epoch [14/20], Loss: 1.4918
Best model saved with loss: 1.4918
Epoch [15/20], Loss: 1.4858
Best model saved with loss: 1.4858
Epoch [16/20], Los

###📌 **모델 시각화**

1. torchviz 라이브러리 사용:
- 모델의 구조를 시각화하여 MLP_Model_Structure.png 로 저장합니다.

2. 모델 시각화 함수 (visualize_model) 추가:

```python
def visualize_model():
    dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR10 이미지 사이즈
    output = model(dummy_input)
    graph = make_dot(output, params=dict(model.named_parameters()))
    graph.render("MLP_Model_Structure", format="png", cleanup=True)
    print("Model visualization saved as 'MLP_Model_Structure.png'")
```

3. 모델 시각화 함수 호출:

```python
visualize_model()  # 모델 시각화 호출


In [None]:
# !pip install torchviz

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os
from torchviz import make_dot  # 모델 시각화를 위한 라이브러리 추가

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20  # 에폭 수 증가
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # 가중치 감쇠를 줄임
DROPOUT_RATE = 0.3  # 드롭아웃 확률을 낮춤
INPUT_SIZE = 3 * 32 * 32  # CIFAR10 이미지의 Flatten 크기
NUM_CLASSES = 10  # CIFAR10은 10개의 클래스
MODEL_PATH = './best_model.pth'  # 모델 저장 경로

# CIFAR-10 데이터셋의 실제 평균과 표준편차 사용
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드 (초기에는 약한 데이터 증강 사용)
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# MLP 모델 정의
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),  # 추가된 레이어
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 모델 시각화 함수 정의

def visualize_model():
    dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR10 이미지 사이즈
    output = model(dummy_input)
    graph = make_dot(output, params=dict(model.named_parameters()))
    graph.render("MLP_Model_Structure", format="png", cleanup=True)
    print("Model visualization saved as 'MLP_Model_Structure.png'")

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 스케줄러 변경

best_loss = float('inf')  # 초기값을 무한대로 설정

def train():
    global best_loss, train_dataset
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:
            train_dataset.transform = strong_transform
            print("Switched to Strong Data Augmentation")

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{EPOCHS}], Loss: {avg_loss:.4f}")

        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Best model saved with loss: {best_loss:.4f}")

def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

if __name__ == "__main__":
    load_model()
    visualize_model()  # 모델 시각화 호출
    train()
    test()


###📌 **Train/Validation/Test 데이터셋 활**

1. 학습 데이터와 검증 데이터를 분리하기 (Train-Validation Split)
- 데이터셋 분할을 위한 `random_split()` 사용
- 하이퍼파라미터에 `SPLIT_RATIO`를 설정하여 학습 데이터와 검증 데이터 분리 비율 설정

2. 학습 코드 수정하기 (Train & Validation)
- 기존의 `train()` 함수에서 학습 후 매 에폭마다 검증 데이터를 평가하는 코드를 추가


3. 검증 함수 추가하기
- `validate()` 함수

```python
def validate():
    model.eval()  # 모델을 평가 모드로 설정
    correct = 0
    total = 0
    running_loss = 0.0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    avg_loss = running_loss / len(val_loader)
    accuracy = 100 * correct / total
    model.train()  # 다시 학습 모드로 전환
    return avg_loss, accuracy
```

4. Metric 추가
- **Precision**, **Recall**, **F1-Score** 추가 (매 에폭마다 검증 시 계산)

5. Early Stopping 수정
- Validation Accuracy(`best_val_acc`)가 개선되지 않으면, `patience`가 3일 때 조기 종료.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋의 평균 및 표준편차 (정규화 기준)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 설정 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 설정 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 모델, 손실 함수, 옵티마이저 설정
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:  # 에폭 절반 이후 강한 데이터 증강으로 전환
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_loss = criterion(outputs, labels).item()

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


if __name__ == "__main__":
    # load_model()  # 모델 로드 시도
    train()
    test()


Validation - Precision: 0.4376, Recall: 0.4379, F1 Score: 0.4288
Epoch [1/20], Train Loss: 1.7689, Val Loss: 1.2486, Val Acc: 43.73%
New Best Model Saved! Validation Accuracy: 43.73%
Validation - Precision: 0.4723, Recall: 0.4722, F1 Score: 0.4660
Epoch [2/20], Train Loss: 1.5828, Val Loss: 1.1320, Val Acc: 47.11%
New Best Model Saved! Validation Accuracy: 47.11%
Validation - Precision: 0.4895, Recall: 0.4881, F1 Score: 0.4818
Epoch [3/20], Train Loss: 1.5040, Val Loss: 1.1204, Val Acc: 48.70%
New Best Model Saved! Validation Accuracy: 48.70%
Validation - Precision: 0.4985, Recall: 0.4995, F1 Score: 0.4946
Epoch [4/20], Train Loss: 1.4524, Val Loss: 1.1050, Val Acc: 49.96%
New Best Model Saved! Validation Accuracy: 49.96%
Validation - Precision: 0.5107, Recall: 0.5117, F1 Score: 0.5066
Epoch [5/20], Train Loss: 1.4065, Val Loss: 1.1460, Val Acc: 51.13%
New Best Model Saved! Validation Accuracy: 51.13%
Validation - Precision: 0.5234, Recall: 0.5263, F1 Score: 0.5211
Epoch [6/20], Train 

###📌 **하이퍼 파라미터 서치**

1. **Optuna** 사용
- `import optuna`

2. 하이퍼파라미터 탐색 대상:

- `batch_size` (32, 64, 128)
- `learning_rate` (1e-4 ~ 1e-2)
- `dropout_rate` (0.1 ~ 0.5)
- `optimizer` (Adam, SGD)

3. 학습 중간 검증:
- 매 에폭마다 `trial.report()` 로 결과를 보고합니다.
- 성능이 개선되지 않으면 `Optuna`가 학습을 조기 중단 (`TrialPruned()`)합니다.

4. 학습 완료 후 최적 하이퍼파라미터로 모델 학습 및 테스트 진행

In [None]:
# !pip install optuna

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
import optuna  # 하이퍼파라미터 서치 라이브러리
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋의 평균 및 표준편차 (정규화 기준)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 설정 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 설정 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 모델 불러오기
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 하이퍼파라미터 서치
def objective(trial):
    global model, optimizer, scheduler, BATCH_SIZE, LEARNING_RATE, DROPOUT_RATE, WEIGHT_DECAY, best_val_acc, patience_counter

    # 하이퍼파라미터 샘플링
    BATCH_SIZE = trial.suggest_categorical('batch_size', [32, 64, 128])
    LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
    step_size = trial.suggest_int('step_size', 3, 10)
    gamma = trial.suggest_uniform('gamma', 0.1, 0.9)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    model = MLP()
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=WEIGHT_DECAY)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    best_val_acc = 0
    patience_counter = 0

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # 학습 과정
    for epoch in range(EPOCHS):
        model.train()
        if epoch == EPOCHS // 2:
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        val_loss, val_acc = validate()  # 검증 단계
        trial.report(val_acc, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), MODEL_PATH)

    return best_val_acc

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:  # 에폭 절반 이후 강한 데이터 증강으로 전환
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_loss = criterion(outputs, labels).item()

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


if __name__ == "__main__":
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=20)
    print(f"Best Trial: {study.best_trial.value}")
    print(f"Best Hyperparameters: {study.best_trial.params}")

    # 최적의 하이퍼파라미터로 다시 학습 및 테스트 진행
    load_model()
    train()
    test()





[I 2025-03-27 13:06:13,870] A new study created in memory with name: no-name-1dab4e45-5ddc-423f-8608-eb1c86f15e22
  LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
  WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.1, 0.9)


Validation - Precision: 0.4291, Recall: 0.4247, F1 Score: 0.4202
Validation - Precision: 0.4652, Recall: 0.4653, F1 Score: 0.4588
Validation - Precision: 0.4857, Recall: 0.4858, F1 Score: 0.4804
Validation - Precision: 0.5000, Recall: 0.5026, F1 Score: 0.4986
Validation - Precision: 0.5125, Recall: 0.5137, F1 Score: 0.5079
Validation - Precision: 0.5212, Recall: 0.5170, F1 Score: 0.5121
Validation - Precision: 0.5296, Recall: 0.5266, F1 Score: 0.5233
Validation - Precision: 0.5332, Recall: 0.5337, F1 Score: 0.5314
Validation - Precision: 0.5402, Recall: 0.5411, F1 Score: 0.5374
Validation - Precision: 0.5507, Recall: 0.5502, F1 Score: 0.5489
Validation - Precision: 0.4498, Recall: 0.4479, F1 Score: 0.4442
Validation - Precision: 0.4583, Recall: 0.4601, F1 Score: 0.4561
Validation - Precision: 0.4635, Recall: 0.4647, F1 Score: 0.4579
Validation - Precision: 0.4687, Recall: 0.4703, F1 Score: 0.4680
Validation - Precision: 0.4721, Recall: 0.4755, F1 Score: 0.4699
Validation - Precision: 0

[I 2025-03-27 13:16:47,473] Trial 0 finished with value: 55.21 and parameters: {'batch_size': 128, 'learning_rate': 0.003109548697581316, 'dropout_rate': 0.22553735533002361, 'weight_decay': 4.508346108254859e-05, 'step_size': 8, 'gamma': 0.7972745415846182, 'optimizer': 'SGD'}. Best is trial 0 with value: 55.21.


Validation - Precision: 0.4942, Recall: 0.4965, F1 Score: 0.4940


  LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
  WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.1, 0.9)


Validation - Precision: 0.3357, Recall: 0.3373, F1 Score: 0.3296
Validation - Precision: 0.3541, Recall: 0.3517, F1 Score: 0.3443
Validation - Precision: 0.3536, Recall: 0.3549, F1 Score: 0.3410
Validation - Precision: 0.3732, Recall: 0.3754, F1 Score: 0.3716
Validation - Precision: 0.3756, Recall: 0.3697, F1 Score: 0.3517
Validation - Precision: 0.3895, Recall: 0.3808, F1 Score: 0.3714
Validation - Precision: 0.3818, Recall: 0.3759, F1 Score: 0.3666
Validation - Precision: 0.4002, Recall: 0.3938, F1 Score: 0.3762
Validation - Precision: 0.3987, Recall: 0.4015, F1 Score: 0.3905
Validation - Precision: 0.4091, Recall: 0.4067, F1 Score: 0.3962
Validation - Precision: 0.4096, Recall: 0.4087, F1 Score: 0.4062
Validation - Precision: 0.3924, Recall: 0.3981, F1 Score: 0.3859
Validation - Precision: 0.4088, Recall: 0.4097, F1 Score: 0.4038
Validation - Precision: 0.4191, Recall: 0.4238, F1 Score: 0.4139
Validation - Precision: 0.4349, Recall: 0.4380, F1 Score: 0.4301
Validation - Precision: 0

[I 2025-03-27 13:44:19,385] Trial 1 finished with value: 44.69 and parameters: {'batch_size': 64, 'learning_rate': 0.009320346498472817, 'dropout_rate': 0.237750765520911, 'weight_decay': 2.028978770068843e-05, 'step_size': 7, 'gamma': 0.6376061337271416, 'optimizer': 'Adam'}. Best is trial 0 with value: 55.21.


Validation - Precision: 0.4314, Recall: 0.4368, F1 Score: 0.4292


  LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
  WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.1, 0.9)


Validation - Precision: 0.3170, Recall: 0.3114, F1 Score: 0.3015
Validation - Precision: 0.3439, Recall: 0.3416, F1 Score: 0.3344
Validation - Precision: 0.3631, Recall: 0.3622, F1 Score: 0.3533
Validation - Precision: 0.3802, Recall: 0.3757, F1 Score: 0.3709
Validation - Precision: 0.3875, Recall: 0.3836, F1 Score: 0.3785
Validation - Precision: 0.3950, Recall: 0.3926, F1 Score: 0.3880
Validation - Precision: 0.4021, Recall: 0.3992, F1 Score: 0.3923
Validation - Precision: 0.4083, Recall: 0.4060, F1 Score: 0.3997
Validation - Precision: 0.4039, Recall: 0.4036, F1 Score: 0.3977
Validation - Precision: 0.4176, Recall: 0.4152, F1 Score: 0.4082
Validation - Precision: 0.4155, Recall: 0.4152, F1 Score: 0.4100
Validation - Precision: 0.4122, Recall: 0.4117, F1 Score: 0.4053
Validation - Precision: 0.4111, Recall: 0.4137, F1 Score: 0.4070
Validation - Precision: 0.4256, Recall: 0.4245, F1 Score: 0.4186
Validation - Precision: 0.4210, Recall: 0.4188, F1 Score: 0.4129
Validation - Precision: 0

[I 2025-03-27 14:01:31,641] Trial 2 finished with value: 42.63 and parameters: {'batch_size': 64, 'learning_rate': 0.0003805663988899745, 'dropout_rate': 0.2219431290557982, 'weight_decay': 1.3523658748252338e-06, 'step_size': 4, 'gamma': 0.507303455610086, 'optimizer': 'SGD'}. Best is trial 0 with value: 55.21.


Validation - Precision: 0.4159, Recall: 0.4141, F1 Score: 0.4090


  LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
  WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.1, 0.9)


Validation - Precision: 0.3738, Recall: 0.3595, F1 Score: 0.3487
Validation - Precision: 0.3874, Recall: 0.3855, F1 Score: 0.3757
Validation - Precision: 0.4074, Recall: 0.4017, F1 Score: 0.3967
Validation - Precision: 0.3946, Recall: 0.3958, F1 Score: 0.3834
Validation - Precision: 0.4224, Recall: 0.4182, F1 Score: 0.4150
Validation - Precision: 0.4203, Recall: 0.4176, F1 Score: 0.4092
Validation - Precision: 0.4107, Recall: 0.4126, F1 Score: 0.4025
Validation - Precision: 0.4166, Recall: 0.4203, F1 Score: 0.4137
Validation - Precision: 0.4206, Recall: 0.4258, F1 Score: 0.4174
Validation - Precision: 0.4574, Recall: 0.4597, F1 Score: 0.4551
Validation - Precision: 0.4556, Recall: 0.4621, F1 Score: 0.4565
Validation - Precision: 0.4621, Recall: 0.4650, F1 Score: 0.4610
Validation - Precision: 0.4657, Recall: 0.4708, F1 Score: 0.4654
Validation - Precision: 0.4629, Recall: 0.4665, F1 Score: 0.4633
Validation - Precision: 0.4643, Recall: 0.4703, F1 Score: 0.4641
Validation - Precision: 0

[I 2025-03-27 14:24:14,852] Trial 3 finished with value: 50.38 and parameters: {'batch_size': 64, 'learning_rate': 0.0012269573803510066, 'dropout_rate': 0.1557565882173503, 'weight_decay': 0.0004650420504685621, 'step_size': 9, 'gamma': 0.35019103938049145, 'optimizer': 'Adam'}. Best is trial 0 with value: 55.21.


Validation - Precision: 0.4983, Recall: 0.5020, F1 Score: 0.4984


  LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
  DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
  WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
  gamma = trial.suggest_uniform('gamma', 0.1, 0.9)


Validation - Precision: 0.3184, Recall: 0.2817, F1 Score: 0.2509
Validation - Precision: 0.3304, Recall: 0.3233, F1 Score: 0.3040
Validation - Precision: 0.3445, Recall: 0.3352, F1 Score: 0.3180
Validation - Precision: 0.3631, Recall: 0.3532, F1 Score: 0.3407
Validation - Precision: 0.3672, Recall: 0.3584, F1 Score: 0.3473
Validation - Precision: 0.3848, Recall: 0.3793, F1 Score: 0.3675
Validation - Precision: 0.3773, Recall: 0.3785, F1 Score: 0.3618
Validation - Precision: 0.3929, Recall: 0.3902, F1 Score: 0.3785
Validation - Precision: 0.3988, Recall: 0.3944, F1 Score: 0.3826
Validation - Precision: 0.4029, Recall: 0.3991, F1 Score: 0.3866
Validation - Precision: 0.4037, Recall: 0.4035, F1 Score: 0.3914
Validation - Precision: 0.4064, Recall: 0.4032, F1 Score: 0.3930
Validation - Precision: 0.4123, Recall: 0.4073, F1 Score: 0.3965
Validation - Precision: 0.3994, Recall: 0.3976, F1 Score: 0.3851


###📌 **GPU 자원 활용**

1. GPU 설정

```python
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
```

2. 모델 초기화 후 GPU로 이동(`model.to(DEVICE)`)

```python
model = MLP().to(DEVICE)
```

3. 입력 데이터 (inputs, labels)도 GPU로 전송(inputs.to(DEVICE)`)

```python
inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
```

4. 손실 함수 (criterion)도 GPU에서 사용하도록 전송

```python
criterion = nn.CrossEntropyLoss().to(DEVICE)
```

5. 하이퍼파라미터 서치 파트 안에도 똑같이 수정

In [None]:
# !pip install optuna

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
import json
import platform
import pkg_resources
import optuna  # 하이퍼파라미터 서치 라이브러리
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image

# GPU 설정
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋의 평균 및 표준편차 (정규화 기준)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 설정 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 설정 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP().to(DEVICE)

# 모델 불러오기
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        model.to(DEVICE)
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 하이퍼파라미터 서치
def objective(trial):
    global model, optimizer, scheduler, BATCH_SIZE, LEARNING_RATE, DROPOUT_RATE, WEIGHT_DECAY, best_val_acc, patience_counter

    # 하이퍼파라미터 샘플링
    BATCH_SIZE = trial.suggest_categorical('batch_size', [32, 64, 128])
    LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
    step_size = trial.suggest_int('step_size', 3, 10)
    gamma = trial.suggest_uniform('gamma', 0.1, 0.9)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    model = MLP().to(DEVICE)
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=WEIGHT_DECAY)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    best_val_acc = 0
    patience_counter = 0

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # 학습 과정
    for epoch in range(EPOCHS):
        model.train()
        if epoch == EPOCHS // 2:
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        val_loss, val_acc = validate()  # 검증 단계
        trial.report(val_acc, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), MODEL_PATH)

    return best_val_acc

criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:  # 에폭 절반 이후 강한 데이터 증강으로 전환
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_loss = criterion(outputs, labels).item()

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

# 최적의 하이퍼파라미터 정보와 환경 정보 저장 함수
def save_study_info(study, filename="best_study_info.json"):
    best_params = study.best_trial.params
    best_value = study.best_trial.value

    # Python 및 라이브러리 정보 가져오기
    python_version = platform.python_version()
    platform_info = platform.platform()
    installed_packages = {pkg.key: pkg.version for pkg in pkg_resources.working_set}

    # 데이터셋 정보 수집
    dataset_name = 'CIFAR10'  # 현재 사용 중인 데이터셋 이름
    dataset_info = {
        "dataset_name": dataset_name,
        "train_size": len(train_dataset),
        "validation_size": len(val_dataset),
        "test_size": len(test_dataset),
        "image_size": (32, 32),  # CIFAR-10 이미지 크기
        "num_classes": NUM_CLASSES
    }

    # 모델 정보 수집
    model_info = {
        "model_name": model.__class__.__name__,
        "model_structure": str(model),
        "input_size": INPUT_SIZE,
        "num_classes": NUM_CLASSES
    }

    # 정보 저장
    data = {
        "best_params": best_params,
        "best_value": best_value,
        "python_version": python_version,
        "platform_info": platform_info,
        "installed_packages": installed_packages,
        "dataset_info": dataset_info,
        "model_info": model_info
    }

    with open(filename, "w") as f:
        json.dump(data, f, indent=4)

    print(f"Best study info saved to {filename}")


if __name__ == "__main__":
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=20)
    print(f"Best Trial: {study.best_trial.value}")
    print(f"Best Hyperparameters: {study.best_trial.params}")

    # 최적의 하이퍼파라미터로 다시 학습 및 테스트 진행
    load_model()
    train()
    test()

    # 최적의 정보 저장
    save_study_info(study)


###📌 **학습에 필요한 모든 정보 저장**

1. 최적 하이퍼파라미터 정보 저장
- 최적의 하이퍼파라미터 (`best_params`)
- 최적의 검증 정확도 (`best_value`)

2. 파이썬 버전 저장

- `python_version`

3. 플랫폼 정보 저장
- `platform_info`

4. 설치된 모든 라이브러리 및 버전 정보
- `installed_packages`

5. 저장 형식: JSON 파일
- `import json`

In [None]:
# !pip install optuna

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
import json
import platform
import pkg_resources
import optuna  # 하이퍼파라미터 서치 라이브러리
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋의 평균 및 표준편차 (정규화 기준)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 약한 데이터 증강 설정 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 강한 데이터 증강 설정 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP()

# 모델 불러오기
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 하이퍼파라미터 서치
def objective(trial):
    global model, optimizer, scheduler, BATCH_SIZE, LEARNING_RATE, DROPOUT_RATE, WEIGHT_DECAY, best_val_acc, patience_counter

    # 하이퍼파라미터 샘플링
    BATCH_SIZE = trial.suggest_categorical('batch_size', [32, 64, 128])
    LEARNING_RATE = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    DROPOUT_RATE = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    WEIGHT_DECAY = trial.suggest_loguniform('weight_decay', 1e-6, 1e-3)
    step_size = trial.suggest_int('step_size', 3, 10)
    gamma = trial.suggest_uniform('gamma', 0.1, 0.9)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    model = MLP()
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=WEIGHT_DECAY)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    best_val_acc = 0
    patience_counter = 0

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # 학습 과정
    for epoch in range(EPOCHS):
        model.train()
        if epoch == EPOCHS // 2:
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        val_loss, val_acc = validate()  # 검증 단계
        trial.report(val_acc, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), MODEL_PATH)

    return best_val_acc

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:  # 에폭 절반 이후 강한 데이터 증강으로 전환
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_loss = criterion(outputs, labels).item()

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


# 최적의 하이퍼파라미터 정보와 환경 정보 저장 함수
def save_study_info(study, filename="best_study_info.json"):
    best_params = study.best_trial.params
    best_value = study.best_trial.value

    # Python 및 라이브러리 정보 가져오기
    python_version = platform.python_version()
    platform_info = platform.platform()
    installed_packages = {pkg.key: pkg.version for pkg in pkg_resources.working_set}

    # 데이터셋 정보 수집
    dataset_name = 'CIFAR10'  # 현재 사용 중인 데이터셋 이름
    dataset_info = {
        "dataset_name": dataset_name,
        "train_size": len(train_dataset),
        "validation_size": len(val_dataset),
        "test_size": len(test_dataset),
        "image_size": (32, 32),  # CIFAR-10 이미지 크기
        "num_classes": NUM_CLASSES
    }

    # 모델 정보 수집
    model_info = {
        "model_name": model.__class__.__name__,
        "model_structure": str(model),
        "input_size": INPUT_SIZE,
        "num_classes": NUM_CLASSES
    }

    # 정보 저장
    data = {
        "best_params": best_params,
        "best_value": best_value,
        "python_version": python_version,
        "platform_info": platform_info,
        "installed_packages": installed_packages,
        "dataset_info": dataset_info,
        "model_info": model_info
    }

    with open(filename, "w") as f:
        json.dump(data, f, indent=4)

    print(f"Best study info saved to {filename}")


if __name__ == "__main__":
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=20)
    print(f"Best Trial: {study.best_trial.value}")
    print(f"Best Hyperparameters: {study.best_trial.params}")

    # 최적의 하이퍼파라미터로 다시 학습 및 테스트 진행
    load_model()
    train()
    test()

    # 최적의 정보 저장
    save_study_info(study)

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
import json
import platform
import pkg_resources
from sklearn.metrics import precision_score, recall_score, f1_score

# GPU 설정
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 하이퍼파라미터 설정
BATCH_SIZE = 128
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋의 평균 및 표준편차 (정규화 기준)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# 학습 데이터 증강 설정 (후반 학습용)
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# 데이터셋 로드
full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

# 모델 초기화
model = MLP().to(DEVICE)

# 모델 불러오기
def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')
    return model

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):

        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    val_loss_total = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss_total += loss.item()

            _, predicted = torch.max(outputs, 1) # 반환:(최댓값, 인덱스)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = val_loss_total / len(val_loader)
    val_acc = 100 * correct / total

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


# 최적의 하이퍼파라미터 정보와 환경 정보 저장 함수
def save_study_info(filename="best_study_info.json"):

    # Python 및 라이브러리 정보 가져오기
    python_version = platform.python_version()
    platform_info = platform.platform()
    installed_packages = {pkg.key: pkg.version for pkg in pkg_resources.working_set}

    # 데이터셋 정보 수집
    dataset_name = 'CIFAR10'  # 현재 사용 중인 데이터셋 이름
    dataset_info = {
        "dataset_name": dataset_name,
        "train_size": len(train_dataset),
        "validation_size": len(val_dataset),
        "test_size": len(test_dataset),
        "image_size": (32, 32),  # CIFAR-10 이미지 크기
        "num_classes": NUM_CLASSES
    }

    # 모델 정보 수집
    model_info = {
        "model_name": model.__class__.__name__,
        "model_structure": str(model),
        "input_size": INPUT_SIZE,
        "num_classes": NUM_CLASSES
    }

    # 정보 저장
    data = {
        "python_version": python_version,
        "platform_info": platform_info,
        "installed_packages": installed_packages,
        "dataset_info": dataset_info,
        "model_info": model_info
    }

    with open(filename, "w") as f:
        json.dump(data, f, indent=4)

    print(f"Best study info saved to {filename}")


if __name__ == "__main__":
    # load_model()
    train()
    test()

    # 최적의 정보 저장
    save_study_info()

Validation - Precision: 0.4357, Recall: 0.4322, F1 Score: 0.4309
Epoch [1/20], Train Loss: 1.7769, Val Loss: 1.7876, Val Acc: 43.24%
New Best Model Saved! Validation Accuracy: 43.24%
Validation - Precision: 0.4652, Recall: 0.4618, F1 Score: 0.4577
Epoch [2/20], Train Loss: 1.5758, Val Loss: 1.5997, Val Acc: 46.19%
New Best Model Saved! Validation Accuracy: 46.19%
Validation - Precision: 0.4862, Recall: 0.4863, F1 Score: 0.4796
Epoch [3/20], Train Loss: 1.4930, Val Loss: 1.5601, Val Acc: 48.70%
New Best Model Saved! Validation Accuracy: 48.70%
Validation - Precision: 0.4958, Recall: 0.4915, F1 Score: 0.4877
Epoch [4/20], Train Loss: 1.4359, Val Loss: 1.3529, Val Acc: 49.17%
New Best Model Saved! Validation Accuracy: 49.17%
Validation - Precision: 0.5065, Recall: 0.5065, F1 Score: 0.5027
Epoch [5/20], Train Loss: 1.3889, Val Loss: 1.2718, Val Acc: 50.73%
New Best Model Saved! Validation Accuracy: 50.73%
Validation - Precision: 0.5260, Recall: 0.5231, F1 Score: 0.5205
Epoch [6/20], Train 

###📌 **CUSTOM DATASET**

1. `CustomDataset` 클래스 추가:
- 지정된 경로에서 이미지 데이터를 폴더 구조로 로드
- 폴더 이름이 클래스 이름으로 간주 (예: ./data/train/class1, ./data/train/class2 등)
- 지원되는 이미지 확장자: .png, .jpg, .jpeg

 📌 **사용 방법**
- 학습 데이터 경로: `./data/train`
- 테스트 데이터 경로: `./data/test`
- **폴더 구조**는 다음과 같은 방식으로 준비해야 합니다:

```bash
./data/train/class1/ 이미지1.jpg, 이미지2.jpg, ...
./data/train/class2/ 이미지1.jpg, 이미지2.jpg, ...
./data/test/class1/ 이미지1.jpg, 이미지2.jpg, ...
./data/test/class2/ 이미지1.jpg, 이미지2.jpg, ...
```

2. 보편적 정규화 값 적용:

```python
CUSTOM_MEAN = (0.5, 0.5, 0.5)
CUSTOM_STD = (0.5, 0.5, 0.5)


In [None]:
# 라이브러리 임포트
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import os
from sklearn.metrics import precision_score, recall_score, f1_score
from PIL import Image

# 하이퍼파라미터 설정
BATCH_SIZE = 64
EPOCHS = 20
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
DROPOUT_RATE = 0.3
INPUT_SIZE = 3 * 32 * 32
NUM_CLASSES = 10
MODEL_PATH = './best_model.pth'
PATIENCE = 3
SPLIT_RATIO = 0.8

# CIFAR10 데이터셋 사용 시 정규화 평균과 표준편차 설정
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)
# 자체 데이터셋 사용 시 정규화 평균과 표준편차 설정
CUSTOM_MEAN = (0.5, 0.5, 0.5)
CUSTOM_STD = (0.5, 0.5, 0.5)

# 약한 데이터 증강 설정 (초기 학습용)
weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(CUSTOM_MEAN, CUSTOM_STD)
])

# 강한 데이터 증강 설정 (후반 학습용)
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(CUSTOM_MEAN, CUSTOM_STD)
])

# 테스트 데이터 변환 (데이터 증강 없이 정규화만 적용)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CUSTOM_MEAN, CUSTOM_STD)
])

# 자체 데이터셋 로드 클래스 정의
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        classes = os.listdir(root_dir)  # 폴더 이름을 클래스로 사용
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}

        for cls_name in classes:
            cls_folder = os.path.join(root_dir, cls_name)
            if os.path.isdir(cls_folder):
                for img_name in os.listdir(cls_folder):
                    img_path = os.path.join(cls_folder, img_name)
                    if img_path.endswith(('.png', '.jpg', '.jpeg')):  # 이미지 파일만 추가
                        self.image_paths.append(img_path)
                        self.labels.append(self.class_to_idx[cls_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


# 데이터셋 경로 설정
train_dir = './data/train'
test_dir = './data/test'

# 자체 데이터셋 로드
full_train_dataset = CustomDataset(root_dir=train_dir, transform=weak_transform)
test_dataset = CustomDataset(root_dir=test_dir, transform=transform_test)

# full_train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=weak_transform)
# test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform_test, download=True)

# 학습 및 검증 데이터셋 분리
train_size = int(SPLIT_RATIO * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])
val_dataset.dataset.transform = transform_test

# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의 (MLP)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

    def forward(self, x):
        return self.model(x)

def load_model():
    if os.path.exists(MODEL_PATH):
        model.load_state_dict(torch.load(MODEL_PATH))
        print('Model loaded from checkpoint.')
    else:
        print('No checkpoint found, training from scratch.')

# 모델 초기화
model = MLP()

# 모델, 손실 함수, 옵티마이저 설정
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_val_acc = 0
patience_counter = 0

# 학습 함수 정의
def train():
    global best_val_acc, patience_counter
    model.train()
    for epoch in range(EPOCHS):
        if epoch == EPOCHS // 2:  # 에폭 절반 이후 강한 데이터 증강으로 전환
            train_dataset.dataset.transform = strong_transform

        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        scheduler.step()

        avg_loss = running_loss / len(train_loader)
        val_loss, val_acc = validate()

        print(f"Epoch [{epoch+1}/{EPOCHS}], Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:  # Early Stopping 체크
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"New Best Model Saved! Validation Accuracy: {best_val_acc:.2f}%")
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print("Early stopping triggered.")
                break

def validate():
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_loss = criterion(outputs, labels).item()

    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f"Validation - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    model.train()
    return val_loss, val_acc


def test():
    model.load_state_dict(torch.load(MODEL_PATH))
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Accuracy: {accuracy:.2f}%')
    print(f'Test Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')


if __name__ == "__main__":
    # load_model()  # 모델 로드 시도
    train()
    test()
