In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Subset
from torchvision import datasets, transforms, models
import numpy as np
from tqdm import tqdm 
import pandas as pd
import random
from PIL import Image, ImageOps
from torchvision.transforms import functional as F

In [2]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU generator, current CUDA device, all CUDA devices), then sets
    ``torch.backends.cudnn.deterministic = True`` and
    ``torch.backends.cudnn.benchmark = False``.

    Args:
        seed: Integer seed handed to each generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seeding calls, in the same order, driven from one table.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False


In [3]:
# Seed every RNG before the first stochastic step so that the split below,
# the weight initialisation and the DataLoader shuffling are repeatable
# under Restart Kernel -> Run All.
SEED = 42
set_seed(SEED)

# Dataset loading and preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize every image to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # per-channel normalization
])

# Path to the augmented dataset
augmented_dir = "/home/student/workspace/data/train"

dataset = datasets.ImageFolder(root=augmented_dir, transform=transform)

# Train/validation split (80% / 20%).
# A dedicated seeded generator makes the split independent of how much of the
# global RNG stream earlier cells happened to consume.
# NOTE(review): if this directory holds augmented copies of the same source
# images, a random split can put variants of one image on both sides and
# inflate validation accuracy - confirm how the augmented set was built.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# DataLoader creation
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [4]:
import torch.nn as nn
from torchvision.models import efficientnet_b2

# EfficientNet-B2 model definition
class EfficientNetB2Scratch(nn.Module):
    """EfficientNet-B2 built without pretrained weights and a resized head.

    The torchvision ``efficientnet_b2`` network is created with
    ``weights=None`` and the layer at ``classifier[1]`` is replaced by a new
    ``nn.Linear`` that outputs ``num_classes`` values.

    Args:
        num_classes: Number of output units of the replacement linear layer.
    """

    def __init__(self, num_classes=300):
        super().__init__()
        backbone = efficientnet_b2(weights=None)  # no pretrained weights
        # Reuse the input width of the stock head for the replacement layer.
        head_in_features = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_in_features, num_classes)
        # Attribute name is part of the state_dict keys; keep it as-is.
        self.efficientnet = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        logits = self.efficientnet(x)
        return logits

# Model instantiation: use CUDA when it is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = EfficientNetB2Scratch(num_classes=300)
model = model.to(device)


In [5]:
import torch.optim as optim
from tqdm import tqdm

# Training and validation loop
def train_and_validate(model, train_loader, val_loader, epochs=5,
                       save_path='best_efficientnet_b2_scratch.pth'):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Uses cross-entropy loss, AdamW (lr=0.001, weight_decay=1e-4) and a StepLR
    schedule (step_size=7, gamma=0.1). Whenever the validation accuracy
    exceeds the best value seen so far, the model's ``state_dict`` is written
    to ``save_path``.

    Args:
        model: ``nn.Module`` to train; batches are moved to the device of its
            first parameter.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable yielding ``(images, labels)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        save_path: File the best ``state_dict`` is saved to.

    Returns:
        None. Progress is printed once per epoch.
    """
    # Follow the model's own device instead of relying on a notebook global,
    # so the function works no matter which cells ran before it.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)  # AdamW optimizer
    # Learning-rate scheduler. With step_size=7 the rate is first reduced
    # after the 7th epoch, so runs of 7 epochs or fewer (including the
    # default of 5) train at a constant learning rate.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    best_val_accuracy = 0.0

    for epoch in range(epochs):
        # ===== Training phase =====
        model.train()
        train_loss = 0.0
        train_batches = 0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Training]"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_batches += 1

        # ===== Validation phase =====
        model.eval()
        val_loss = 0.0
        val_batches = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Validation]"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_batches += 1
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader reports NaN / 0% instead of
        # aborting the run with ZeroDivisionError.
        avg_train_loss = train_loss / train_batches if train_batches else float('nan')
        avg_val_loss = val_loss / val_batches if val_batches else float('nan')
        val_accuracy = 100 * correct / total if total else 0.0
        print(f"Epoch {epoch+1}/{epochs}: Train Loss = {avg_train_loss:.4f}, "
              f"Val Loss = {avg_val_loss:.4f}, Val Accuracy = {val_accuracy:.2f}%")

        # Save the best model so far (strict improvement only)
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved with accuracy: {best_val_accuracy:.2f}%")

        scheduler.step()


In [6]:
# Step 1: train the model
train_and_validate(model, train_loader, val_loader, epochs=5)

# Step 2: verify that the weights file exists before continuing
checkpoint_found = os.path.exists('./best_efficientnet_b2_scratch.pth')
if not checkpoint_found:
    raise FileNotFoundError("Model weights file 'best_efficientnet_b2_scratch.pth' not found. Please ensure training is completed.")

# Load the test data with the same transform pipeline used for training
import torchvision

test_dataset = torchvision.datasets.ImageFolder(
    root='/test/final_exam/challenge/test',
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

# Step 3: run inference on the test set
def test_model(model, test_loader):
    """Return the predicted class index for every sample in ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Labels yielded by the loader are ignored.

    Args:
        model: ``nn.Module`` producing one score per class along dimension 1.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A list with one predicted class index per sample, in loader order.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    predictions = []
    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc="Testing"):
            outputs = model(images.to(device))
            predicted = outputs.argmax(dim=1)
            predictions.extend(predicted.cpu().numpy())
    return predictions

# Load the best checkpoint and run inference.
# map_location remaps the stored tensors onto the current device, so the load
# also works when the checkpoint was written on a different one;
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs.
best_state = torch.load(
    './best_efficientnet_b2_scratch.pth',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(best_state)
predictions = test_model(model, test_loader)

# Step 4: build the submission file
submission = pd.read_csv('./sample_submission.csv')
if len(predictions) != len(submission):
    raise ValueError(
        f"Got {len(predictions)} predictions but sample_submission.csv has "
        f"{len(submission)} rows."
    )
# NOTE(review): the predictions are ImageFolder class indices from the
# training set (class folders sorted by name). If the expected 'Label' values
# are the folder names themselves (e.g. numeric names, which sort as
# '0', '1', '10', ...), they must be mapped back through dataset.classes -
# confirm against the competition's label format. Also confirm that the row
# order of sample_submission.csv matches the ImageFolder file order.
submission['Label'] = predictions
submission.to_csv('./submission_efficientnet_b2_scratch.csv', index=False)
print("Submission file saved as 'submission_efficientnet_b2_scratch.csv'.")

Epoch 1/5 [Training]: 100% 2347/2347 [12:34<00:00,  3.11it/s]
Epoch 1/5 [Validation]: 100% 587/587 [02:28<00:00,  3.95it/s]


Epoch 1/5: Train Loss = 4.6663, Val Loss = 4.0111, Val Accuracy = 7.44%
Best model saved with accuracy: 7.44%


Epoch 2/5 [Training]: 100% 2347/2347 [12:19<00:00,  3.18it/s]
Epoch 2/5 [Validation]: 100% 587/587 [02:29<00:00,  3.91it/s]


Epoch 2/5: Train Loss = 3.7467, Val Loss = 3.1814, Val Accuracy = 19.75%
Best model saved with accuracy: 19.75%


Epoch 3/5 [Training]: 100% 2347/2347 [12:18<00:00,  3.18it/s]
Epoch 3/5 [Validation]: 100% 587/587 [02:27<00:00,  3.97it/s]


Epoch 3/5: Train Loss = 2.9847, Val Loss = 2.5270, Val Accuracy = 33.57%
Best model saved with accuracy: 33.57%


Epoch 4/5 [Training]: 100% 2347/2347 [12:17<00:00,  3.18it/s]
Epoch 4/5 [Validation]: 100% 587/587 [02:26<00:00,  4.00it/s]


Epoch 4/5: Train Loss = 2.3142, Val Loss = 1.8324, Val Accuracy = 48.77%
Best model saved with accuracy: 48.77%


Epoch 5/5 [Training]: 100% 2347/2347 [12:22<00:00,  3.16it/s]
Epoch 5/5 [Validation]: 100% 587/587 [02:29<00:00,  3.93it/s]
  model.load_state_dict(torch.load('./best_efficientnet_b2_scratch.pth'))


Epoch 5/5: Train Loss = 1.7426, Val Loss = 1.3563, Val Accuracy = 60.71%
Best model saved with accuracy: 60.71%


Testing: 100% 66/66 [00:48<00:00,  1.35it/s]

Submission file saved as 'submission_efficientnet_b2_scratch.csv'.



