In [1]:

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from tqdm import tqdm

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Using device:", device)

pin_memory = use_cuda
use_amp = use_cuda
if use_amp:
    from torch.cuda.amp import autocast, GradScaler
    scaler = GradScaler()


Using device: cpu


In [2]:

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])


In [3]:

data_dir = "./data"  # 폴더 구조: ./data/class_name/image.jpg

dataset = datasets.ImageFolder(data_dir, transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4, pin_memory=pin_memory)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False, num_workers=4, pin_memory=pin_memory)
print("Train size:", len(train_set), "Val size:", len(val_set))


Train size: 2562 Val size: 641


In [4]:

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))  # 클래스 수에 맞게 조정
model = model.to(device)




In [5]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


In [6]:

EPOCHS = 10

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        if use_amp:
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

    scheduler.step()
    print(f"Train Loss: {running_loss / len(train_loader):.4f}, Accuracy: {correct / total * 100:.2f}%")


Epoch 1/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [01:55<00:00,  2.82s/it]


Train Loss: 0.2578, Accuracy: 97.70%


Epoch 2/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [01:59<00:00,  2.91s/it]


Train Loss: 0.0384, Accuracy: 99.61%


Epoch 3/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [02:07<00:00,  3.11s/it]


Train Loss: 0.0460, Accuracy: 98.24%


Epoch 4/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [01:55<00:00,  2.81s/it]


Train Loss: 0.0153, Accuracy: 99.96%


Epoch 5/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [01:58<00:00,  2.89s/it]


Train Loss: 0.0815, Accuracy: 98.40%


Epoch 6/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [02:03<00:00,  3.00s/it]


Train Loss: 0.1106, Accuracy: 97.97%


Epoch 7/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [01:52<00:00,  2.75s/it]


Train Loss: 0.0212, Accuracy: 99.53%


Epoch 8/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [02:06<00:00,  3.08s/it]


Train Loss: 0.0034, Accuracy: 99.92%


Epoch 9/10: 100%|██████████████████████████████████████████████████████████████████████| 41/41 [02:08<00:00,  3.14s/it]


Train Loss: 0.1825, Accuracy: 99.92%


Epoch 10/10: 100%|█████████████████████████████████████████████████████████████████████| 41/41 [01:55<00:00,  2.82s/it]

Train Loss: 0.0098, Accuracy: 99.84%





In [18]:

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f"Validation Accuracy: {correct / total * 100:.2f}%")


Validation Accuracy: 32.14%


In [12]:

# 학습이 완료된 후 모델 저장
model_path = "model_final.pt"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")


Model saved to model_final.pt


In [20]:

# 저장된 모델 불러오기 (학습 안 하고 추론만 하고 싶을 때)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(class_names))
model.load_state_dict(torch.load("model_final.pt", map_location=device))
model.to(device)
model.eval()
print("Model loaded and ready for inference.")


RuntimeError: Error(s) in loading state_dict for ResNet:
	size mismatch for fc.weight: copying a param with shape torch.Size([1000, 512]) from checkpoint, the shape in current model is torch.Size([2, 512]).
	size mismatch for fc.bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([2]).

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import os

# 테스트 이미지 폴더
test_dir = './test'
test_images = [f for f in os.listdir(test_dir) if f.lower().endswith(('jpg', 'jpeg', 'png'))]

# 모델 평가 모드
model.eval()
plt.figure(figsize=(20, 10))

for i, fname in enumerate(test_images[:10]):
    img_path = os.path.join(test_dir, fname)
    image = Image.open(img_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(device)  # 배치 차원 추가

    with torch.no_grad():
        output = model(input_tensor)
        pred = torch.argmax(output, dim=1).item()
        pred_label = class_names[pred]

    # 시각화
    plt.subplot(2, 5, i + 1)
    img_disp = np.array(image.resize((128, 128))) / 255.0  # 정규화 없이 원본 보여줌
    plt.imshow(img_disp)
    plt.title(f"{fname}\\nPredicted: {pred_label}")
    plt.axis('off')

plt.tight_layout()
plt.show()
