# CNN 모델을 만들어 보자.

이미지 데이터는 다음과 같은 구조로 되어 있다.

img/
- heartsping/
  - heartsping0.jpg
  - heartsping1.jpg
  - heartsping2.jpg
  - ...
- shashaping/
  - shashaping0.jpg
  - shashaping1.jpg
  - shashaping2.jpg
  - ...
- fluffyping/
  - fluffyping0.jpg
  - fluffyping1.jpg
  - fluffyping2.jpg
  - ...
- jellyping/
  - jellyping0.jpg
  - jellyping1.jpg
  - jellyping2.jpg
  - ...
- donutping/
  - donutping0.jpg
  - donutping1.jpg
  - donutping2.jpg
  - ...
- puffping/
  - puffping0.jpg
  - puffping1.jpg
  - puffping2.jpg
  - ...

필요한 라이브러리를 불러온다.

In [7]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

이미지를 변환하고 전처리한다.

In [11]:
# Resolve the dataset root relative to the notebook's working directory.
# ImageFolder reads one sub-folder per class beneath this path.
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, 'img')

# Preprocessing applied to every image: resize to a fixed 224x224, convert
# to a tensor, then normalize each channel with mean 0.5 / std 0.5 so the
# values end up in [-1, 1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Class labels are derived from the sub-folder names under data_dir.
dataset = torchvision.datasets.ImageFolder(root=data_dir, transform=transform)

# 80% of the samples go to training; the remainder is held out for validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split. Without a fixed generator, every re-run of this cell draws
# a different partition, which makes the reported validation accuracy
# irreproducible and lets former validation images slip into the training
# set when training is continued afterwards.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Batches of 32; only the training data is reshuffled each epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

CNN 모델을 구축한다.

In [12]:
class SimpleCNN(nn.Module):
    """Small two-convolution image classifier for 3-channel 224x224 inputs.

    Architecture: conv(3->16, k=3) -> ReLU -> 2x2 max-pool ->
    conv(16->32, k=3) -> ReLU -> 2x2 max-pool -> flatten ->
    linear(32*54*54 -> 128) -> ReLU -> linear(128 -> num_classes).
    The forward pass returns the raw output of the last linear layer
    (no softmax is applied).

    Args:
        num_classes: Number of output units. When omitted (``None``), the
            value falls back to ``len(dataset.classes)`` read from the
            module-level ``dataset``, so ``SimpleCNN()`` keeps working as
            before.
    """

    # Flattened feature count after both conv+pool stages for a 224x224
    # input: each side shrinks 224 -> 222 -> 111 -> 109 -> 54, with 32 channels.
    _FLAT_FEATURES = 32 * 54 * 54

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: size the head from the global dataset.
            num_classes = len(dataset.classes)

        # Layer names and creation order are kept stable so saved state_dict
        # keys (conv1.*, conv2.*, fc1.*, fc2.*) continue to load.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch ``x``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                feature count expected by ``fc1``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, features), this can never regroup samples: a wrong input
        # size fails loudly at fc1 instead of silently changing the batch size.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Choose the compute device first: CUDA when PyTorch reports it as available,
# otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network and relocate it onto the chosen device. The call is left
# as the cell's final expression so the notebook prints the module summary.
model = SimpleCNN()
model.to(device)

SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=93312, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=6, bias=True)
)

손실 함수와 옵티마이저 설정하기.

In [13]:
# Adam over all of the model's parameters, with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy loss for the multi-class classification objective.
criterion = nn.CrossEntropyLoss()

모델 학습시키기.

In [14]:
# Number of full passes over the training set.
num_epochs = 10

for epoch in range(num_epochs):
    # Explicitly select training mode each epoch. The mode is otherwise
    # whatever an earlier cell left behind (e.g. after an evaluation run),
    # which matters as soon as mode-dependent layers such as Dropout or
    # BatchNorm are added to the model.
    model.train()

    # Sum of per-batch losses for this epoch; averaged over batches below.
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Batches must live on the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()              # Clear gradients from the previous step.
        outputs = model(inputs)            # Forward pass.
        loss = criterion(outputs, labels)  # Loss for this batch.
        loss.backward()                    # Backpropagate to compute gradients.
        optimizer.step()                   # Apply the parameter update.

        running_loss += loss.item()

    # Report the mean batch loss of the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

print('Finished Training')



Epoch [1/10], Loss: 2.7319
Epoch [2/10], Loss: 0.7570
Epoch [3/10], Loss: 0.3092
Epoch [4/10], Loss: 0.1355
Epoch [5/10], Loss: 0.0535
Epoch [6/10], Loss: 0.0297
Epoch [7/10], Loss: 0.0192
Epoch [8/10], Loss: 0.0189
Epoch [9/10], Loss: 0.0141
Epoch [10/10], Loss: 0.0166
Finished Training


모델 성능 평가하기.

In [15]:
# Running counts of correctly classified and total validation samples.
correct = 0
total = 0

# Switch to inference mode so mode-dependent layers (Dropout, BatchNorm, ...)
# behave deterministically if they are ever added to the model.
model.eval()

with torch.no_grad():  # No gradients are needed for evaluation.
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the highest score in each row. argmax
        # replaces the legacy `outputs.data` access, which is unnecessary
        # inside no_grad().
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on validation set: {100 * correct / total:.2f}%')

Accuracy on validation set: 86.36%


모델 저장하기.

In [16]:
# Persist only the learned parameters (the state dict), not the module object;
# the file is written to the current working directory.
trained_weights = model.state_dict()
torch.save(trained_weights, 'simple_cnn.pth')