# 102 Category Flower Dataset 

> Caltech101을 사용해서 최소한의 모델을 구성해봅시다!

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms, models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"GPU/CPU: {device}")

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

GPU/CPU: cuda


In [3]:
batch_size = 8
lr = 0.001
num_epochs = 20
num_classes = 102

## 1. 전처리(이미지)

In [4]:
train_transforms = transforms.Compose([
    transforms.Resize((224,224)),   # 사진이 클수록 성능이 좋아짐
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor() , 
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])

## 2. 데이터 불러오기

In [5]:
train_dataset = datasets.Flowers102(root="./data", transform=train_transforms, download=True)
val_dataset = datasets.Flowers102(root="./data",  transform=val_transforms, download=True)
test_dataset = datasets.Flowers102(root="./data",  transform=val_transforms, download=True)

In [6]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [7]:
print(f"훈련 데이터: {len(train_loader)}")
print(f"검증 데이터: {len(val_loader)}")
print(f"테스트 데이터: {len(test_loader)}")

훈련 데이터: 128
검증 데이터: 128
테스트 데이터: 128


## 모델 설계

In [8]:
class LeNet5Classic(nn.Module):
    def __init__(self, *args, **kwargs):
        super(LeNet5Classic, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5)
        self.fc1 = nn.Linear(120,84)
        self.fc2 = nn.Linear(84,num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = torch.flatten(x,1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [9]:
class LeNet5Modern(nn.Module):
    def __init__(self, *args, **kwargs):
        super(LeNet5Modern, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(120),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(120,84),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(84, num_classes)
        )

        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x,1)
        x = self.classifier(x)
        return x

In [10]:
class Flower102CNN(nn.Module):
    def __init__(self):
        super(Flower102CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        self.drop = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = torch.flatten(x,1)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.fc2(x)
        return x

- 전이학습은 기울기 동결
- 파인튜닝을 기울기 조절

In [11]:
class Flower102ResNet50(nn.Module): # 전이학습(되는 애들만 됨 __허깅페이스에 있어야됨(없고 깃허브에 있으면 걔버전에 다 맞춰야함))
    def __init__(self):
        super(Flower102ResNet50, self).__init__()
        self.resnet = models.resnet18(pretrained=True)  #모델 호출

        # 특징 추출
        for param in self.resnet.parameters():
            param.requires_grad = False                 # 기존의 가중치는 고정

        # 출력
        num_features = self.resnet.fc.in_features       # fullconnected에 입력되는 개수를 받아옴
        self.resnet.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(num_features, 84),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(84, num_classes)
        )


    def forward(self, x):
        return self.resnet(x)

In [17]:
class Flower102VGG11(nn.Module):    # 추론속도 빠름(Resnet보다 많이씀)
    def __init__(self, *args, **kwargs):
        super(Flower102VGG11, self).__init__()
        self.vgg = models.vgg11(pretrained=True)

        # 특징 추출
        for param in self.vgg.parameters():
            param.requires_grad = True

        # 출력
        num_features = self.vgg.classifier[0].in_features   #classifier = nn.classifier(_,_,_,...) 라스트처럼 가져옴
        self.vgg.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(num_features, 84),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(84, num_classes)
        )

    def forward(self, x):
        return self.vgg(x)

In [18]:
# class Flower102Vit(nn.Module):  # 트랜스포머 모델(요즘 많이씀)
#     def __init__(self):
#         super(Flower102Vit, self).__init__()
#         self.vit = models.vit_b_16(pretrained=True)

#         # 특징 추출
#         for param in self.vit.parameters():
#             param.requires_grad = False

#         # 출력
#         og_head_dim = self.vit.heads.head.in_features

#         self.vit.heads.head = nn.Sequential(
#             nn.LayerNorm(og_head_dim),
           
#         )

        

# 학습

In [20]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device='cpu'), labels.to(device='cpu')   #vgg쓸때는 'cpu', gpu(4GB)는 터짐

            # 순전파
            outputs = model(images)
            loss = criterion(outputs, labels)

            # 역전파 및 최적화
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# 학습 및 평가
# model = LeNet5Classic().to(device)
# model = LeNet5Modern().to(device)
# model = Flower102CNN().to(device)
# model = Flower102ResNet50().to(device)
model = Flower102VGG11().to(device='cpu')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

Epoch [1/10], Loss: 4.6466
Epoch [2/10], Loss: 4.6303
Epoch [3/10], Loss: 4.6376
Epoch [4/10], Loss: 4.6458
Epoch [5/10], Loss: 4.6280
Epoch [6/10], Loss: 4.6281
Epoch [7/10], Loss: 4.6280
Epoch [8/10], Loss: 4.6277
Epoch [9/10], Loss: 4.6279
Epoch [10/10], Loss: 4.6277


## 평가

In [22]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device='cpu'), labels.to(device='cpu')
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
evaluate_model(model, test_loader)

Test Accuracy: 0.98%


In [None]:
# 1. 상태값만 저장하는 방법(배포용):: 잘쓰지는
# 2. 전체 모델 저장하는 방법(보관 및 배포):: 주니어단계에서 프로젝트 저장할때bb
# 3. 체크포인트 저장(학습 재개용):: 자원을 나눠써야할 때
# 4. 최고 성능만 선별 저장:: 체크포인트 저장은 할건데.. 일단 API부터 뽑음
# torch.save(model, '20250530_vgg11_epoch10.pth') # 2