<a href="https://colab.research.google.com/github/Onedory/yolov5-art-analyze/blob/main/CNN_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 사전 준비 - JSON 파일에서 라벨 정보 추출

In [None]:
# Mount Google Drive at /content/drive; the data paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install tqdm




In [None]:
import os
import json
import pandas as pd
from tqdm import tqdm

def extract_labels(json_dir, img_dir):
    """Collect ``(image path, class label)`` pairs from annotation JSON files.

    Every ``*.json`` file directly inside ``json_dir`` is parsed, and its
    ``meta.img_path`` and ``annotations.class`` values are recorded.

    Args:
        json_dir: Directory holding the annotation JSON files.
        img_dir: Directory holding the images. Only its existence is
            checked; the images themselves are not read here.

    Returns:
        A list of ``(img_path, img_label)`` tuples, ordered by JSON file name.

    Raises:
        FileNotFoundError: If ``json_dir`` or ``img_dir`` does not exist.
        KeyError: If a JSON file lacks the expected keys.
    """
    if not os.path.exists(json_dir):
        raise FileNotFoundError(f"The JSON directory does not exist: {json_dir}")
    if not os.path.exists(img_dir):
        raise FileNotFoundError(f"The image directory does not exist: {img_dir}")

    # os.listdir returns entries in arbitrary order. Sorting keeps the output
    # (and anything derived from its row order) reproducible between runs.
    json_files = sorted(name for name in os.listdir(json_dir) if name.endswith('.json'))

    labels = []
    for json_file in tqdm(json_files, desc="Processing JSON files"):
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, and keep the file open only while parsing.
        with open(os.path.join(json_dir, json_file), 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Adjust these keys if the JSON schema differs.
        img_path = data['meta']['img_path']
        img_label = data['annotations']['class']
        labels.append((img_path, img_label))

    return labels

# Input locations: the annotation JSON files and the images they describe.
json_dir = '/content/drive/MyDrive/Data/validation/json'
img_dir = '/content/drive/MyDrive/Data/validation/images'
# Output location for the flattened label table.
csv_dir = '/content/drive/MyDrive/Data/validation/csv'
csv_path = os.path.join(csv_dir, 'annotations.csv')

# Pull one (image, label) pair out of every annotation file.
print("Extracting labels from JSON files...")
labels = extract_labels(json_dir, img_dir)

# Turn the pairs into a two-column table and write it out as CSV,
# creating the output directory first if it is missing.
labels_df = pd.DataFrame(labels, columns=['image', 'label'])
os.makedirs(csv_dir, exist_ok=True)
labels_df.to_csv(csv_path, index=False)
print(f"CSV file saved to {csv_path}")


Extracting labels from JSON files...


Processing JSON files: 100%|██████████| 5600/5600 [02:27<00:00, 38.05it/s] 

CSV file saved to /content/drive/MyDrive/Data/validation/csv/annotations.csv





# 1. 데이터셋 및 데이터로더 구성
csv 파일을 사용하여 데이터셋과 데이터로더를 구성합니다.

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os

# Build the label -> integer index mapping from the annotation CSV.
annotations_file = '/content/drive/MyDrive/Data/validation/csv/annotations.csv'
img_labels = pd.read_csv(annotations_file)
# Indices are assigned in the order unique() returns the labels of the
# 'label' column.
label_to_idx = dict(
    (label, idx) for idx, label in enumerate(img_labels['label'].unique())
)

# Label encoding helper.
def encode_label(label):
    """Return the integer class index assigned to ``label``.

    The index is looked up in the module-level ``label_to_idx`` mapping;
    a label that is not in the mapping raises ``KeyError``.
    """
    class_index = label_to_idx[label]
    return class_index

class ObjectDataset(Dataset):
    """Image-classification dataset backed by an annotation CSV.

    The first CSV column holds an image path (only its final component, the
    file name, is used) and the second column holds the class label.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        """Load the annotation table.

        Args:
            annotations_file: Path to the CSV with image paths and labels.
            img_dir: Directory that contains the image files.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the sample at row ``idx``.

        Returns:
            ``(image, label)`` where ``label`` is a tensor holding the encoded
            class index, or ``(None, None)`` when the image cannot be opened
            or decoded, so that the collate function can drop the sample.
        """
        # Keep only the file name from the recorded path and resolve it
        # against img_dir.
        file_name = self.img_labels.iloc[idx, 0].split('/')[-1]
        img_path = os.path.join(self.img_dir, file_name)

        try:
            # convert() returns an independent copy, so the source file can be
            # closed right away. The context manager also closes it when
            # decoding fails, which the bare Image.open() call did not.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert('RGB')
        except OSError:  # IOError is an alias of OSError
            return None, None

        label = torch.tensor(encode_label(self.img_labels.iloc[idx, 1]))
        if self.transform:
            image = self.transform(image)
        return image, label

def collate_fn(batch):
    """Collate a batch while dropping samples that failed to load.

    Samples whose image or label is ``None`` are removed. If nothing is left,
    a pair of empty tensors is returned; otherwise the remaining samples are
    passed to PyTorch's default collate function.
    """
    usable = [
        sample for sample in batch
        if not (sample[0] is None or sample[1] is None)
    ]
    if not usable:
        return torch.tensor([]), torch.tensor([])
    return torch.utils.data.dataloader.default_collate(usable)

# Preprocessing: resize to 224x224 (the input size the CNN below assumes),
# convert to a tensor, then normalize each channel. The mean/std values match
# the widely used ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset and data loaders.
val_dataset = ObjectDataset(annotations_file=annotations_file, img_dir='/content/drive/MyDrive/Data/validation/images', transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)
# Training gets its own loader over the same dataset so that the sample order
# is reshuffled every epoch; evaluation keeps the fixed order.
train_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# NOTE(review): the model is trained on the validation data, so 'train' and
# 'val' see the same images and the validation metrics are not an independent
# estimate of generalization. Point 'train' at a separate training set when
# one is available.
dataloaders = {
    'train': train_loader,
    'val': val_loader
}


# 2. CNN 모델 설계

기존의 SimpleCNN 모델에 Dropout을 추가합니다.

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small two-convolution CNN classifier with dropout before the output.

    Expects 3-channel inputs of spatial size 224x224: each of the two
    stride-2 max-poolings halves the resolution, giving the 56x56 feature
    maps that ``fc1`` is sized for.
    """

    def __init__(self, num_classes):
        """Create the layers.

        Args:
            num_classes: Number of output logits produced by the last layer.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)  # assuming input size 224x224
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. With
        # view(-1, 64 * 56 * 56) an input of the wrong size could be silently
        # folded into extra batch rows; this way it fails in fc1 with a shape
        # error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One output unit per distinct label; create the network and move it to `device`.
num_classes = len(label_to_idx)
model = SimpleCNN(num_classes).to(device)


# 3. 모델 훈련

Early Stopping, L2 정규화, Dropout을 적용한 훈련 함수입니다.

In [None]:
import torch.optim as optim
from torch.optim import lr_scheduler
import copy
from tqdm import tqdm
import numpy as np

# Loss function, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss()
# weight_decay adds L2 regularization to the Adam update.
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=1e-4)
# Learning-rate decay: scale the rate by 0.1 every 7 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

# Early-stopping helper.
class EarlyStopping:
    """Track validation loss and signal when it has stopped improving.

    Each call whose loss is at least as good as the best seen so far saves
    the model's ``state_dict`` to ``path`` and resets the patience counter.
    After ``patience`` consecutive calls without that, ``early_stop`` is set
    to ``True``.
    """

    def __init__(self, patience=5, verbose=False, path='checkpoint.pt'):
        """Set up the tracker.

        Args:
            patience: Number of consecutive non-improving calls tolerated
                before ``early_stop`` is set.
            verbose: If true, print a message on every counter increase and
                every checkpoint save.
            path: File the best ``state_dict`` is written to.
        """
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # Built-in float infinity rather than np.Inf, which was removed in
        # NumPy 2.0.
        self.val_loss_min = float('inf')

    def __call__(self, val_loss, model):
        """Record one validation loss and update the stopping state.

        Args:
            val_loss: Validation loss of the epoch that just finished.
            model: Model whose ``state_dict`` is saved when the loss is the
                best seen so far.
        """
        # Work with the negated loss so that "higher is better".
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Save ``model.state_dict()`` to ``path`` and remember ``val_loss``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

# Training loop with per-epoch validation and early stopping.
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25, patience=5):
    """Train ``model`` and return it with its best weights loaded.

    Each epoch runs a 'train' phase followed by a 'val' phase. The validation
    loss drives early stopping, and the validation accuracy decides which
    weights are kept when training runs to completion.

    Args:
        model: Network to train. It must already be on the module-level
            ``device``.
        dataloaders: Mapping with 'train' and 'val' loaders yielding
            ``(inputs, labels)`` batches. Empty batches are skipped.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per training phase.
        num_epochs: Maximum number of epochs.
        patience: Number of non-improving validation epochs tolerated before
            stopping early.

    Returns:
        ``model`` carrying either the checkpoint with the lowest validation
        loss (when stopped early) or the weights of the epoch with the best
        validation accuracy (when all epochs ran).

    Raises:
        RuntimeError: If a phase yields no usable samples.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Every epoch has a training phase and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            # Number of samples that actually reached the model. The loaders
            # may drop samples, so len(dataset) can overstate this and would
            # deflate both loss and accuracy.
            samples_seen = 0

            for inputs, labels in tqdm(dataloaders[phase], desc=f'{phase} phase', leave=False):
                if len(inputs) == 0:  # skip batches where nothing could be loaded
                    continue

                inputs = inputs.to(device)
                labels = labels.to(device)

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_size

            if samples_seen == 0:
                raise RuntimeError(f"No usable samples in the '{phase}' phase")

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / samples_seen
            epoch_acc = running_corrects / samples_seen

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if not is_train:
                # Keep a copy of the weights with the best validation accuracy.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                # Early stopping is driven by the validation loss.
                early_stopping(epoch_loss, model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    model.load_state_dict(torch.load('checkpoint.pt'))
                    return model

    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the weights with the best validation accuracy.
    model.load_state_dict(best_model_wts)
    return model

# Run the training.
model = train_model(
    model,
    dataloaders,
    criterion,
    optimizer,
    scheduler,
    num_epochs=25,
    patience=5,
)


Epoch 0/24
----------




train Loss: 24.4942 Acc: 0.4841




val Loss: 1.1791 Acc: 0.2195
Validation loss decreased (inf --> 1.179051).  Saving model ...
Epoch 1/24
----------




train Loss: 3.2258 Acc: 0.3109




val Loss: 1.1798 Acc: 0.2304
EarlyStopping counter: 1 out of 5
Epoch 2/24
----------




train Loss: 1.1870 Acc: 0.1777




val Loss: 1.1766 Acc: 0.2304
Validation loss decreased (1.179051 --> 1.176609).  Saving model ...
Epoch 3/24
----------




train Loss: 1.1836 Acc: 0.2005




val Loss: 1.1740 Acc: 0.2304
Validation loss decreased (1.176609 --> 1.174022).  Saving model ...
Epoch 4/24
----------




train Loss: 1.1811 Acc: 0.1986




val Loss: 1.1719 Acc: 0.2304
Validation loss decreased (1.174022 --> 1.171929).  Saving model ...
Epoch 5/24
----------




train Loss: 1.1790 Acc: 0.2214




val Loss: 1.1702 Acc: 0.2500
Validation loss decreased (1.171929 --> 1.170211).  Saving model ...
Epoch 6/24
----------




train Loss: 1.1773 Acc: 0.2500




val Loss: 1.1688 Acc: 0.2500
Validation loss decreased (1.170211 --> 1.168788).  Saving model ...
Epoch 7/24
----------




train Loss: 1.1695 Acc: 0.2500




val Loss: 1.1687 Acc: 0.2500
Validation loss decreased (1.168788 --> 1.168652).  Saving model ...
Epoch 8/24
----------




train Loss: 1.1694 Acc: 0.2386




val Loss: 1.1685 Acc: 0.2500
Validation loss decreased (1.168652 --> 1.168518).  Saving model ...
Epoch 9/24
----------




train Loss: 1.1692 Acc: 0.2329




val Loss: 1.1684 Acc: 0.2500
Validation loss decreased (1.168518 --> 1.168387).  Saving model ...
Epoch 10/24
----------




train Loss: 1.1691 Acc: 0.2214




val Loss: 1.1683 Acc: 0.2500
Validation loss decreased (1.168387 --> 1.168258).  Saving model ...
Epoch 11/24
----------




train Loss: 1.1690 Acc: 0.2214




val Loss: 1.1681 Acc: 0.2500
Validation loss decreased (1.168258 --> 1.168132).  Saving model ...
Epoch 12/24
----------




train Loss: 1.1689 Acc: 0.2157




val Loss: 1.1680 Acc: 0.2500
Validation loss decreased (1.168132 --> 1.168009).  Saving model ...
Epoch 13/24
----------




train Loss: 1.1687 Acc: 0.2100




val Loss: 1.1679 Acc: 0.2500
Validation loss decreased (1.168009 --> 1.167887).  Saving model ...
Epoch 14/24
----------




train Loss: 1.1680 Acc: 0.2100




val Loss: 1.1679 Acc: 0.2500
Validation loss decreased (1.167887 --> 1.167875).  Saving model ...
Epoch 15/24
----------




train Loss: 1.1679 Acc: 0.2214




val Loss: 1.1679 Acc: 0.2500
Validation loss decreased (1.167875 --> 1.167864).  Saving model ...
Epoch 16/24
----------




train Loss: 1.1679 Acc: 0.2329




val Loss: 1.1679 Acc: 0.2500
Validation loss decreased (1.167864 --> 1.167852).  Saving model ...
Epoch 17/24
----------




train Loss: 1.1679 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167852 --> 1.167839).  Saving model ...
Epoch 18/24
----------




train Loss: 1.1679 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167839 --> 1.167827).  Saving model ...
Epoch 19/24
----------




train Loss: 1.1679 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167827 --> 1.167815).  Saving model ...
Epoch 20/24
----------




train Loss: 1.1679 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167815 --> 1.167803).  Saving model ...
Epoch 21/24
----------




train Loss: 1.1678 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167803 --> 1.167802).  Saving model ...
Epoch 22/24
----------




train Loss: 1.1678 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167802 --> 1.167801).  Saving model ...
Epoch 23/24
----------




train Loss: 1.1678 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167801 --> 1.167800).  Saving model ...
Epoch 24/24
----------




train Loss: 1.1678 Acc: 0.2500




val Loss: 1.1678 Acc: 0.2500
Validation loss decreased (1.167800 --> 1.167799).  Saving model ...
Best val Acc: 0.250000
