In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the dataset paths
# used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install torchmetrics
!pip install ultralytics

Collecting torchmetrics
  Downloading torchmetrics-1.5.1-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.8-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.5.1-py3-none-any.whl (890 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m890.6/890.6 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.8-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.8 torchmetrics-1.5.1
Collecting ultralytics
  Downloading ultralytics-8.3.23-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.23-py3-none-any.whl (877 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m877.6/877.6 kB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m

In [None]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torchvision import models
from PIL import Image
from tqdm import tqdm
import torchmetrics
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
# Class index -> (gender, style).
# Indices follow listing order: the 'M' styles take 0-7, the 'W' styles 8-30.
classification = dict(enumerate(
    [('M', style) for style in (
        'hippie', 'mods', 'ivy', 'hiphop',
        'metrosexual', 'bold', 'sportivecasual', 'normcore',
    )]
    + [('W', style) for style in (
        'sportivecasual', 'feminine', 'minimal', 'powersuit', 'bodyconscious',
        'classic', 'kitsch', 'normcore', 'cityglam', 'oriental', 'ecology',
        'space', 'athleisure', 'hippie', 'genderless', 'punk', 'grunge',
        'disco', 'military', 'hiphop', 'popart', 'lounge', 'lingerie',
    )]
))

In [None]:
class CustomImageDataset(Dataset):
    """Image-folder dataset whose integer labels are parsed from file names.

    File names are expected to end in ``..._<style>_<gender>.<ext>``. The
    ``(gender, style)`` pair is looked up in ``classification`` to obtain the
    class index.

    Args:
        folder: Directory scanned (non-recursively) for ``.jpg`` / ``.png`` files.
        classification: Mapping ``index -> (gender, style)``.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, folder, classification, transform=None):
        self.folder = folder
        self.transform = transform
        self.classification = classification
        # os.listdir returns entries in arbitrary order; sort so that a given
        # sample index refers to the same file on every run.
        self.image_files = sorted(
            f for f in os.listdir(folder) if f.endswith(('.jpg', '.png'))
        )

    def __len__(self):
        """Return the number of image files found in ``folder``."""
        return len(self.image_files)

    def _label_index(self, filename):
        """Return the class index encoded in ``filename``, or ``None`` if unknown."""
        parts = filename.split('_')
        if len(parts) < 2:
            # No "<style>_<gender>" suffix to parse; treat as an unknown label
            # instead of raising IndexError.
            return None
        style = parts[-2]
        gender = parts[-1].split('.')[0]
        # First index whose (gender, style) pair matches, else None.
        return next(
            (i for i, (g, s) in self.classification.items() if g == gender and s == style),
            None,
        )

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, index)`` where ``image`` is the (optionally transformed)
            RGB image and ``index`` the class index. Unknown labels fall back
            to index 0 after printing a message. ``(None, None)`` is returned
            when the image cannot be loaded.
            NOTE(review): a default DataLoader ``collate_fn`` is unlikely to
            accept ``None`` samples — confirm, or filter them in a custom
            ``collate_fn``.
        """
        filename = self.image_files[idx]
        img_path = os.path.join(self.folder, filename)
        try:
            # Image.open is lazy and keeps the file open. convert() loads the
            # pixel data into a new image, so the file can be closed right away.
            with Image.open(img_path) as raw:
                img = raw.convert('RGB')
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None, None

        # Extract the label (gender and style) from the file name.
        index = self._label_index(filename)

        # No matching class -> fall back to index 0.
        if index is None:
            print(f"Label for {filename} not found in classification.")
            index = 0

        # Apply the transform, if any.
        if self.transform:
            img = self.transform(img)

        return img, index

In [None]:
# Object detection
def detect_objects_and_get_largest_bbox(images, labels):
    """Run the global ``model_yolo`` on each image and keep the largest person box.

    Args:
        images: Sized iterable of images convertible with ``np.array`` — e.g.
            PIL images, HWC arrays, or CHW tensors such as a DataLoader batch.
        labels: One label per image; passed through unchanged.

    Returns:
        List of ``(image, bbox, label)`` tuples, one per image in which at
        least one box of class ``person_class_id`` was detected. ``image`` is
        an RGB PIL image and ``bbox`` is the ``xyxy`` array of the box with
        the largest area. Images with an unexpected shape or without a
        detection are skipped (a message is printed).
    """
    train_data = []
    person_class_id = 0  # "person" class in YOLO is usually class ID 0

    for img, label in tqdm(zip(images, labels), total=len(images), desc="Processing Images"):
        img = np.array(img)

        # Convert (C, H, W) to (H, W, C).
        if img.ndim == 3 and img.shape[0] == 3:
            img = np.transpose(img, (1, 2, 0))

        # Only 3-channel images are supported.
        if img.ndim != 3 or img.shape[2] != 3:
            print(f"Skipping image due to unexpected shape: {img.shape}")
            continue

        # Image.fromarray cannot build an RGB image from float data, and the
        # Ultralytics docs describe numpy sources as uint8 (0-255). Float
        # input whose maximum is <= 1.0 is assumed to be ToTensor-style [0, 1]
        # data and is rescaled — TODO confirm no [0, 255] float images occur.
        if img.dtype != np.uint8:
            if np.issubdtype(img.dtype, np.floating) and img.size and img.max() <= 1.0:
                img = img * 255.0
            img = np.clip(np.rint(img), 0, 255).astype(np.uint8)
        # The transpose above yields a non-contiguous view; make it contiguous.
        img = np.ascontiguousarray(img)

        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        results = model_yolo.predict(img_bgr)

        # Keep only the boxes classified as a person.
        bboxes = [
            box.xyxy[0].cpu().numpy()
            for result in results
            for box in result.boxes
            if int(box.cls.item()) == person_class_id
        ]

        if bboxes:
            # Several people may be detected; keep the box with the largest area.
            largest_bbox = max(bboxes, key=lambda bbox: (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
            # `img` is already RGB (only `img_bgr` was converted), so it is
            # wrapped directly; a BGR2RGB conversion here would swap the red
            # and blue channels.
            img_pil = Image.fromarray(img)
            train_data.append((img_pil, largest_bbox, label))
        else:
            print("No bounding boxes detected.")

    return train_data

In [None]:
def preprocess_data(train_data):
    """Turn detection records into stacked image and class tensors.

    Each record is indexed positionally: element 0 is the image, which is
    passed through the module-level ``transform``, and element 2 is the class
    id (``None`` is replaced by 0). Both result tensors are moved to the
    module-level ``device``.

    Returns:
        ``(images_tensor, classes_tensor)``.
    """
    tensors, class_ids = [], []

    for record in tqdm(train_data, desc="Preprocessing Data"):
        label = record[2]
        if label is None:
            label = 0  # missing label defaults to class 0
        tensors.append(transform(record[0]))
        class_ids.append(label)

    images_on_device = torch.stack(tensors).to(device)
    classes_on_device = torch.tensor(class_ids).to(device)
    return images_on_device, classes_on_device

In [None]:
# Load the detection model from the 'yolov10n.pt' weights.
# detect_objects_and_get_largest_bbox reads this as a global.
model_yolo = YOLO('yolov10n.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov10n.pt to 'yolov10n.pt'...


100%|██████████| 5.59M/5.59M [00:00<00:00, 330MB/s]


In [None]:
# Transform definition: resize to 224x224, then convert to a tensor.
# Used by the dataset below and, as a global, by preprocess_data.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Folder under the mounted Drive that holds the training images.
train_image_folder = "/content/drive/MyDrive/dataset/2024 데이터 크리에이터 캠프 대학부 데이터셋/training_image"

In [None]:
# Dataset over the training folder; applies `transform` to every image and
# derives the class index from each file name.
train_dataset = CustomImageDataset(train_image_folder, classification=classification, transform=transform)

# shuffle=False: batches of 128 are served in dataset order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=False)

In [None]:
# Preprocessing smoke test: run detection on the first batch only.
# `images` / `labels` are fetched here so the cell works on a fresh kernel
# instead of relying on variables left behind by a loop in another cell.
images, labels = next(iter(train_loader))
sample_detections = detect_objects_and_get_largest_bbox(images, labels)
# Show a count rather than dumping every image/bbox as cell output.
len(sample_detections)

In [None]:
# Run person detection batch by batch and collect every
# (image, bbox, label) record returned for the whole loader.
# NOTE: `images`, `labels` and `batch_data` remain defined at notebook scope
# after the loop finishes.
train_data = []
for images, labels in train_loader:
    batch_data = detect_objects_and_get_largest_bbox(images, labels)
    train_data.extend(batch_data)

In [None]:
# Apply `transform` to the detected images and stack images/classes into
# tensors on `device`.
train_images_tensor, train_classes_tensor = preprocess_data(train_data)

In [None]:
# Load images: collect the full path of every .jpg in the training folder.
train_image_folder = "/content/drive/MyDrive/dataset/2024 데이터 크리에이터 캠프 대학부 데이터셋/training_image"
# os.listdir yields entries in arbitrary order; sort so the list (and
# everything derived from it) is reproducible between runs.
image_path = sorted(
    os.path.join(train_image_folder, name)
    for name in os.listdir(train_image_folder)
    if name.endswith('.jpg')
)

In [None]:
# Number of .jpg paths collected in `image_path`.
len(image_path)

4070

- 학습 이미지 4,070장을 모두 처리 완료 (위 `len(image_path)` 결과 기준).

In [None]:
# NOTE(review): `load_images_from_folder` is not defined in any cell visible
# here — confirm it is defined before this cell runs on a fresh kernel.
# This also rebinds `labels`, which the batch loop earlier in the notebook
# uses as its loop variable.
train_images, labels = load_images_from_folder(image_path)

Loading images:   0%|          | 0/4070 [00:00<?, ?it/s]

Attempting to load /content/drive/MyDrive/dataset/2024 데이터 크리에이터 캠프 대학부 데이터셋/training_image/W_13465_80_powersuit_W.jpg


In [None]:
# Detect objects and prepare data for training.
# NOTE: rebinds `train_data`, replacing the list built earlier from the
# `train_loader` batches.
train_data = detect_objects_and_get_largest_bbox(train_images, labels)

In [None]:
# Prepare tensors from the detected data.
# NOTE: rebinds `train_images_tensor` / `train_classes_tensor` assigned by an
# earlier cell.
train_images_tensor, train_classes_tensor = preprocess_data(train_data)

In [None]:
# Create TensorDataset from the stacked image and class tensors.
# NOTE: rebinds `train_dataset`, which earlier in the notebook is a
# CustomImageDataset, to a different kind of object.
train_dataset = TensorDataset(train_images_tensor, train_classes_tensor)