In [1]:
import os
import json
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import cv2

In [5]:
# 1. 디렉토리 설정
source_root = r"C:\Users\USER\Desktop\47.물류공간 예측 데이터\3.개방데이터\1.데이터\Validation\01.원천데이터"
label_root  = r"C:\Users\USER\Desktop\47.물류공간 예측 데이터\3.개방데이터\1.데이터\Validation\02.라벨링데이터"

In [6]:
# 2. 이미지 인덱싱
img_map = {}
for dirpath, _, filenames in os.walk(source_root):
    for fname in filenames:
        if fname.endswith('.jpg'):
            img_map[fname] = os.path.join(dirpath, fname)

In [7]:
# 3. JSON과 이미지 매칭
pairs = []
for dirpath, _, filenames in os.walk(label_root):
    for fname in filenames:
        if fname.endswith('.json'):
            json_path = os.path.join(dirpath, fname)
            try:
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                file_name = data['images'][0]['file_name']
                if file_name in img_map:
                    pairs.append((img_map[file_name], json_path))
            except Exception as e:
                continue

print(f"✅ 총 매칭된 이미지-라벨 수: {len(pairs)}개")

✅ 총 매칭된 이미지-라벨 수: 6049개


In [8]:
# 🧠 1. 면적 → 클래스 매핑용 Dataset 정의
class AreaLabelDataset(Dataset):
    def __init__(self, pairs):
        self.samples = []
        for _, json_path in pairs:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            for ann in data['annotations']:
                seg = np.array(ann['segmentation'][0]).reshape(-1, 2)
                area = cv2.contourArea(seg.astype(np.float32))

                # 레이블: 예를 들어 category_id 또는 size_id
                label = ann.get("size_id", None)
                if label is not None:
                    self.samples.append((area, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        area, label = self.samples[idx]
        area_tensor = torch.tensor([area], dtype=torch.float32)
        label_tensor = torch.tensor(label, dtype=torch.long)
        return area_tensor, label_tensor

In [9]:
# 🎯 아주 간단한 MLP
class AreaClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        return self.fc(x)

In [10]:
# 데이터셋 구성
dataset = AreaLabelDataset(pairs)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [11]:
# 클래스 개수 예: size_id가 0~2인 경우
model = AreaClassifier(num_classes=3).to("cuda" if torch.cuda.is_available() else "cpu")
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
# 학습 루프
for epoch in range(5):
    model.train()
    total_loss = 0
    for area, label in loader:
        area, label = area.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(area)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"[Epoch {epoch+1}] Loss: {total_loss:.4f}")

NameError: name 'loader' is not defined