In [3]:
import json
import os
import shutil
from PIL import Image

# Pad an image to a square canvas, then resize it.
def resize_with_padding(image, desired_size=256, padding_color=(0, 0, 0)):
    """Center ``image`` on a square canvas and scale it to ``desired_size``.

    The canvas side equals the longer edge of ``image``, so the picture is
    never cropped; the uncovered border is filled with ``padding_color``.

    Args:
        image: PIL image to process. Only its ``size`` is read and it is
            pasted as-is onto the canvas.
        desired_size: Side length, in pixels, of the returned square image.
        padding_color: Fill color of the RGB canvas (black by default).

    Returns:
        A new RGB PIL image of size ``(desired_size, desired_size)``.
    """
    src_w, src_h = image.size
    side = max(src_w, src_h)

    # Square canvas pre-filled with the padding color.
    canvas = Image.new("RGB", (side, side), padding_color)

    # Integer offsets that center the source image on the canvas.
    top_left = ((side - src_w) // 2, (side - src_h) // 2)
    canvas.paste(image, top_left)

    # Scale the padded square down (or up) to the requested size.
    return canvas.resize((desired_size, desired_size))

# Predefined grouping of COCO category names (categories such as "person"
# are deliberately left out). Each group holds exactly four categories.
_GROUP_MEMBERS = {
    "Food1": ("banana", "carrot", "broccoli", "donut"),
    "Food2": ("orange", "cake", "apple", "pizza"),
    "Animal1": ("bird", "sheep", "cow", "horse"),
    "Animal2": ("elephant", "dog", "zebra", "giraffe"),
    "Furniture": ("chair", "dining table", "potted plant", "couch"),
    "Electronic": ("cell phone", "tv", "remote", "laptop"),
    "Kitchen": ("bottle", "cup", "bowl", "wine glass"),
    "Vehicle1": ("car", "motorcycle", "boat", "truck"),
    "Vehicle2": ("bicycle", "bus", "airplane", "train"),
    "Outdoor": ("traffic light", "bench", "stop sign", "fire hydrant"),
    "Accessory": ("handbag", "umbrella", "backpack", "tie"),
    "Sports1": ("kite", "skis", "sports ball", "surfboard"),
    "Sports2": ("skateboard", "tennis racket", "baseball glove", "baseball bat"),
    "Appliance": ("sink", "oven", "refrigerator", "microwave"),
    # NOTE: the COCO category is "vase"; the previous key "base" never matched
    # any annotation, so vase images were silently dropped.
    "Indoor": ("book", "vase", "clock", "teddy bear"),
}

# category name -> (group, subclass). The subclass is the category name
# itself and is used as the leaf directory name when images are sorted.
group_mapping = {
    label: (group, label)
    for group, labels in _GROUP_MEMBERS.items()
    for label in labels
}

# Dataset splits (MSCOCO ships as separate train and val sets). For each
# split: the image directory, the instance-annotation JSON and the
# caption-annotation JSON.
datasets = dict(
    train=dict(
        image_dir="/data_library/mscoco/train/",
        annotation_path="/data_library/mscoco/annotations/instances_train2017.json",
        caption_path="/data_library/mscoco/annotations/captions_train2017.json",
    ),
    val=dict(
        image_dir="/data_library/mscoco/val/",
        annotation_path="/data_library/mscoco/annotations/instances_val2017.json",
        caption_path="/data_library/mscoco/annotations/captions_val2017.json",
    ),
)

# Process every split (train / val) independently: pick one label per image,
# sort the image into image/<split>/<group>/<subclass>/ after padding and
# resizing, and write its first caption to the mirrored text/ tree.
for split_name, paths in datasets.items():
    image_dir = paths["image_dir"]
    annotation_path = paths["annotation_path"]
    caption_path = paths["caption_path"]

    # Load the COCO instance annotations and captions. Read them as UTF-8
    # explicitly so decoding does not depend on the platform locale.
    with open(annotation_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)
    with open(caption_path, 'r', encoding='utf-8') as f:
        caption_data = json.load(f)

    # category id -> category name
    category_mapping = {cat['id']: cat['name'] for cat in coco_data['categories']}

    # image id -> (category name, bbox area) of the annotation with the
    # largest bounding box; that category becomes the image's label.
    annotations = {}
    for annotation in coco_data['annotations']:
        image_id = annotation['image_id']
        bbox = annotation['bbox']  # [x, y, width, height]
        area = bbox[2] * bbox[3]
        if image_id not in annotations or area > annotations[image_id][1]:
            label_name = category_mapping.get(annotation['category_id'], "Unknown")
            annotations[image_id] = (label_name, area)

    # image id -> list of cleaned captions (quotes dropped, commas and
    # newlines turned into spaces). Only the first one is used below.
    captions = {}
    for caption in caption_data['annotations']:
        clean_caption = caption['caption'].replace("\"", "").replace(",", " ").replace("\n", " ")
        captions.setdefault(caption['image_id'], []).append(clean_caption)

    # All '.jpg' files of this split, in a stable order.
    image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

    # Output roots under the current working directory.
    base_img_dir = os.path.join(os.getcwd(), "image", split_name)
    base_txt_dir = os.path.join(os.getcwd(), "text", split_name)
    os.makedirs(base_img_dir, exist_ok=True)
    os.makedirs(base_txt_dir, exist_ok=True)

    for image_file in image_files:
        # The file stem is the zero-padded image id (e.g. "000000123456.jpg").
        # int() accepts leading zeros, so no stripping is needed; stripping
        # would turn an all-zero stem into "" and crash.
        base_filename = os.path.splitext(image_file)[0]
        try:
            image_id = int(base_filename)
        except ValueError:
            # A stray, non-COCO file must not abort the whole run.
            print(f"Skipping file with non-numeric name: {image_file}")
            continue

        # Skip images without annotations or whose dominant object is not
        # part of the predefined grouping (e.g. "person").
        label = annotations.get(image_id, ("Unknown", 0))[0]
        if label == "Unknown" or label not in group_mapping:
            continue

        caption_text = captions.get(image_id, ["No caption available"])[0]
        group, subclass = group_mapping[label]

        # Destination folders for this group / subclass.
        dest_img_dir = os.path.join(base_img_dir, group, subclass)
        dest_txt_dir = os.path.join(base_txt_dir, group, subclass)
        os.makedirs(dest_img_dir, exist_ok=True)
        os.makedirs(dest_txt_dir, exist_ok=True)

        src_image_path = os.path.join(image_dir, image_file)
        dest_image_path = os.path.join(dest_img_dir, image_file)

        # Open, convert to RGB, pad to a square, resize to 256x256 and save.
        # Best effort: a broken image is reported and skipped (no caption
        # file is written for it).
        try:
            with Image.open(src_image_path) as img:
                img_converted = img.convert("RGB")
                processed_img = resize_with_padding(img_converted, desired_size=256)
                processed_img.save(dest_image_path)
        except Exception as e:
            print(f"Error processing image {src_image_path}: {e}")
            continue

        # Store the caption next to the image tree, same stem with ".txt".
        dest_txt_path = os.path.join(dest_txt_dir, base_filename + ".txt")
        with open(dest_txt_path, 'w', encoding='utf-8') as f:
            f.write(caption_text)

    print(f"{split_name} 데이터셋 그룹핑 및 전처리 완료!")


train 데이터셋 그룹핑 및 전처리 완료!
val 데이터셋 그룹핑 및 전처리 완료!


In [4]:
from PIL import Image
import os

# Top-level directory holding the preprocessed images (the "image" folder).
image_root_dir = "/data_library/mscoco/image/"

# Every distinct (width, height) encountered while scanning.
unique_sizes = set()

# Walk the image tree recursively and record each readable image's size.
for current_dir, _subdirs, filenames in os.walk(image_root_dir):
    for filename in filenames:
        # Only consider known image extensions (extend the tuple if needed).
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        image_path = os.path.join(current_dir, filename)
        try:
            with Image.open(image_path) as img:
                unique_sizes.add(img.size)  # img.size is (width, height)
        except Exception as e:
            # Report unreadable files but keep scanning.
            print(f"이미지 열기 실패: {image_path}, 에러: {e}")

# Convert to a list and print at most 20 of the distinct sizes.
unique_sizes_list = list(unique_sizes)
print("Unique image sizes (최대 20개):")
for size in unique_sizes_list[:20]:
    print(size)


Unique image sizes (최대 20개):
(256, 256)
