In [None]:
# Gather all train/val/test data under data_food into train first, then redistribute the image/label pairs at a 0.6 / 0.2 / 0.2 ratio.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import shutil
import random

def move_files(file_pairs, dest_image_dir, dest_label_dir):
    """Move (image, label) path pairs into their destination directories.

    Args:
        file_pairs: Sequence of ``(image_path, label_path)`` tuples.
        dest_image_dir: Directory that receives the image of each pair.
        dest_label_dir: Directory that receives the label of each pair.

    Both destination directories are created when they do not exist yet.
    One progress line is printed per moved file.
    """
    for directory in (dest_image_dir, dest_label_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    total_files = len(file_pairs)
    position = 0
    for image_src, label_src in file_pairs:
        position += 1
        image_dst = os.path.join(dest_image_dir, os.path.basename(image_src))
        label_dst = os.path.join(dest_label_dir, os.path.basename(label_src))
        shutil.move(image_src, image_dst)
        shutil.move(label_src, label_dst)

        # Progress is reported relative to the number of pairs, not files.
        progress = (position / total_files) * 100
        print(f'Moved: {image_src} -> {dest_image_dir} ({progress:.2f}%)')
        print(f'Moved: {label_src} -> {dest_label_dir} ({progress:.2f}%)')

def gather_all_files(image_dirs, label_dirs, target_image_dir, target_label_dir):
    """Move every matched image/label pair into the target directories.

    ``image_dirs`` and ``label_dirs`` are walked in lockstep. Within each
    directory pair, an image and a label are matched when their file names
    are equal after stripping the final extension. Files without a partner
    are left where they are.

    Args:
        image_dirs: Directories to collect images from.
        label_dirs: Directories to collect labels from, parallel to
            ``image_dirs``.
        target_image_dir: Directory that receives the matched images.
        target_label_dir: Directory that receives the matched labels.

    Returns:
        List of ``(image_path, label_path)`` tuples pointing at the new
        locations, in a deterministic order (source directory order, then
        file stem order) and without duplicates.
    """
    os.makedirs(target_image_dir, exist_ok=True)
    os.makedirs(target_label_dir, exist_ok=True)

    file_pairs = []
    recorded = set()
    for img_dir, lbl_dir in zip(image_dirs, label_dirs):
        if not (os.path.isdir(img_dir) and os.path.isdir(lbl_dir)):
            continue

        image_files = {
            os.path.splitext(f)[0]: os.path.join(img_dir, f)
            for f in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, f))
        }
        label_files = {
            os.path.splitext(f)[0]: os.path.join(lbl_dir, f)
            for f in os.listdir(lbl_dir)
            if os.path.isfile(os.path.join(lbl_dir, f))
        }

        # Find the stems present on both sides. Sorting matters: set and
        # listdir order are arbitrary, and a seeded shuffle downstream is only
        # reproducible when it starts from a stable order.
        for stem in sorted(image_files.keys() & label_files.keys()):
            img_src = image_files[stem]
            lbl_src = label_files[stem]
            img_dest = os.path.join(target_image_dir, os.path.basename(img_src))
            lbl_dest = os.path.join(target_label_dir, os.path.basename(lbl_src))
            shutil.move(img_src, img_dest)
            shutil.move(lbl_src, lbl_dest)

            # The same file name may exist in several source directories; list
            # the resulting target pair once so it is not moved twice later.
            pair = (img_dest, lbl_dest)
            if pair not in recorded:
                recorded.add(pair)
                file_pairs.append(pair)
    return file_pairs

def split_and_move(file_pairs, train_ratio, test_ratio, val_ratio, train_img_dest, test_img_dest, val_img_dest, train_lbl_dest, test_lbl_dest, val_lbl_dest, seed=42):
    """Shuffle image/label pairs and distribute them into train/test/val.

    Args:
        file_pairs: ``(image_path, label_path)`` tuples to distribute. The
            caller's sequence is not modified.
        train_ratio: Fraction of the pairs that goes to train.
        test_ratio: Fraction of the pairs that goes to test.
        val_ratio: Accepted for symmetry but not used in the computation;
            val receives whatever remains after train and test.
        train_img_dest, test_img_dest, val_img_dest: Image destinations.
        train_lbl_dest, test_lbl_dest, val_lbl_dest: Label destinations.
        seed: Seed passed to ``random.seed`` before shuffling.
    """
    random.seed(seed)
    # Shuffle a private copy so the caller's list keeps its order.
    shuffled_pairs = list(file_pairs)
    random.shuffle(shuffled_pairs)

    total = len(shuffled_pairs)
    train_split = int(total * train_ratio)
    test_split = train_split + int(total * test_ratio)

    train_files = shuffled_pairs[:train_split]
    test_files = shuffled_pairs[train_split:test_split]
    # Everything left over lands in val, including rounding remainders.
    val_files = shuffled_pairs[test_split:]

    move_files(train_files, train_img_dest, train_lbl_dest)
    move_files(test_files, test_img_dest, test_lbl_dest)
    move_files(val_files, val_img_dest, val_lbl_dest)

def restore_data(base_dir="/content/drive/MyDrive/Bigproject/data_food"):
    """Move everything from the test/val folders back into the train folders.

    For each of ``images`` and ``labels``, every entry found in
    ``<base_dir>/<category>/test`` and ``<base_dir>/<category>/val`` is moved
    to ``<base_dir>/<category>/train``. A progress line is printed per entry.

    Args:
        base_dir: Dataset root containing the ``images`` and ``labels``
            folders. Defaults to the Drive location used by this notebook.
    """
    print("Restoring all files to train folder...")

    for subset in ("test", "val"):
        for category in ("images", "labels"):
            src_dir = os.path.join(base_dir, category, subset)
            dest_dir = os.path.join(base_dir, category, "train")

            if not os.path.isdir(src_dir):
                continue

            # The train folder may not exist yet; moving into a missing
            # directory would fail.
            os.makedirs(dest_dir, exist_ok=True)

            # Sorted so the progress output is stable between runs.
            files = sorted(os.listdir(src_dir))
            total_files = len(files)
            for idx, file_name in enumerate(files, 1):
                src_path = os.path.join(src_dir, file_name)
                dest_path = os.path.join(dest_dir, file_name)
                shutil.move(src_path, dest_path)
                percent_complete = (idx / total_files) * 100
                print(f"Moved: {src_path} -> {dest_path} ({percent_complete:.2f}%)")
    print("All files are now in the train folder.")

def process_data(seed=42):
    """Regroup the whole dataset into train, then re-split it 60/20/20.

    Calls ``restore_data``, collects matched image/label pairs from the
    train/test/val folders into the train folders via ``gather_all_files``,
    and hands the pairs to ``split_and_move``.

    Args:
        seed: Seed forwarded to ``split_and_move``.
    """
    restore_data()

    root = "/content/drive/MyDrive/Bigproject/data_food"
    subsets = ("train", "test", "val")
    image_dirs = [f"{root}/images/{name}" for name in subsets]
    label_dirs = [f"{root}/labels/{name}" for name in subsets]

    # The train folders double as the collection point for everything.
    train_images, test_images, val_images = image_dirs
    train_labels, test_labels, val_labels = label_dirs

    file_pairs = gather_all_files(image_dirs, label_dirs, train_images, train_labels)

    split_and_move(
        file_pairs, 0.6, 0.2, 0.2,
        train_images, test_images, val_images,
        train_labels, test_labels, val_labels,
        seed
    )
    print("Data processing completed!")

# Run this cell's pipeline: restore to train, gather pairs, re-split with a fixed seed.
process_data(seed=42)


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG_8144_jpg.rf.8155509f0dd28543e54df1a55596a114.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/val (44.69%)
Moved: /content/drive/MyDrive/Bigproject/data_food/images/train/IMG_4375_JPG.rf.88014010c587ceedc6fd4f3791df93b5.jpg -> /content/drive/MyDrive/Bigproject/data_food/images/val (44.71%)
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG_4375_JPG.rf.88014010c587ceedc6fd4f3791df93b5.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/val (44.71%)
Moved: /content/drive/MyDrive/Bigproject/data_food/images/train/IMG20231129152331_halfbright_jpg.rf.d4569d21b38eb89916c953a1db7fce51.jpg -> /content/drive/MyDrive/Bigproject/data_food/images/val (44.73%)
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG20231129152331_halfbright_jpg.rf.d4569d21b38eb89916c953a1db7fce51.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/val

In [None]:
ㅁㅁ

In [None]:
import os
import shutil
import random

def move_files(src_files, dest_dir):
    """Move each path in ``src_files`` into ``dest_dir``.

    The destination directory is created when it does not exist. Files keep
    their base name, and one line is printed per moved file.
    """
    destination_missing = not os.path.exists(dest_dir)
    if destination_missing:
        os.makedirs(dest_dir)

    for source in src_files:
        file_name = os.path.basename(source)
        target = os.path.join(dest_dir, file_name)
        shutil.move(source, target)
        print(f'Moved: {source} -> {target}')

def gather_all_files(source_dirs, target_dir):
    """Move every regular file from ``source_dirs`` into ``target_dir``.

    Args:
        source_dirs: Directories to collect from; missing ones are skipped.
            Sub-directories inside them are left alone.
        target_dir: Directory that receives the files (created if needed).

    Returns:
        List of the files' new paths, ordered by source directory and then
        by file name, with each target path listed once.
    """
    os.makedirs(target_dir, exist_ok=True)

    all_files = []
    recorded = set()
    for src_dir in source_dirs:
        if not os.path.isdir(src_dir):
            continue
        # os.listdir order is arbitrary; sort so that a seeded shuffle of the
        # result is reproducible between runs.
        for file_name in sorted(os.listdir(src_dir)):
            file_path = os.path.join(src_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            dest_path = os.path.join(target_dir, file_name)
            shutil.move(file_path, dest_path)
            # A name that occurs in several source directories ends up as one
            # file in the target, so it must be listed only once.
            if dest_path not in recorded:
                recorded.add(dest_path)
                all_files.append(dest_path)
    return all_files

def split_and_move(files, train_ratio, test_ratio, val_ratio, train_dest, test_dest, val_dest, seed=42):
    """Shuffle a copy of ``files`` and move the pieces into train/test/val.

    The first ``train_ratio`` share goes to ``train_dest``, the next
    ``test_ratio`` share to ``test_dest`` and the remainder to ``val_dest``.
    ``val_ratio`` is accepted but does not take part in the computation.
    ``seed`` is handed to ``random.seed`` before shuffling.
    """
    random.seed(seed)
    order = files.copy()
    random.shuffle(order)

    total = len(order)
    train_end = int(total * train_ratio)
    test_end = train_end + int(total * test_ratio)

    # Slice all three chunks up front, then move them in train/test/val order.
    buckets = (
        (order[:train_end], train_dest),
        (order[train_end:test_end], test_dest),
        (order[test_end:], val_dest),
    )
    for chunk, destination in buckets:
        move_files(chunk, destination)

def process_data(seed=42):
    """Gather the dataset into train and re-split images and labels 60/20/20.

    Images and labels are split by two separate ``split_and_move`` calls that
    use the same seed. That only keeps each image with its label when both
    lists have the same length and the same order, so the gathered files are
    first matched by file stem (name without the final extension) and sorted.
    Files without a partner are not split and stay in the train folder.

    Args:
        seed: Seed forwarded to both ``split_and_move`` calls.
    """
    base_dir = "/content/drive/MyDrive/Bigproject/data_food"
    subsets = ("train", "test", "val")
    image_dirs = [f"{base_dir}/images/{name}" for name in subsets]
    label_dirs = [f"{base_dir}/labels/{name}" for name in subsets]

    # The train folders are the collection point for everything.
    target_image_dir = image_dirs[0]
    target_label_dir = label_dirs[0]

    all_images = gather_all_files(image_dirs, target_image_dir)
    all_labels = gather_all_files(label_dirs, target_label_dir)

    images_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in all_images}
    labels_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in all_labels}
    paired_stems = sorted(images_by_stem.keys() & labels_by_stem.keys())

    unpaired = len(all_images) + len(all_labels) - 2 * len(paired_stems)
    if unpaired:
        print(f"Left {unpaired} unpaired file(s) in the train folder.")

    # Index-aligned lists: entry i of one belongs to entry i of the other.
    paired_images = [images_by_stem[stem] for stem in paired_stems]
    paired_labels = [labels_by_stem[stem] for stem in paired_stems]

    split_and_move(
        paired_images, 0.6, 0.2, 0.2,
        image_dirs[0], image_dirs[1], image_dirs[2],
        seed
    )

    split_and_move(
        paired_labels, 0.6, 0.2, 0.2,
        label_dirs[0], label_dirs[1], label_dirs[2],
        seed
    )

# Run this cell's gather-and-split pipeline with a fixed seed, then report completion.
process_data(seed=42)
print("Data processing completed!")


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG20231129143443_jpg.rf.f3826bf98f9042d712655415a5206744.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/test/IMG20231129143443_jpg.rf.f3826bf98f9042d712655415a5206744.txt
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG_9153_halfbright_jpg.rf.4a510d61ca285bcf4ee752dcae87d3a6.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/test/IMG_9153_halfbright_jpg.rf.4a510d61ca285bcf4ee752dcae87d3a6.txt
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG20231129131705_halfbright_jpg.rf.2a78cd5ce94860e7f5f95d1a208d0b60.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/test/IMG20231129131705_halfbright_jpg.rf.2a78cd5ce94860e7f5f95d1a208d0b60.txt
Moved: /content/drive/MyDrive/Bigproject/data_food/labels/train/IMG20231129143544_jpg.rf.53eb2b6a9312446003b6f90a26e3b9ca.txt -> /content/drive/MyDrive/Bigproject/data_food/labels/tes

In [None]:
ㅁㅁ

NameError: name 'ᄆᄆ' is not defined

In [None]:
import os
import shutil
import random

def move_files(src_files, dest_dir):
    """Relocate the given files into ``dest_dir`` under their base names.

    Creates ``dest_dir`` first when it is absent and prints one line for
    every file that was moved.
    """
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # Resolve every destination before moving anything.
    planned_moves = [
        (origin, os.path.join(dest_dir, os.path.basename(origin)))
        for origin in src_files
    ]
    for origin, destination in planned_moves:
        shutil.move(origin, destination)
        print(f'Moved: {origin} -> {destination}')

def gather_all_files(source_dirs, target_dir):
    """Collect all regular files from ``source_dirs`` into ``target_dir``.

    Args:
        source_dirs: Directories to collect from; ones that do not exist are
            skipped, and sub-directories inside them are not touched.
        target_dir: Directory that receives the files (created if needed).

    Returns:
        The new paths of the collected files, ordered by source directory
        and then by file name, each target path appearing once.
    """
    os.makedirs(target_dir, exist_ok=True)

    all_files = []
    already_listed = set()
    for src_dir in source_dirs:
        if not os.path.isdir(src_dir):
            continue
        # Sort the listing: os.listdir returns entries in arbitrary order,
        # which would make a seeded shuffle of the result non-reproducible.
        for file_name in sorted(os.listdir(src_dir)):
            file_path = os.path.join(src_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            new_path = os.path.join(target_dir, file_name)
            shutil.move(file_path, new_path)
            # Identical names from different source directories collapse to a
            # single file in the target; report that path only once.
            if new_path not in already_listed:
                already_listed.add(new_path)
                all_files.append(new_path)
    return all_files

def split_and_move(files, train_ratio, test_ratio, val_ratio, train_dest, test_dest, val_dest, seed=42):
    """Shuffle ``files`` and move them into train/test/val destinations.

    Args:
        files: Paths to distribute. The caller's sequence is not modified.
        train_ratio: Fraction of the files that goes to ``train_dest``.
        test_ratio: Fraction of the files that goes to ``test_dest``.
        val_ratio: Accepted for symmetry but not used in the computation;
            ``val_dest`` receives whatever remains after train and test.
        train_dest, test_dest, val_dest: Destination directories.
        seed: Seed passed to ``random.seed`` before shuffling.
    """
    random.seed(seed)
    # Shuffle a private copy so the caller's list keeps its order.
    shuffled_files = list(files)
    random.shuffle(shuffled_files)

    total = len(shuffled_files)
    train_split = int(total * train_ratio)
    test_split = train_split + int(total * test_ratio)

    train_files = shuffled_files[:train_split]
    test_files = shuffled_files[train_split:test_split]
    # Everything left over lands in val, including rounding remainders.
    val_files = shuffled_files[test_split:]

    move_files(train_files, train_dest)
    move_files(test_files, test_dest)
    move_files(val_files, val_dest)

def process_data(seed=42):
    """Gather the dataset into train and re-split images and labels 60/20/20.

    Images and labels are split by two separate ``split_and_move`` calls that
    use the same seed. That only keeps each image with its label when both
    lists have the same length and the same order, so the gathered files are
    first matched by file stem (name without the final extension) and sorted.
    Files without a partner are not split and stay in the train folder.

    Args:
        seed: Seed forwarded to both ``split_and_move`` calls.
    """
    base_dir = "/content/drive/MyDrive/Bigproject/data_food"
    subsets = ("train", "test", "val")
    image_dirs = [f"{base_dir}/images/{name}" for name in subsets]
    label_dirs = [f"{base_dir}/labels/{name}" for name in subsets]

    # The train folders are the collection point for everything.
    target_image_dir = image_dirs[0]
    target_label_dir = label_dirs[0]

    all_images = gather_all_files(image_dirs, target_image_dir)
    all_labels = gather_all_files(label_dirs, target_label_dir)

    images_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in all_images}
    labels_by_stem = {os.path.splitext(os.path.basename(p))[0]: p for p in all_labels}
    paired_stems = sorted(images_by_stem.keys() & labels_by_stem.keys())

    unpaired = len(all_images) + len(all_labels) - 2 * len(paired_stems)
    if unpaired:
        print(f"Left {unpaired} unpaired file(s) in the train folder.")

    # Index-aligned lists: entry i of one belongs to entry i of the other.
    paired_images = [images_by_stem[stem] for stem in paired_stems]
    paired_labels = [labels_by_stem[stem] for stem in paired_stems]

    split_and_move(
        paired_images, 0.6, 0.2, 0.2,
        image_dirs[0], image_dirs[1], image_dirs[2],
        seed
    )

    split_and_move(
        paired_labels, 0.6, 0.2, 0.2,
        label_dirs[0], label_dirs[1], label_dirs[2],
        seed
    )

# Run this cell's gather-and-split pipeline, then report completion.
process_data()
print("Data processing completed!")
