### 01. MP4 -> Image

In [3]:
'''
    Extract every frame of an MP4 video into numbered JPEG images.
'''

import os
import cv2
import shutil
from tqdm import tqdm

# Video to read and the folder that receives its frames
name = 'car3'
video_path = f"dataset/video/{name}.mp4"
save_dir = f"dataset/image/{name}"

# Make sure the output folder exists; exist_ok avoids the race between
# an existence check and the creation call
os.makedirs(save_dir, exist_ok=True)

def get_frame_from_video(video_path, save_dir):
    """Extract every frame of a video into numbered JPEG files.

    The output folder is deleted and recreated before extraction, so frames
    from an earlier run never mix with the new ones. Frames are written in
    decoding order as ``frame_00001.jpg``, ``frame_00002.jpg``, ...

    Args:
        video_path: Path of the video file to read.
        save_dir: Folder that receives the frames. Any existing content of
            this folder is removed.

    Raises:
        Exception: If the video cannot be opened ("Video load error").
        OSError: If a frame cannot be written to disk.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            raise Exception("Video load error")

        # The container-reported frame count is only an estimate (the sample
        # run reported 523 frames but decoded 522), so it is used solely as
        # the progress-bar total; a non-positive value means "unknown".
        len_video = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        # Start from an empty output folder
        images_save_folder = save_dir
        if os.path.exists(images_save_folder):
            shutil.rmtree(images_save_folder)
        os.makedirs(images_save_folder)

        # Read until the decoder reports no more frames
        count = 0
        with tqdm(total=len_video if len_video > 0 else None) as pbar:
            while True:
                success, image = video.read()
                if not success:
                    break
                save_idx = str(count + 1).zfill(5)
                save_image_path = os.path.join(images_save_folder, f"frame_{save_idx}.jpg")
                # imwrite signals failure through its return value, not an exception
                if not cv2.imwrite(save_image_path, image):
                    raise OSError(f"Failed to write frame: {save_image_path}")
                count += 1
                pbar.update(1)
    finally:
        # Release the capture even when opening, decoding or writing fails
        video.release()
    print("Success!")

# Extract the frames of the configured video into save_dir
get_frame_from_video(video_path, save_dir)

100%|█████████▉| 522/523 [00:02<00:00, 184.44it/s]

Success!





### 02. 각 폴더 이미지 개수 맞추기 

In [4]:
'''
    Print the initial number of images in each folder.
    Trim every folder down to the image count of the smallest folder.
    Print the final number of images in each folder after trimming.
'''

import os
import random

# Base path whose sub-folders are equalized
base_path = "dataset/image"

def get_image_files(folder_path):
    """Return the names of the regular files directly inside *folder_path*.

    Sub-directories are skipped. No extension filtering is applied, so every
    regular file is treated as an image.
    """
    file_names = []
    for entry in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry)):
            file_names.append(entry)
    return file_names

def equalize_image_counts(base_path):
    """Trim every sub-folder of *base_path* to the size of the smallest one.

    Prints the file count of each sub-folder, deletes randomly chosen files
    from the larger folders until all hold as many files as the smallest
    folder, then prints the resulting counts.

    Deletion is permanent. If any sub-folder is empty the target count is 0
    and every other sub-folder is emptied as well.

    Args:
        base_path: Folder whose immediate sub-folders are equalized.

    Returns:
        None. When *base_path* has no sub-folders a notice is printed and
        nothing is deleted.
    """
    folder_paths = [
        os.path.join(base_path, folder)
        for folder in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, folder))
    ]
    # List each folder once; the listing serves both counting and sampling
    files_by_folder = {folder: get_image_files(folder) for folder in folder_paths}

    # Report the starting counts
    print("Initial image counts:")
    for folder, files in files_by_folder.items():
        print(f"{folder}: {len(files)} images")

    # min() of an empty sequence raises ValueError, so stop early
    if not files_by_folder:
        print(f"\nNo image folders found in {base_path}")
        return

    # The smallest folder sets the target size
    min_count = min(len(files) for files in files_by_folder.values())

    # Randomly drop the surplus from every larger folder
    for folder, files in files_by_folder.items():
        surplus = len(files) - min_count
        if surplus > 0:
            for image in random.sample(files, surplus):
                os.remove(os.path.join(folder, image))
            print(f"\nFolder: {folder} - Deleted {surplus} images")
        else:
            print(f"\nFolder: {folder} - No images deleted")

    # Re-read the folders so the report reflects what is on disk
    print("\nFinal image counts:")
    for folder in folder_paths:
        print(f"{folder}: {len(get_image_files(folder))} images")

# Run the equalization on the base path
equalize_image_counts(base_path)


Initial image counts:
dataset/image/car2: 481 images
dataset/image/car1: 597 images
dataset/image/car3: 522 images

Folder: dataset/image/car2 - No images deleted

Folder: dataset/image/car1 - Deleted 116 images

Folder: dataset/image/car3 - Deleted 41 images

Final image counts:
dataset/image/car2: 481 images
dataset/image/car1: 481 images
dataset/image/car3: 481 images


### 03. Train / Valid / Test

In [5]:
import os
import shutil
import random

# Dataset paths: source images and the train/valid/test output folders
dataset_dir = "dataset/image"
train_dir = "dataset/v1/train"
valid_dir = "dataset/v1/valid"
test_dir = "dataset/v1/test"

# Directory creation helper
def create_dir(dir_path):
    """Create *dir_path* (including missing parents) if it does not exist.

    Calling it on an existing directory is a no-op. ``exist_ok=True`` avoids
    the race between a separate existence check and the creation call.
    """
    os.makedirs(dir_path, exist_ok=True)

# Create the top-level train/valid/test folders
create_dir(train_dir)
create_dir(valid_dir)
create_dir(test_dir)

# Split each class folder 60/20/20 into train/valid/test and print the counts.
# Files are moved, not copied, so the source class folders end up empty.
for class_folder in os.listdir(dataset_dir):
    class_path = os.path.join(dataset_dir, class_folder)
    if not os.path.isdir(class_path):
        continue

    # Keep regular files only, so nested folders are never counted or moved
    images = [
        entry for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    ]
    random.shuffle(images)

    train_split = int(0.6 * len(images))
    valid_split = int(0.8 * len(images))

    train_images = images[:train_split]
    valid_images = images[train_split:valid_split]
    test_images = images[valid_split:]

    # Create the per-class folder of each split, then move its share of files
    for split_dir, split_images in (
        (train_dir, train_images),
        (valid_dir, valid_images),
        (test_dir, test_images),
    ):
        target_dir = os.path.join(split_dir, class_folder)
        create_dir(target_dir)
        for img in split_images:
            shutil.move(os.path.join(class_path, img), os.path.join(target_dir, img))

    # Report how many images landed in each split
    print(f"Class '{class_folder}' - Train: {len(train_images)}, Valid: {len(valid_images)}, Test: {len(test_images)}")



Class 'car2' - Train: 288, Valid: 96, Test: 97
Class 'car1' - Train: 288, Valid: 96, Test: 97
Class 'car3' - Train: 288, Valid: 96, Test: 97
