In [1]:
import os
from pathlib import Path
from rembg import remove
from PIL import Image
from helpers import *

In [2]:
# Class labels to process. Each label is joined onto the data directory to
# locate that class's image folder.
classes = [
    'bream',
    'dusky_flathead',
    'silver_trevally',
    'snapper',
    'yellowtail_amberjack',
]

In [3]:
# Root directory that is expected to hold one sub-directory per class label.
data_dir = 'data'
# Root directory under which a "processed_<class>" directory is created per class.
processed_data_dir = 'processed_data'

In [4]:
# Filter parameters forwarded unchanged to process_files (from helpers).
# NOTE(review): presumably files with other extensions or sizes outside
# these bounds are skipped -- confirm against helpers.process_files.
allowed_image_exts = {'.png', '.jpg', '.jpeg'}
min_image_size_kb = 5
max_image_size_kb = 10_000  # 10 MB

In [5]:
def process_class_dir(class_dir_path, 
                      processed_class_dir_path, 
                      allowed_image_exts, 
                      min_image_size_kb, 
                      max_image_size_kb):
    """Process the files that sit directly inside one class directory.

    Only the top level of ``class_dir_path`` is inspected. The directory is
    rejected when it cannot be listed, when it contains any sub-directory,
    or when it contains no files. Otherwise its file names are handed to
    ``process_files`` together with the output directory and the filter
    parameters.

    Args:
        class_dir_path: Directory holding the raw files of a single class.
        processed_class_dir_path: Output directory, forwarded to
            ``process_files``.
        allowed_image_exts: Forwarded unchanged to ``process_files``.
        min_image_size_kb: Forwarded unchanged to ``process_files``.
        max_image_size_kb: Forwarded unchanged to ``process_files``.

    Returns:
        bool: True when ``process_files`` was invoked for the directory,
        False in every rejection case listed above.
    """
    # os.walk silently yields nothing for a missing or unreadable directory.
    # Take just its first entry (the directory itself) and treat "no entry"
    # as an explicit failure instead of falling through and returning None.
    top_level = next(os.walk(class_dir_path), None)
    if top_level is None:
        print(f"Could not read class directory: {class_dir_path}")
        return False

    root, dirs, files = top_level

    if dirs:
        print("Only files in data class directories")
        return False

    if not files:
        print(f"No files found in {root}")
        return False

    process_files(files, processed_class_dir_path, root, allowed_image_exts, min_image_size_kb, max_image_size_kb)
    return True

def process_class_data_dirs(data_dir, 
                            processed_data_dir, 
                            classes, 
                            allowed_image_exts, 
                            min_image_size_kb, 
                            max_image_size_kb):
    """Run ``process_class_dir`` for every class that has a data directory.

    For each label in ``classes`` the source directory ``<data_dir>/<label>``
    is checked with ``is_existing_dir``. Labels that fail the check are
    reported and skipped. For the rest, the output directory
    ``<processed_data_dir>/processed_<label>`` is created if needed and the
    class directory is processed.

    Args:
        data_dir: Root directory containing the per-class sub-directories.
        processed_data_dir: Root directory for the per-class outputs; it is
            created when missing.
        classes: Iterable of class labels (sub-directory names).
        allowed_image_exts: Forwarded unchanged to ``process_class_dir``.
        min_image_size_kb: Forwarded unchanged to ``process_class_dir``.
        max_image_size_kb: Forwarded unchanged to ``process_class_dir``.

    Returns:
        bool: False when ``data_dir`` fails the ``is_existing_dir`` check,
        True otherwise. The result of each ``process_class_dir`` call is
        not taken into account.
    """
    if not is_existing_dir(data_dir):
        print("Data directory not found")
        return False

    Path(processed_data_dir).mkdir(parents=True, exist_ok=True)

    # These three values travel together and are never changed in the loop.
    file_filters = (allowed_image_exts, min_image_size_kb, max_image_size_kb)

    for label in classes:
        source_dir = os.path.join(data_dir, label)

        if is_existing_dir(source_dir):
            target_dir = os.path.join(processed_data_dir, "processed_" + label)
            Path(target_dir).mkdir(parents=True, exist_ok=True)

            print(f"Processing class directory: {label}")
            process_class_dir(source_dir, target_dir, *file_filters)
        else:
            print(f"There is no directory containing data for class: {label}.")

    print("Data processed")
    return True


In [6]:
# Run the whole pipeline; the returned flag is shown as the cell's output.
process_class_data_dirs(
    data_dir=data_dir,
    processed_data_dir=processed_data_dir,
    classes=classes,
    allowed_image_exts=allowed_image_exts,
    min_image_size_kb=min_image_size_kb,
    max_image_size_kb=max_image_size_kb,
)

Processing class directory: bream
Processing file: data/bream/bream1.jpeg
Output file already exists: processed_data/processed_bream/no_bg_bream1.png. Skipping process.
Processing file: data/bream/bream2.jpeg
Output file already exists: processed_data/processed_bream/no_bg_bream2.png. Skipping process.
Processing file: data/bream/bream3.jpeg
Output file already exists: processed_data/processed_bream/no_bg_bream3.png. Skipping process.
Processing file: data/bream/bream4.jpeg
Output file already exists: processed_data/processed_bream/no_bg_bream4.png. Skipping process.
Processing file: data/bream/bream5.jpeg
Output file already exists: processed_data/processed_bream/no_bg_bream5.png. Skipping process.
Processing class directory: dusky_flathead
Processing file: data/dusky_flathead/dusky_flathead2.jpeg
Output file already exists: processed_data/processed_dusky_flathead/no_bg_dusky_flathead2.png. Skipping process.
Processing file: data/dusky_flathead/dusky_flathead3.jpeg
Output file already

True