In [None]:
import os
from PIL import Image
from rembg import remove
from io import BytesIO
import numpy as np


In [None]:
# --- Locations --------------------------------------------------------------
# Relative paths: they resolve against the notebook's working directory.
BASE_DIR = "../resources"
OUTPUT_DIR = "../resources/final_dataset"

# Source datasets that get merged into OUTPUT_DIR.
DATASET_1 = os.path.join(BASE_DIR, "garbage-classification-v2")
DATASET_2 = os.path.join(
    BASE_DIR,
    "garbage-dataset-classification/Garbage_Dataset_Classification/images",
)
DATASET_3 = os.path.join(BASE_DIR, "garbage-detection/GARBAGE CLASSIFICATION")

# --- Labels -----------------------------------------------------------------
# Class names are taken from dataset 1, which acts as the source of truth;
# one output sub-folder is created per entry.
CLASSES = [
    "battery",
    "biological",
    "cardboard",
    "clothes",
    "glass",
    "metal",
    "paper",
    "plastic",
    "shoes",
    "trash",
]

def ensure_dirs():
    """
    Create OUTPUT_DIR and one sub-folder per entry of CLASSES.

    Directories that already exist are left untouched, so the function can be
    called repeatedly.
    """
    # Root first, then every class folder below it.
    targets = [OUTPUT_DIR] + [os.path.join(OUTPUT_DIR, label) for label in CLASSES]
    for folder in targets:
        os.makedirs(folder, exist_ok=True)

# Create the output root and one sub-folder per class before any ingestion runs.
ensure_dirs()

In [None]:
from numba import njit
import math

@njit
def is_gray_numba(r: float, g: float, b: float, tolerance: float = 10.0) -> bool:
    """
    Tell whether an RGB triple is (approximately) a shade of gray.

    A colour counts as gray when each of its three channels lies within
    `tolerance` of the mean of the three channels.
    """
    avg = (r + g + b) / 3
    # Reject as soon as one channel strays too far from the average.
    if abs(r - avg) > tolerance:
        return False
    if abs(b - avg) > tolerance:
        return False
    return abs(g - avg) <= tolerance

def is_uniform_background(img: Image.Image, median_threshold=240, resize=50):
    """
    Heuristic check used to decide whether an image's background should be removed.

    The image is shrunk to `resize` x `resize` pixels and converted to RGB;
    the 0.7 quantile of each colour channel is then taken over all pixels and
    the resulting triple is tested with `is_gray_numba` (default tolerance).

    NOTE(review): `median_threshold` is accepted but never read by this
    function, and the statistic used is the 0.7 quantile, not the median.
    Confirm whether a brightness threshold was meant to be applied as well.

    Returns the result of `is_gray_numba` for the per-channel quantiles.
    """
    thumb = img.resize((resize, resize))
    pixels = np.array(thumb.convert("RGB"))

    # One value per channel: 70th percentile over both spatial axes.
    r_level, g_level, b_level = np.quantile(pixels, q=0.7, axis=(0, 1))
    return is_gray_numba(r_level, g_level, b_level)


def remove_background_if_uniform(img_path, alpha_matting=True):
    """
    Removes background only if it is uniform.

    Parameters:
        img_path: path of the image file to process.
        alpha_matting: forwarded to rembg's `remove` as `alpha_matting`.

    Returns RGBA PIL image: the rembg output when `is_uniform_background`
    accepts the image, otherwise the original image converted to RGBA.
    """
    # Open the source inside a context manager so its file handle is released
    # deterministically; the previous version never closed it explicitly.
    with Image.open(img_path) as img:
        # NOTE(review): is_uniform_background currently ignores
        # median_threshold, so the value 170 has no effect - confirm intent.
        if not is_uniform_background(img, median_threshold=170):
            # Background is complex -> keep original.
            # convert() returns a new image, so it stays valid after `img`
            # is closed on leaving the with-block.
            return img.convert("RGBA")

    # Background is uniform -> remove. rembg is fed the raw file bytes.
    with open(img_path, "rb") as f:
        input_bytes = f.read()
    output_bytes = remove(
        input_bytes,
        alpha_matting=alpha_matting,
        alpha_matting_foreground_threshold=100,
        alpha_matting_background_threshold=15,
        alpha_matting_erode_size=4,
        alpha_matting_base_size=2000
    )
    return Image.open(BytesIO(output_bytes)).convert("RGBA")


def process_and_copy(src_path, class_name, prefix, output_dir):
    """
    Processes one image (removes background if uniform) and saves to target folder.

    The result is written as `<output_dir>/<class_name>/<prefix>.png`. If that
    file already exists the image is skipped, which makes interrupted runs
    resumable. Any exception is reported with a "Skipping ..." message and
    swallowed so one bad file does not abort a whole ingestion run.

    Parameters:
        src_path: path of the source image.
        class_name: name of the class sub-folder inside `output_dir`.
        prefix: file name (without extension) of the output image.
        output_dir: root folder of the merged dataset.
    """
    try:
        class_dir = os.path.join(output_dir, class_name)
        dst_path = os.path.join(class_dir, f"{prefix}.png")
        if os.path.exists(dst_path):
            return
        img = remove_background_if_uniform(src_path)

        os.makedirs(class_dir, exist_ok=True)
        print('saving')

        # Write to a temporary file and rename it into place. Saving straight
        # to dst_path could leave a truncated PNG behind if the save is
        # interrupted, and the exists-check above would then skip it forever.
        tmp_path = dst_path + ".part"
        try:
            # Format must be explicit: it can no longer be inferred from the
            # ".part" extension.
            img.save(tmp_path, format="PNG")
            os.replace(tmp_path, dst_path)
        finally:
            # Only present here if the save or rename did not complete.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
    except Exception as e:
        print(f"Skipping {src_path}: {e}")


In [None]:
# Hand-picked sample images used to eyeball the background-removal heuristic.
# Paths are relative to the repository root; the preview cell prefixes "../".
examples = [
    # paper
    "resources/garbage-classification-v2/paper/202512-paper1.jpg",
    "resources/garbage-classification-v2/paper/202512-paper31.jpg",
    "resources/garbage-classification-v2/paper/202512-paper48.jpg",
    # metal
    "resources/garbage-classification-v2/metal/202512-metal4.jpg",
    "resources/garbage-classification-v2/metal/202512-metal46.jpg",
    # clothes
    "resources/garbage-classification-v2/clothes/clothes_2.jpg",
    "resources/garbage-classification-v2/clothes/clothes_23.jpg",
    "resources/garbage-classification-v2/clothes/clothes_32.jpg",
    "resources/garbage-classification-v2/clothes/clothes_46.jpg",
    # biological
    "resources/garbage-classification-v2/biological/biological_8.jpg",
    "resources/garbage-classification-v2/biological/biological_27.jpg",
    # battery
    "resources/garbage-classification-v2/battery/battery_1.jpg",
    "resources/garbage-classification-v2/battery/battery_5.jpg",
    # one sample from the detection dataset, then one more biological image
    "resources/garbage-detection/GARBAGE CLASSIFICATION/train/images/biodegradable98_jpg.rf.f54eef9954e1d5c7d23707653769a097.jpg",
    "resources/garbage-classification-v2/biological/biological_5.jpg",
]

In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: for every sample show the untouched image first and
# the output of remove_background_if_uniform right after it.
for rel_path in examples:
    img_path = f"../{rel_path}"

    # Original image.
    plt.imshow(Image.open(img_path))
    plt.show()

    # Processed image, drawn over a salmon axes background.
    processed = remove_background_if_uniform(img_path).convert("RGBA")
    plt.imshow(np.array(processed))
    plt.gca().set_facecolor('xkcd:salmon')
    plt.show()

In [None]:
def _ingest_class_folders(dataset_dir, tag):
    """
    Ingest a dataset laid out as `<dataset_dir>/<class>/<image files>`.

    For every class in CLASSES whose folder exists, each directory entry is
    handed to `process_and_copy` with the prefix `<tag>_<index>_`, where the
    index is the entry's position in the alphabetically sorted listing.
    """
    for cls in CLASSES:
        class_dir = os.path.join(dataset_dir, cls)
        print(class_dir)
        if not os.path.isdir(class_dir):
            continue
        # os.listdir returns entries in arbitrary order. Sorting keeps the
        # index -> file mapping stable between runs, which the "skip if the
        # output already exists" check in process_and_copy depends on.
        # NOTE: outputs written by an earlier unsorted run may carry different
        # indices; clear those folders before resuming.
        for i, fname in enumerate(sorted(os.listdir(class_dir))):
            process_and_copy(os.path.join(class_dir, fname), cls, f"{tag}_{i}_", OUTPUT_DIR)


def ingest_dataset_1():
    """Ingest every class folder of DATASET_1 into OUTPUT_DIR (prefix `ds1_<i>_`)."""
    _ingest_class_folders(DATASET_1, "ds1")


def ingest_dataset_2():
    """Ingest every class folder of DATASET_2 into OUTPUT_DIR (prefix `ds2_<i>_`)."""
    _ingest_class_folders(DATASET_2, "ds2")


def extract_class_from_filename(filename):
    """
    Derive the target class from the beginning of a file name.

    Matching is case-insensitive. Names starting with "biodegradable" map to
    'biological'; otherwise the first entry of CLASSES that the name starts
    with is returned. Returns None when nothing matches.
    """
    stem = filename.lower()

    # Special case: this prefix does not match any class name directly.
    if stem.startswith("biodegradable"):
        return 'biological'

    return next((label for label in CLASSES if stem.startswith(label)), None)


def ingest_dataset_3():
    """
    Ingest DATASET_3, which is organised as `<split>/images/<files>`.

    The splits "train", "test" and "valid" are visited in that order; missing
    split folders are skipped. The class of each file is derived from its
    name via `extract_class_from_filename`; files without a recognised class
    are ignored. Outputs are named `ds3_<index>_`, where the index keeps
    counting across splits so that every file gets a unique number.
    """
    next_index = 0
    for split in ["train", "test", "valid"]:
        print(split)
        img_dir = os.path.join(DATASET_3, split, "images")
        if not os.path.isdir(img_dir):
            continue
        fnames = sorted(os.listdir(img_dir))
        for i, fname in enumerate(fnames, start=next_index):
            print(i, fname)
            cls = extract_class_from_filename(fname)
            if cls is None:
                continue
            process_and_copy(os.path.join(img_dir, fname), cls, f"ds3_{i}_", OUTPUT_DIR)
        # Continue numbering after the last file of this split. The previous
        # `start = i` reused the last index for the first file of the next
        # split (two images -> one output name) and raised UnboundLocalError
        # when the first existing split folder was empty.
        # NOTE: indices of the 2nd and 3rd split shift relative to outputs
        # written by the old numbering; clear those before resuming.
        next_index += len(fnames)




In [None]:
# Process every class folder of DATASET_1 and write the results to OUTPUT_DIR.
ingest_dataset_1()

In [None]:
# Process every class folder of DATASET_2 and write the results to OUTPUT_DIR.
ingest_dataset_2()

In [None]:
# Process the train/test/valid image folders of DATASET_3; classes come from file names.
ingest_dataset_3()