In [None]:
import cv2
import numpy as np
from PIL import Image
import os
from glob import glob

input_dir = "./Plant_Disease_Dataset_Unified/train/images"
output_dir = "./Plant_Disease_Dataset_Cleaned/train/images"

# HSV window treated as "green leaf" pixels (tune if needed).
# Defined once: the values do not change from image to image.
lower = np.array([25, 40, 40])
upper = np.array([85, 255, 255])
# 5x5 structuring element for the opening step that removes small specks.
open_kernel = np.ones((5, 5), np.uint8)

os.makedirs(output_dir, exist_ok=True)

for class_folder in glob(f"{input_dir}/*"):
    # Only directories are classes; a stray file in the input root would
    # otherwise produce an empty, misleading output folder.
    if not os.path.isdir(class_folder):
        continue

    class_name = os.path.basename(class_folder)
    os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

    for image_path in glob(f"{class_folder}/*.jpg"):
        image = cv2.imread(image_path)
        # imread signals an unreadable/corrupt file by returning None instead
        # of raising; skip it so one bad file does not abort the whole run.
        if image is None:
            print(f"[WARN] Could not read {image_path}; skipping.")
            continue

        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Pixels inside the HSV window become 255 in the mask, all others 0.
        mask = cv2.inRange(hsv, lower, upper)

        # Optional: remove small noise
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, open_kernel)

        # Keep masked pixels; everything outside the mask becomes black.
        result = cv2.bitwise_and(image, image, mask=mask)

        # Save under the same class folder and file name (PIL expects RGB).
        out_path = os.path.join(output_dir, class_name, os.path.basename(image_path))
        Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB)).save(out_path)


In [None]:
import cv2
import numpy as np
from PIL import Image
import os
from glob import glob
from tqdm import tqdm

input_dir = "./Plant_Disease_Dataset_Unified/train/images"
output_cleaned = "./Plant_Disease_Dataset_Cleaned/train/images"
output_comparison = "./Plant_Disease_Dataset_Cleaned/train/comparisons"

# HSV range — broad to include green + yellow/brown.
# Defined once: the values do not change from image to image.
lower = np.array([15, 30, 30])
upper = np.array([100, 255, 255])
# 5x5 structuring element shared by the close (fill holes) and open
# (drop specks) steps.
kernel = np.ones((5, 5), np.uint8)

os.makedirs(output_cleaned, exist_ok=True)
os.makedirs(output_comparison, exist_ok=True)

# Only directories are classes; stray files in the input root are ignored.
class_folders = [path for path in glob(f"{input_dir}/*") if os.path.isdir(path)]

for class_folder in tqdm(class_folders, desc="Processing classes"):
    class_name = os.path.basename(class_folder)
    class_output_path = os.path.join(output_cleaned, class_name)
    class_cmp_path = os.path.join(output_comparison, class_name)

    os.makedirs(class_output_path, exist_ok=True)
    os.makedirs(class_cmp_path, exist_ok=True)

    image_paths = glob(f"{class_folder}/*.jpg")

    for image_path in tqdm(image_paths, desc=f"  -> {class_name}", leave=False):
        img_name = os.path.basename(image_path)

        # Read image. imread returns None (it does not raise) when the file
        # cannot be decoded, so skip such files instead of crashing later.
        image = cv2.imread(image_path)
        if image is None:
            tqdm.write(f"[WARN] Could not read {image_path}; skipping.")
            continue

        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, lower, upper)

        # Morphological cleaning
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # White background: keep the original pixel wherever the mask is set
        # and paint everything else 255. Same result as AND-ing the image and
        # a white canvas with complementary masks and adding them.
        final = image.copy()
        final[mask == 0] = 255

        # Convert each image to RGB once; both the cleaned file and the
        # comparison reuse these arrays.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        final_rgb = cv2.cvtColor(final, cv2.COLOR_BGR2RGB)

        # Save cleaned
        cleaned_path = os.path.join(class_output_path, img_name)
        Image.fromarray(final_rgb).save(cleaned_path)

        # Create side-by-side comparison (original left, cleaned right)
        side_by_side = np.hstack((image_rgb, final_rgb))
        cmp_path = os.path.join(class_cmp_path, f"cmp_{img_name}")
        Image.fromarray(side_by_side).save(cmp_path)


In [None]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import models, transforms
import torch
from tqdm import tqdm

# Load Pretrained DeepLabV3
def load_model():
    """Build the pretrained DeepLabV3-ResNet101 segmentation model in eval mode.

    Returns:
        The object produced by ``models.segmentation.deeplabv3_resnet101``
        after ``eval()`` has been called on it.
    """
    # NOTE(review): newer torchvision releases appear to prefer a `weights=`
    # argument over `pretrained=True` — confirm against the installed version.
    segmenter = models.segmentation.deeplabv3_resnet101(pretrained=True)
    return segmenter.eval()

# Preprocess image
def preprocess_image(image_path):
    """Open an image and convert it into a normalised single-image batch.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A ``(batch, rgb_image)`` tuple: the tensor obtained by applying
        ``ToTensor`` then ``Normalize`` with a leading dimension added via
        ``unsqueeze(0)``, and the RGB PIL image it was computed from.
    """
    rgb_image = Image.open(image_path).convert("RGB")

    to_tensor = transforms.ToTensor()
    # Presumably the ImageNet channel statistics the pretrained weights
    # expect — verify against the model documentation.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    batch = normalize(to_tensor(rgb_image)).unsqueeze(0)
    return batch, rgb_image

# Background removal using DeepLabV3
def remove_background(image_path, model):
    """Replace pixels the model labels as class 0 with white.

    Args:
        image_path: Path of the image to process.
        model: Segmentation model whose forward pass returns a mapping with
            an ``'out'`` entry (as produced by ``load_model``).

    Returns:
        A ``(output_image, mask, original_image)`` tuple: the PIL image with
        class-0 pixels set to 255, the per-pixel argmax label array as a
        NumPy array, and the untouched RGB PIL image.
    """
    input_tensor, original_image = preprocess_image(image_path)

    # Feed the input on the same device as the model's weights. Without this,
    # a model that was moved to an accelerator fails with a device mismatch.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_tensor = input_tensor.to(first_param.device)

    with torch.no_grad():
        output = model(input_tensor)['out'][0]

    # Per-pixel label = index of the highest-scoring channel.
    mask = output.argmax(0).byte().cpu().numpy()

    # NOTE(review): label 0 is treated as background here; whether the
    # pretrained label set separates leaves from background well should be
    # verified on sample images.
    image_np = np.array(original_image)
    image_np[mask == 0] = 255  # white background
    return Image.fromarray(image_np), mask, original_image

# Save processed image
def save_processed_image(output_image, save_path):
    """Write ``output_image`` to ``save_path`` by calling its ``save`` method.

    Args:
        output_image: Image object exposing ``save(path)``; in this notebook
            it is the PIL image returned by ``remove_background``.
        save_path: Destination file path passed straight to ``save``.
    """
    output_image.save(save_path)

# Save comparison image
def visualize_results(image_path, output_image, original_image, mask, comparison_save_path):
    """Save a two-panel figure: original on the left, processed on the right.

    Args:
        image_path: Unused; kept so existing callers keep working.
        output_image: Background-removed image shown in the right panel.
        original_image: Source image shown in the left panel.
        mask: Unused; kept so existing callers keep working.
        comparison_save_path: File path the figure is written to.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    try:
        panels = (
            (axes[0], original_image, "Original"),
            (axes[1], output_image, "Background Removed"),
        )
        for ax, picture, title in panels:
            ax.imshow(picture)
            ax.set_title(title)
            ax.axis("off")

        # Work on this figure explicitly rather than pyplot's implicit
        # "current figure", so nothing else open in the kernel is affected.
        fig.tight_layout()
        fig.savefig(comparison_save_path)
    finally:
        # Always release the figure, even if saving fails; this function is
        # called once per image, so leaked figures would pile up in memory.
        plt.close(fig)

# Preserve original class structure
def create_class_structure(image_path, base_folder):
    """Create (if needed) the class subfolder for an image and return it.

    The class name is the image's immediate parent directory, taken from the
    normalised path.

    Args:
        image_path: Path of the source image; must contain at least one
            directory component.
        base_folder: Root folder under which the class subfolder is created.

    Returns:
        Path of the class subfolder inside ``base_folder``.

    Raises:
        IndexError: If ``image_path`` has no parent component to use.
    """
    path_parts = os.path.normpath(image_path).split(os.sep)
    # Second-to-last component = the directory that directly holds the file.
    parent_name = path_parts[-2]

    target_dir = os.path.join(base_folder, parent_name)
    os.makedirs(target_dir, exist_ok=True)
    return target_dir

# Main processing function
def process_images_in_folder(image_folder, output_folder, comparison_folder):
    """Remove the background from every image under ``image_folder``.

    For each image found (recursively), writes the processed image to
    ``output_folder/<class>/<name>`` and a side-by-side figure to
    ``comparison_folder/<class>/comparison_<name>``, where ``<class>`` is the
    image's immediate parent directory name. A failure on one image is
    reported and does not stop the run.

    Args:
        image_folder: Root folder scanned recursively for images.
        output_folder: Root folder for background-removed images.
        comparison_folder: Root folder for comparison figures.
    """
    # Accept .jpeg as well, matching the extension set used by the rembg
    # cell of this notebook.
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Collect all image paths recursively
    image_paths = [
        os.path.join(root, file_name)
        for root, _, files in os.walk(image_folder)
        for file_name in files
        if file_name.lower().endswith(image_extensions)
    ]

    print(f"[INFO] Found {len(image_paths)} images.")
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(comparison_folder, exist_ok=True)

    # Nothing to do: skip loading the pretrained model entirely.
    if not image_paths:
        return

    model = load_model()
    failures = 0

    for img_path in tqdm(image_paths, desc="Processing images"):
        try:
            output_image, mask, original_image = remove_background(img_path, model)

            # Get class structure
            class_output_folder = create_class_structure(img_path, output_folder)
            class_comparison_folder = create_class_structure(img_path, comparison_folder)

            filename = os.path.basename(img_path)
            save_path = os.path.join(class_output_folder, filename)
            comparison_path = os.path.join(class_comparison_folder, f"comparison_{filename}")

            save_processed_image(output_image, save_path)
            visualize_results(img_path, output_image, original_image, mask, comparison_path)
        except Exception as e:
            # Best effort by design: report the bad file and keep going.
            failures += 1
            print(f"[ERROR] Failed to process {img_path}: {e}")

    if failures:
        print(f"[INFO] {failures} of {len(image_paths)} images failed.")

# Example usage
image_folder = "./Plant_Disease_Dataset_Unified/train/images"  # Original images (scanned recursively)
output_folder = "./Processed_Images"  # Background-removed images, one subfolder per class
comparison_folder = "./Comparison_Images"  # Side-by-side comparison figures, one subfolder per class

# Runs the full pipeline over every image found under image_folder.
process_images_in_folder(image_folder, output_folder, comparison_folder)


In [None]:
!pip install rembg

In [7]:
import os

from PIL import Image
from rembg import new_session, remove
from tqdm import tqdm

input_base = './Plant_Disease_Dataset_Unified/train/images'
output_base = './Plant_Disease_Dataset_Unified/train/images_nobg'
comparison_base = './Plant_Disease_Dataset_Unified/train/images_comparison'

image_extensions = ('.jpg', '.jpeg', '.png')

os.makedirs(output_base, exist_ok=True)
os.makedirs(comparison_base, exist_ok=True)

# Scan the dataset once; the same listing feeds both the progress-bar total
# and the processing loop.
images_by_class = {}
for class_name in os.listdir(input_base):
    class_input_path = os.path.join(input_base, class_name)
    if not os.path.isdir(class_input_path):
        continue
    images_by_class[class_name] = [
        f for f in os.listdir(class_input_path) if f.lower().endswith(image_extensions)
    ]

# Count total images
total_images = sum(len(files) for files in images_by_class.values())

# Create the rembg session once and reuse it for every image, so the model
# is not set up again on each remove() call.
session = new_session()

# Paths of images that could not be processed, reported after the run.
failed_images = []

with tqdm(total=total_images, desc="Removing backgrounds", ncols=100) as pbar:
    for class_name, image_files in images_by_class.items():
        class_input_path = os.path.join(input_base, class_name)
        class_output_path = os.path.join(output_base, class_name)
        class_comparison_path = os.path.join(comparison_base, class_name)

        os.makedirs(class_output_path, exist_ok=True)
        os.makedirs(class_comparison_path, exist_ok=True)

        for image_file in image_files:
            input_image_path = os.path.join(class_input_path, image_file)
            output_image_path = os.path.join(class_output_path, image_file)
            comparison_image_path = os.path.join(class_comparison_path, image_file)

            try:
                # Close the source file as soon as the pixels are copied.
                with Image.open(input_image_path) as source_image:
                    input_image = source_image.convert("RGB")
                output_image = remove(input_image, session=session)

                ext = os.path.splitext(output_image_path)[1].lower()
                if ext in ['.jpg', '.jpeg']:
                    # JPEG has no alpha channel, so the cut-out is flattened
                    # to RGB before saving.
                    # NOTE(review): this keeps whatever RGB values sit under
                    # the transparent pixels (presumably black), while .png
                    # inputs keep their transparency — confirm this is the
                    # background the training pipeline should see.
                    output_image.convert("RGB").save(output_image_path, format="JPEG")
                elif ext == '.png':
                    output_image.save(output_image_path, format="PNG")
                else:
                    output_image.save(output_image_path)

                # Comparison image: original on the left, result on the right
                comp_width = input_image.width + output_image.width
                comp_height = max(input_image.height, output_image.height)
                comparison = Image.new("RGB", (comp_width, comp_height))
                comparison.paste(input_image, (0, 0))
                comparison.paste(output_image, (input_image.width, 0))
                comparison.save(comparison_image_path)

            except Exception as e:
                # Best effort: keep going, but never hide the failure.
                # tqdm.write prints without breaking the progress bar.
                failed_images.append(input_image_path)
                tqdm.write(f"[ERROR] Failed to process {input_image_path}: {e}")

            pbar.update(1)

if failed_images:
    print(f"[WARN] {len(failed_images)} of {total_images} images could not be processed.")


Removing backgrounds:   0%|                                   | 16/70295 [00:38<47:02:52,  2.41s/it]


KeyboardInterrupt: 