In [1]:
import os
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

# Define the root directory containing pest images
root_dir = "./"

# Target size
IMG_SIZE = (224, 224)

def smart_crop_and_resize(image_path, save_path):
    """Smartly crops around the pest and resizes the image to (224, 224) while keeping color."""
    try:
        # Read the image in color
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)

        if img is None:
            print(f"❌ Skipping {image_path} (could not read)")
            return
        
        # Convert to grayscale only for processing (not saving)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply thresholding to detect the main object
        _, thresh = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Get the largest contour (assuming it's the pest)
            c = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(c)
            
            # Crop to detected region (expand slightly for better framing)
            padding = 10
            x = max(0, x - padding)
            y = max(0, y - padding)
            w = min(img.shape[1] - x, w + 2 * padding)
            h = min(img.shape[0] - y, h + 2 * padding)
            
            cropped_img = img[y:y+h, x:x+w]  # Keep original color
        else:
            cropped_img = img  # If no object found, use the full image

        # Convert to RGB format for EfficientNetV2 compatibility
        pil_img = Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))

        # Resize using LANCZOS for high quality
        resized_img = pil_img.resize(IMG_SIZE, Image.LANCZOS)

        # Save the processed image
        resized_img.save(save_path, "JPEG", quality=95)

    except Exception as e:
        print(f"❌ Error processing {image_path}: {e}")

# Process all images in dataset
for folder in os.listdir(root_dir):
    folder_path = os.path.join(root_dir, folder)
    
    if os.path.isdir(folder_path):  # Ensure it's a folder
        print(f"Processing folder: {folder}")
        
        for filename in tqdm(os.listdir(folder_path), desc=f"Resizing in {folder}"):
            if filename.lower().endswith(".jpg"):
                img_path = os.path.join(folder_path, filename)
                smart_crop_and_resize(img_path, img_path)  # Overwrite the image

print("\n✅ All images resized and centered successfully while keeping color!")


Processing folder: .dist


Resizing in .dist: 0it [00:00, ?it/s]


Processing folder: .qodo


Resizing in .qodo: 100%|██████████| 1/1 [00:00<?, ?it/s]


Processing folder: aphids


Resizing in aphids: 100%|██████████| 500/500 [00:10<00:00, 47.60it/s] 


Processing folder: aphis craccivora


Resizing in aphis craccivora: 100%|██████████| 440/440 [00:08<00:00, 50.73it/s]


Processing folder: beet armyworm


Resizing in beet armyworm: 100%|██████████| 500/500 [00:05<00:00, 88.90it/s] 


Processing folder: catterpillar


Resizing in catterpillar: 100%|██████████| 500/500 [00:05<00:00, 89.76it/s] 


Processing folder: Empoasca fabae


Resizing in Empoasca fabae: 100%|██████████| 500/500 [00:11<00:00, 43.52it/s]


Processing folder: grasshopper


Resizing in grasshopper: 100%|██████████| 499/499 [00:05<00:00, 84.11it/s] 


Processing folder: Helicoverpa armigera


Resizing in Helicoverpa armigera: 100%|██████████| 495/495 [00:15<00:00, 31.36it/s]


Processing folder: mites


Resizing in mites: 100%|██████████| 498/498 [00:20<00:00, 24.21it/s]


Processing folder: Schistocerca gregaria


Resizing in Schistocerca gregaria: 100%|██████████| 500/500 [00:06<00:00, 72.19it/s]


Processing folder: Spodoptera exigua


Resizing in Spodoptera exigua: 100%|██████████| 500/500 [00:06<00:00, 76.71it/s] 


Processing folder: Spodoptera litura


Resizing in Spodoptera litura: 100%|██████████| 500/500 [00:09<00:00, 55.51it/s]


Processing folder: Tetranychus urticae


Resizing in Tetranychus urticae: 100%|██████████| 476/476 [00:14<00:00, 32.52it/s]


Processing folder: Thrips tabaci


Resizing in Thrips tabaci: 100%|██████████| 500/500 [00:08<00:00, 60.48it/s]


Processing folder: weevil


Resizing in weevil: 100%|██████████| 500/500 [00:04<00:00, 115.96it/s]


✅ All images resized and centered successfully while keeping color!





In [2]:
import os
import cv2
import numpy as np
import hashlib
from tqdm import tqdm


# Dictionary to store image hashes
image_hashes = {}

def compute_image_hash(image_path):
    """Computes a hash for an image using OpenCV and hashlib."""
    try:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read as grayscale
        img = cv2.resize(img, (64, 64))  # Resize for hashing consistency
        img_hash = hashlib.md5(img.tobytes()).hexdigest()  # Compute hash
        return img_hash
    except Exception as e:
        print(f"❌ Error processing {image_path}: {e}")
        return None

# Scan and remove duplicate images
for folder in os.listdir(root_dir):
    folder_path = os.path.join(root_dir, folder)
    
    if os.path.isdir(folder_path):  # Ensure it's a folder
        print(f"Checking duplicates in: {folder}")

        for filename in tqdm(os.listdir(folder_path), desc=f"Scanning {folder}"):
            if filename.lower().endswith(".jpg"):
                img_path = os.path.join(folder_path, filename)
                img_hash = compute_image_hash(img_path)

                if img_hash:
                    if img_hash in image_hashes:
                        print(f"🗑️ Removing duplicate: {img_path}")
                        os.remove(img_path)  # Delete duplicate
                    else:
                        image_hashes[img_hash] = img_path  # Store unique image hash

print("\n✅ Duplicate images removed successfully!")


Checking duplicates in: .dist


Scanning .dist: 0it [00:00, ?it/s]


Checking duplicates in: .qodo


Scanning .qodo: 100%|██████████| 1/1 [00:00<?, ?it/s]


Checking duplicates in: aphids


Scanning aphids:  26%|██▌       | 130/500 [00:00<00:00, 1298.39it/s]

🗑️ Removing duplicate: ./aphids\362438593 (18).jpg
🗑️ Removing duplicate: ./aphids\362438593 (9).jpg
🗑️ Removing duplicate: ./aphids\Image_1.jpg
🗑️ Removing duplicate: ./aphids\Image_10.jpg
🗑️ Removing duplicate: ./aphids\Image_100.jpg
🗑️ Removing duplicate: ./aphids\Image_101.jpg
🗑️ Removing duplicate: ./aphids\Image_102.jpg
🗑️ Removing duplicate: ./aphids\Image_108.jpg
🗑️ Removing duplicate: ./aphids\Image_110 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_110.jpg
🗑️ Removing duplicate: ./aphids\Image_112 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_112.jpg
🗑️ Removing duplicate: ./aphids\Image_114 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_114.jpg
🗑️ Removing duplicate: ./aphids\Image_115 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_115.jpg
🗑️ Removing duplicate: ./aphids\Image_120 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_120.jpg
🗑️ Removing duplicate: ./aphids\Image_15.jpg
🗑️ Removing duplicate: ./aphids\Image_16.jpg
🗑️ Removing duplicate: ./aphids\Image_17.jpg
🗑️ Removing

Scanning aphids:  55%|█████▌    | 276/500 [00:00<00:00, 1390.79it/s]

🗑️ Removing duplicate: ./aphids\Image_33.jpg
🗑️ Removing duplicate: ./aphids\Image_35 (1).jpg
🗑️ Removing duplicate: ./aphids\Image_35.jpg
🗑️ Removing duplicate: ./aphids\Image_4.jpg


Scanning aphids:  83%|████████▎ | 416/500 [00:00<00:00, 1272.95it/s]

🗑️ Removing duplicate: ./aphids\Image_53.jpg
🗑️ Removing duplicate: ./aphids\Image_55 (1).jpg
🗑️ Removing duplicate: ./aphids\Image_56.jpg
🗑️ Removing duplicate: ./aphids\Image_58 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_58.jpg
🗑️ Removing duplicate: ./aphids\Image_61.jpg
🗑️ Removing duplicate: ./aphids\Image_69 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_69.jpg
🗑️ Removing duplicate: ./aphids\Image_7.jpg
🗑️ Removing duplicate: ./aphids\Image_75.jpg
🗑️ Removing duplicate: ./aphids\Image_77.jpg
🗑️ Removing duplicate: ./aphids\Image_82.jpg
🗑️ Removing duplicate: ./aphids\Image_86 (1).jpg
🗑️ Removing duplicate: ./aphids\Image_86.jpg
🗑️ Removing duplicate: ./aphids\Image_88.jpg
🗑️ Removing duplicate: ./aphids\Image_89 (2).jpg
🗑️ Removing duplicate: ./aphids\Image_89.jpg
🗑️ Removing duplicate: ./aphids\Image_93.jpg
🗑️ Removing duplicate: ./aphids\Image_95.jpg
🗑️ Removing duplicate: ./aphids\Image_97.jpg
🗑️ Removing duplicate: ./aphids\Image_99 (2).jpg
🗑️ Removing duplicate: ./aphids\

Scanning aphids: 100%|██████████| 500/500 [00:00<00:00, 1116.80it/s]


🗑️ Removing duplicate: ./aphids\jpg_5 - Copy (1).jpg
🗑️ Removing duplicate: ./aphids\jpg_5 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_5.jpg
🗑️ Removing duplicate: ./aphids\jpg_50.jpg
🗑️ Removing duplicate: ./aphids\jpg_51.jpg
🗑️ Removing duplicate: ./aphids\jpg_52.jpg
🗑️ Removing duplicate: ./aphids\jpg_54 (1).jpg
🗑️ Removing duplicate: ./aphids\jpg_54 - Copy (1).jpg
🗑️ Removing duplicate: ./aphids\jpg_54 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_54.jpg
🗑️ Removing duplicate: ./aphids\jpg_55 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_55.jpg
🗑️ Removing duplicate: ./aphids\jpg_56.jpg
🗑️ Removing duplicate: ./aphids\jpg_6 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_6.jpg
🗑️ Removing duplicate: ./aphids\jpg_64.jpg
🗑️ Removing duplicate: ./aphids\jpg_7 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_72 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_74 - Copy (2).jpg
🗑️ Removing duplicate: ./aphids\jpg_79 - Copy.jpg
🗑️ Removing duplicate: ./aphids\jpg_9 - Copy (1).jpg
Checkin

Scanning aphis craccivora: 100%|██████████| 440/440 [00:00<00:00, 961.50it/s] 


🗑️ Removing duplicate: ./aphis craccivora\Image_101.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_108.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_110.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_22.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_53.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_69.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_82.jpg
🗑️ Removing duplicate: ./aphis craccivora\Image_93.jpg
Checking duplicates in: beet armyworm


Scanning beet armyworm: 100%|██████████| 500/500 [00:00<00:00, 1084.34it/s]


Checking duplicates in: catterpillar


Scanning catterpillar: 100%|██████████| 500/500 [00:00<00:00, 1005.12it/s]


Checking duplicates in: Empoasca fabae


Scanning Empoasca fabae: 100%|██████████| 500/500 [00:00<00:00, 1116.82it/s]


Checking duplicates in: grasshopper


Scanning grasshopper: 100%|██████████| 499/499 [00:00<00:00, 1195.41it/s]


Checking duplicates in: Helicoverpa armigera


Scanning Helicoverpa armigera:  21%|██▏       | 106/495 [00:00<00:00, 1056.55it/s]

🗑️ Removing duplicate: ./Helicoverpa armigera\000034.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000035.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000067.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000083.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000096.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000098.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000134.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000141.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000146.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000148.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000152.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000153.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000158.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000164.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000166.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000167.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000170.jpg
🗑️ Removing duplicate: ./Helico

Scanning Helicoverpa armigera:  44%|████▍     | 218/495 [00:00<00:00, 1093.71it/s]

🗑️ Removing duplicate: ./Helicoverpa armigera\000253.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000254.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000263.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000265.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000266.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000268.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000269.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000270.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000272.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000273.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000278.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000279.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000281.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000283.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000284.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000286.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000287.jpg
🗑️ Removing duplicate: ./Helico

Scanning Helicoverpa armigera:  66%|██████▋   | 328/495 [00:00<00:00, 1020.53it/s]

🗑️ Removing duplicate: ./Helicoverpa armigera\000304.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000305.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000306.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000307.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000308.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000310.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000312.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000315.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000317.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000318.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000319.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000320.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000321.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000322.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000323.JPG
🗑️ Removing duplicate: ./Helicoverpa armigera\000325.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\000326.jpg
🗑️ Removing duplicate: ./Helico

Scanning Helicoverpa armigera:  87%|████████▋ | 431/495 [00:00<00:00, 991.06it/s] 

🗑️ Removing duplicate: ./Helicoverpa armigera\Image_1.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_11.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_16.jpg


Scanning Helicoverpa armigera: 100%|██████████| 495/495 [00:00<00:00, 987.93it/s]


🗑️ Removing duplicate: ./Helicoverpa armigera\Image_27.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_38.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_4.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_49.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_5.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_50.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_55.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_57.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_58.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_63.JPG
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_64.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_66.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_67.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_71.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_72.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_74.jpg
🗑️ Removing duplicate: ./Helicoverpa armigera\Image_75.jpg

Scanning mites:   0%|          | 0/498 [00:00<?, ?it/s]

🗑️ Removing duplicate: ./mites\000037.jpg
🗑️ Removing duplicate: ./mites\000067.jpg


Scanning mites:  21%|██        | 103/498 [00:00<00:00, 1017.07it/s]

🗑️ Removing duplicate: ./mites\000133.jpg
🗑️ Removing duplicate: ./mites\000143.JPG
🗑️ Removing duplicate: ./mites\000154.jpg
🗑️ Removing duplicate: ./mites\000157.jpg
🗑️ Removing duplicate: ./mites\000158.jpg
🗑️ Removing duplicate: ./mites\000162.jpg
🗑️ Removing duplicate: ./mites\000174.jpg
🗑️ Removing duplicate: ./mites\000176.jpg
🗑️ Removing duplicate: ./mites\000181.jpg
🗑️ Removing duplicate: ./mites\000193.jpg
🗑️ Removing duplicate: ./mites\000198.jpg
🗑️ Removing duplicate: ./mites\000209.jpg
🗑️ Removing duplicate: ./mites\000213.jpg
🗑️ Removing duplicate: ./mites\000220.jpg
🗑️ Removing duplicate: ./mites\000229.jpg
🗑️ Removing duplicate: ./mites\000232.jpg
🗑️ Removing duplicate: ./mites\000233.jpg
🗑️ Removing duplicate: ./mites\000259.jpg


Scanning mites:  41%|████      | 205/498 [00:00<00:00, 827.20it/s] 

🗑️ Removing duplicate: ./mites\000274.jpg
🗑️ Removing duplicate: ./mites\000285.jpg
🗑️ Removing duplicate: ./mites\000292.jpg
🗑️ Removing duplicate: ./mites\000303.jpg
🗑️ Removing duplicate: ./mites\000307.jpg
🗑️ Removing duplicate: ./mites\000317.jpg
🗑️ Removing duplicate: ./mites\000325.jpg
🗑️ Removing duplicate: ./mites\000336.jpg
🗑️ Removing duplicate: ./mites\000338.jpg
🗑️ Removing duplicate: ./mites\000341.jpg
🗑️ Removing duplicate: ./mites\000342.jpg
🗑️ Removing duplicate: ./mites\000347.jpg
🗑️ Removing duplicate: ./mites\000348.jpg
🗑️ Removing duplicate: ./mites\000349.jpg
🗑️ Removing duplicate: ./mites\000350.jpg
🗑️ Removing duplicate: ./mites\000351.jpg
🗑️ Removing duplicate: ./mites\000358.jpg
🗑️ Removing duplicate: ./mites\000359.jpg
🗑️ Removing duplicate: ./mites\000361.jpg
🗑️ Removing duplicate: ./mites\000365.jpg
🗑️ Removing duplicate: ./mites\000366.jpg
🗑️ Removing duplicate: ./mites\000368.jpg
🗑️ Removing duplicate: ./mites\000369.jpg


Scanning mites:  58%|█████▊    | 290/498 [00:00<00:00, 624.42it/s]

🗑️ Removing duplicate: ./mites\000371.jpg
🗑️ Removing duplicate: ./mites\000372.jpg
🗑️ Removing duplicate: ./mites\000376.jpg
🗑️ Removing duplicate: ./mites\000378.jpg
🗑️ Removing duplicate: ./mites\000379.jpg
🗑️ Removing duplicate: ./mites\000383.jpg
🗑️ Removing duplicate: ./mites\000384.jpg
🗑️ Removing duplicate: ./mites\000385.jpg
🗑️ Removing duplicate: ./mites\000386.jpg
🗑️ Removing duplicate: ./mites\000388.jpg
🗑️ Removing duplicate: ./mites\000391.jpg
🗑️ Removing duplicate: ./mites\000392.jpg
🗑️ Removing duplicate: ./mites\000394.jpg
🗑️ Removing duplicate: ./mites\000396.jpg
🗑️ Removing duplicate: ./mites\000402.jpg
🗑️ Removing duplicate: ./mites\000403.jpg
🗑️ Removing duplicate: ./mites\000404.jpg
🗑️ Removing duplicate: ./mites\000415.jpg
🗑️ Removing duplicate: ./mites\000419.jpg


Scanning mites:  72%|███████▏  | 358/498 [00:00<00:00, 553.52it/s]

🗑️ Removing duplicate: ./mites\Image_1.jpg


Scanning mites: 100%|██████████| 498/498 [00:00<00:00, 632.51it/s]


🗑️ Removing duplicate: ./mites\Image_11.jpg
🗑️ Removing duplicate: ./mites\Image_13.jpg
🗑️ Removing duplicate: ./mites\Image_2.jpg
🗑️ Removing duplicate: ./mites\Image_3.jpg
🗑️ Removing duplicate: ./mites\Image_30.jpg
🗑️ Removing duplicate: ./mites\Image_32.jpg
🗑️ Removing duplicate: ./mites\Image_33.jpg
🗑️ Removing duplicate: ./mites\Image_41.jpg
🗑️ Removing duplicate: ./mites\Image_53.jpg
🗑️ Removing duplicate: ./mites\Image_59.jpg
🗑️ Removing duplicate: ./mites\Image_6.jpg
🗑️ Removing duplicate: ./mites\Image_65.jpg
🗑️ Removing duplicate: ./mites\Image_66.jpg
🗑️ Removing duplicate: ./mites\Image_68.jpg
🗑️ Removing duplicate: ./mites\Image_70.jpg
🗑️ Removing duplicate: ./mites\Image_74.jpg
🗑️ Removing duplicate: ./mites\Image_81.jpg
🗑️ Removing duplicate: ./mites\Image_82.jpg
🗑️ Removing duplicate: ./mites\Image_83.jpg
🗑️ Removing duplicate: ./mites\Image_91.jpg
Checking duplicates in: Schistocerca gregaria


Scanning Schistocerca gregaria: 100%|██████████| 500/500 [00:00<00:00, 978.07it/s] 


Checking duplicates in: Spodoptera exigua


Scanning Spodoptera exigua:  29%|██▉       | 144/500 [00:00<00:00, 718.32it/s]

🗑️ Removing duplicate: ./Spodoptera exigua\27605.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27609.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27611.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27614.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27617.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27618.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27621.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27622.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27623.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27626.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27630.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27635.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27637.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27639.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27640.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27644.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27645.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\27648.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\276

Scanning Spodoptera exigua:  59%|█████▉    | 295/500 [00:00<00:00, 728.00it/s]

🗑️ Removing duplicate: ./Spodoptera exigua\28072.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28075.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28079.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28080.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28081.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28087.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28091.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28094.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28101.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28106.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28109.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28115.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28121.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28122.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28123.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28124.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28127.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28132.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\281

Scanning Spodoptera exigua:  89%|████████▉ | 445/500 [00:00<00:00, 731.74it/s]

🗑️ Removing duplicate: ./Spodoptera exigua\28585.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28586.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28592.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28593.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28594.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28601.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28610.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28613.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28615.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28618.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28626.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28628.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28632.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28639.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28642.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28643.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28651.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\28653.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\286

Scanning Spodoptera exigua: 100%|██████████| 500/500 [00:00<00:00, 719.48it/s]


🗑️ Removing duplicate: ./Spodoptera exigua\29071.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29073.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29074.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29076.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29081.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29087.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29091.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29094.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29100.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29103.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29104.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29108.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29110.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29111.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29114.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29115.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29118.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\29122.jpg
🗑️ Removing duplicate: ./Spodoptera exigua\291

Scanning Spodoptera litura: 100%|██████████| 500/500 [00:00<00:00, 735.48it/s]


Checking duplicates in: Tetranychus urticae


Scanning Tetranychus urticae: 100%|██████████| 476/476 [00:00<00:00, 1051.77it/s]


🗑️ Removing duplicate: ./Tetranychus urticae\Image_13.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_14.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_19.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_21.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_35.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_49.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_52.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_6.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_68.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_8.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\Image_82.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000001.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000003.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000005.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000006.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000008.jpg
🗑️ Removing duplicate: ./Tetranychus urticae\oo000010.jpg
🗑️ Removing dupl

Scanning Thrips tabaci: 100%|██████████| 500/500 [00:00<00:00, 1063.72it/s]


Checking duplicates in: weevil


Scanning weevil:  25%|██▌       | 127/500 [00:00<00:00, 1264.03it/s]

🗑️ Removing duplicate: ./weevil\Weevil (111).jpg
🗑️ Removing duplicate: ./weevil\Weevil (179).jpg
🗑️ Removing duplicate: ./weevil\Weevil (199).jpg


Scanning weevil: 100%|██████████| 500/500 [00:00<00:00, 1319.23it/s]


✅ Duplicate images removed successfully!





In [14]:
import os
import numpy as np
import pandas as pd
from glob import glob

VALID_EXTENSIONS = {".jpg"}  # Valid image formats

# Dictionary to store image counts per class
image_counts = {}

# Loop through each class folder
for pest_class in os.listdir(root_dir):
    class_dir = os.path.join(root_dir, pest_class)
    if os.path.isdir(class_dir):  # Ensure it's a directory
        num_images = sum(
            len(glob(os.path.join(class_dir, f"*{ext}"))) for ext in VALID_EXTENSIONS
        )
        image_counts[pest_class] = num_images

# Convert to DataFrame for better visualization
df_counts = pd.DataFrame(image_counts.items(), columns=["Class", "Image Count"])

# Display total images in the dataset
total_images = df_counts["Image Count"].sum()
print(f"📌 Total images in dataset: {total_images}")
df_counts

📌 Total images in dataset: 5471


Unnamed: 0,Class,Image Count
0,.dist,0
1,.qodo,0
2,aphids,389
3,aphis craccivora,411
4,beet armyworm,455
5,catterpillar,474
6,Empoasca fabae,463
7,grasshopper,462
8,Helicoverpa armigera,310
9,mites,364


In [15]:
%pip install Pillow tqdm

Note: you may need to restart the kernel to use updated packages.


In [16]:
import os
import random
from PIL import Image, ImageEnhance, ImageOps
from tqdm import tqdm

# Settings
target_classes = {
    "Helicoverpa armigera": 450,
    "Thrips tabaci": 450,
    "mites": 450
}
base_dir = "./" 

def augment_image(image):
    """Apply random augmentation."""
    operations = [
        lambda x: x.rotate(random.randint(-25, 25)),
        lambda x: ImageOps.mirror(x),
        lambda x: ImageOps.flip(x),
        lambda x: ImageEnhance.Brightness(x).enhance(random.uniform(0.7, 1.3)),
        lambda x: ImageEnhance.Contrast(x).enhance(random.uniform(0.7, 1.3)),
        lambda x: ImageEnhance.Color(x).enhance(random.uniform(0.7, 1.3)),
    ]
    op = random.choice(operations)
    return op(image)

for class_name, target_count in target_classes.items():
    class_dir = os.path.join(base_dir, class_name)
    images = [f for f in os.listdir(class_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
    current_count = len(images)
    print(f"{class_name}: {current_count} ➡ {target_count}")

    if current_count >= target_count:
        continue

    augment_needed = target_count - current_count
    for i in tqdm(range(augment_needed), desc=f"Augmenting {class_name}"):
        img_name = random.choice(images)
        img_path = os.path.join(class_dir, img_name)

        try:
            img = Image.open(img_path)
            img = augment_image(img)
            new_img_name = f"aug_{i}_{img_name}"
            img.save(os.path.join(class_dir, new_img_name))
        except Exception as e:
            print(f"❌ Error processing {img_name}: {e}")


Helicoverpa armigera: 310 ➡ 450


Augmenting Helicoverpa armigera: 100%|██████████| 140/140 [00:01<00:00, 113.31it/s]


Thrips tabaci: 357 ➡ 450


Augmenting Thrips tabaci: 100%|██████████| 93/93 [00:00<00:00, 136.71it/s]


mites: 364 ➡ 450


Augmenting mites: 100%|██████████| 86/86 [00:00<00:00, 133.39it/s]


In [17]:
import os
import numpy as np
import pandas as pd
from glob import glob

VALID_EXTENSIONS = {".jpg"}  # Valid image formats

# Dictionary to store image counts per class
image_counts = {}

# Loop through each class folder
for pest_class in os.listdir(root_dir):
    class_dir = os.path.join(root_dir, pest_class)
    if os.path.isdir(class_dir):  # Ensure it's a directory
        num_images = sum(
            len(glob(os.path.join(class_dir, f"*{ext}"))) for ext in VALID_EXTENSIONS
        )
        image_counts[pest_class] = num_images

# Convert to DataFrame for better visualization
df_counts = pd.DataFrame(image_counts.items(), columns=["Class", "Image Count"])

# Display total images in the dataset
total_images = df_counts["Image Count"].sum()
print(f"📌 Total images in dataset: {total_images}")
df_counts

📌 Total images in dataset: 5790


Unnamed: 0,Class,Image Count
0,.dist,0
1,.qodo,0
2,aphids,389
3,aphis craccivora,411
4,beet armyworm,455
5,catterpillar,474
6,Empoasca fabae,463
7,grasshopper,462
8,Helicoverpa armigera,450
9,mites,450
