In [1]:
# -------------------------------------
# 📦 AppleLeaf Disease Detection Preprocessing
# -------------------------------------

import os
import shutil
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Preprocessing configuration
IMG_SIZE = (224, 224)  # target size passed to load_img when resizing
RAW_DIR = '../data/disease_detection/raw/AppleLeaf'  # one sub-folder per class
PROCESSED_DIR = '../data/disease_detection/processed'  # output root

# Class labels are the sub-directories of RAW_DIR. The 'images' folder is
# left out because it holds unlabeled images rather than a class.
LABELS = [
    entry
    for entry in os.listdir(RAW_DIR)
    if entry != 'images' and os.path.isdir(os.path.join(RAW_DIR, entry))
]
print(f"Found {len(LABELS)} classes: {LABELS}")

# Helper: Create directories
def create_dirs(base_dir, labels):
    """Create the output directory tree under ``base_dir``.

    Builds ``<base_dir>/<subset>/<label>`` for every subset in
    ('train', 'val', 'test') and every entry of ``labels``, plus
    ``<base_dir>/unlabeled/images``. Directories that already exist are
    left untouched.

    Args:
        base_dir: Root directory that receives the tree.
        labels: Iterable of class-folder names.
    """
    # Lazy generator: each path is built right before it is created.
    split_dirs = (
        os.path.join(base_dir, split_name, class_name)
        for split_name in ('train', 'val', 'test')
        for class_name in labels
    )
    for target in split_dirs:
        os.makedirs(target, exist_ok=True)

    unlabeled_dir = os.path.join(base_dir, 'unlabeled', 'images')
    os.makedirs(unlabeled_dir, exist_ok=True)

# Helper: Process & Copy Images
def process_and_copy(img_paths, dest_dir):
    """Resize each image to ``IMG_SIZE`` and save it into ``dest_dir``.

    Every output file keeps the base name of its source file, so two
    sources with the same base name overwrite each other.

    Args:
        img_paths: Iterable of source image file paths.
        dest_dir: Existing directory that receives the resized copies.

    Returns:
        None. A file that cannot be loaded or saved is reported on stdout
        and skipped, so one bad image does not abort the whole batch.
    """
    for img_path in tqdm(img_paths, desc=f"Processing {os.path.basename(dest_dir)}"):
        try:
            img = load_img(img_path, target_size=IMG_SIZE)

            # The saved file holds the resized image with its pixel values
            # unscaled. The previous version also built a float array
            # divided by 255 but never used it, so nothing on disk was ever
            # normalised; scaling to [0, 1] has to happen when the images
            # are loaded for training.
            img.save(os.path.join(dest_dir, os.path.basename(img_path)))

        except Exception as e:
            # Deliberate best-effort: log the failure and continue.
            print(f"Error processing {img_path}: {e}")

# Main: Split & Process Dataset
def preprocess_dataset():
    """Split every labeled class into train/val/test and resize the images.

    For each label in ``LABELS`` the image files (.jpg, .png, .jpeg,
    case-insensitive) found in ``RAW_DIR/<label>`` are split 70/15/15 and
    written to ``PROCESSED_DIR/<subset>/<label>`` via ``process_and_copy``.
    Files in ``RAW_DIR/images`` are then copied unchanged to
    ``PROCESSED_DIR/unlabeled/images``.

    Classes with fewer than three images cannot fill all three subsets and
    are skipped with a message instead of aborting the run.
    """
    create_dirs(PROCESSED_DIR, LABELS)

    image_exts = ('.jpg', '.png', '.jpeg')

    for label in LABELS:
        label_dir = os.path.join(RAW_DIR, label)
        # os.listdir returns entries in arbitrary order. Sorting makes the
        # fixed random_state produce the same split on every machine.
        img_files = [
            os.path.join(label_dir, f)
            for f in sorted(os.listdir(label_dir))
            if f.lower().endswith(image_exts)
        ]

        # With fewer than 3 images the second split would leave an empty
        # train set and train_test_split would raise ValueError.
        if len(img_files) < 3:
            print(f"Skipping '{label}': need at least 3 images to split, found {len(img_files)}")
            continue

        # Split into train, val, test (70%, 15%, 15%): first hold out 15%
        # for test, then 0.15 / 0.85 of the remainder (15% of the total)
        # for validation.
        train_imgs, test_imgs = train_test_split(img_files, test_size=0.15, random_state=42)
        train_imgs, val_imgs = train_test_split(train_imgs, test_size=0.15 / 0.85, random_state=42)

        # Process & Save
        process_and_copy(train_imgs, os.path.join(PROCESSED_DIR, 'train', label))
        process_and_copy(val_imgs, os.path.join(PROCESSED_DIR, 'val', label))
        process_and_copy(test_imgs, os.path.join(PROCESSED_DIR, 'test', label))

    # Copy (not move) the unlabeled images; the raw data is left intact.
    unlabeled_src = os.path.join(RAW_DIR, 'images')
    unlabeled_dest = os.path.join(PROCESSED_DIR, 'unlabeled', 'images')
    if os.path.isdir(unlabeled_src):
        for img_file in tqdm(sorted(os.listdir(unlabeled_src)), desc="Copying Unlabeled Images"):
            src_path = os.path.join(unlabeled_src, img_file)
            # shutil.copy only handles regular files; skip sub-directories.
            if not os.path.isfile(src_path):
                continue
            dest_path = os.path.join(unlabeled_dest, img_file)
            shutil.copy(src_path, dest_path)
    else:
        print(f"No unlabeled image folder at {unlabeled_src}; skipping")

    print("✅ Data preprocessing completed!")

# Run preprocessing: create the output folders, split and resize every
# labeled class, then copy the unlabeled images. This executes as soon as
# the cell/module is run and writes under PROCESSED_DIR.
preprocess_dataset()


Found 9 classes: ['Alternaria leaf spot', 'Brown spot', 'Frogeye leaf spot', 'Grey spot', 'Health', 'Mosaic', 'Powdery mildew', 'Rust', 'Scab']


Processing Alternaria leaf spot: 100%|██████████████████████████████████████████████| 291/291 [00:01<00:00, 208.61it/s]
Processing Alternaria leaf spot: 100%|████████████████████████████████████████████████| 63/63 [00:00<00:00, 214.64it/s]
Processing Alternaria leaf spot: 100%|████████████████████████████████████████████████| 63/63 [00:00<00:00, 212.33it/s]
Processing Brown spot: 100%|████████████████████████████████████████████████████████| 287/287 [00:01<00:00, 181.99it/s]
Processing Brown spot: 100%|██████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 174.92it/s]
Processing Brown spot: 100%|██████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 207.92it/s]
Processing Frogeye leaf spot: 100%|███████████████████████████████████████████████| 2225/2225 [00:16<00:00, 135.05it/s]
Processing Frogeye leaf spot: 100%|█████████████████████████████████████████████████| 478/478 [00:03<00:00, 141.14it/s]
Processing Frogeye leaf spot: 100%|█████

✅ Data preprocessing completed!



