In [None]:
# Mount Google Drive into the Colab filesystem; the dataset paths used by
# this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Loading

In [None]:
import os
import h5py
import cv2
import numpy as np
from tqdm import tqdm

In [None]:
# === CONFIG ===
# Root folder that holds one sub-folder per plant group (see class_folders).
base_path = "/content/drive/MyDrive/Marcus Colab/marcus_plant_disease_detection/dataset"
# The packed HDF5 file is written alongside the raw image folders.
output_h5_file = f"{base_path}/dataset.h5"
# Target size passed to cv2.resize, which takes (width, height).
image_size = (144, 144)


In [None]:
# Plant-group folder -> class sub-folders expected inside it.
class_folders = {
    "apple_leaves": ["apple_black_rot", "apple_healthy", "apple_scab"],
    "bell_pepper_leaves": ["pepper_bacterial_spot", "pepper_healthy"],
    "crop_leaves": ["corn_common_rust", "corn_healthy", "corn_northern_blight"],
    "grape_leaves": ["grape_black_rot", "grape_esca", "grape_healthy"],
    "strawberry_leaves": ["strawberry_healthy", "strawberry_leaf_scorch"],
    "tomato_leaves": ["tomato_bacterial_spot", "tomato_healthy", "tomato_septoria_leaf_spot"]
}

# Collect every class name in folder order, then number the names
# alphabetically so each class maps to a fixed integer label.
class_names = [name for members in class_folders.values() for name in members]
class_name_label = dict(zip(sorted(class_names), range(len(class_names))))

# === Load and Process Images ===
# Walk every <base_path>/<group>/<class> folder, decode each image file with
# OpenCV, resize it to image_size and record its integer class label.
# The accumulators are re-created here so re-running the cell does not
# append duplicate samples.
images = []
labels = []
# Paths that had an image extension but could not be decoded by OpenCV.
unreadable_files = []

print("🔄 Loading and processing images...")
for main_folder, subfolders in class_folders.items():
    for class_name in subfolders:
        folder_path = os.path.join(base_path, main_folder, class_name)
        if not os.path.isdir(folder_path):
            print(f"⚠️ Skipping missing folder: {folder_path}")
            continue
        # os.listdir returns entries in arbitrary order; sort them so the
        # sample order (and therefore the saved dataset) is reproducible.
        for img_file in tqdm(sorted(os.listdir(folder_path)), desc=f"{class_name}"):
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            img_path = os.path.join(folder_path, img_file)
            # cv2.imread signals failure by returning None rather than raising.
            img = cv2.imread(img_path)
            if img is None:
                unreadable_files.append(img_path)
                continue
            images.append(cv2.resize(img, image_size))
            labels.append(class_name_label[class_name])

# Surface decode failures instead of dropping those files silently.
if unreadable_files:
    print(f"⚠️ Skipped {len(unreadable_files)} unreadable image file(s), e.g.:")
    for bad_path in unreadable_files[:10]:
        print(f"   {bad_path}")

🔄 Loading and processing images...


apple_black_rot: 100%|██████████| 200/200 [00:04<00:00, 41.39it/s] 
apple_healthy: 100%|██████████| 200/200 [00:04<00:00, 45.66it/s] 
apple_scab: 100%|██████████| 200/200 [00:04<00:00, 42.99it/s] 
pepper_bacterial_spot: 100%|██████████| 200/200 [00:05<00:00, 39.70it/s]
pepper_healthy: 100%|██████████| 200/200 [00:04<00:00, 42.35it/s] 
corn_common_rust: 100%|██████████| 200/200 [00:04<00:00, 41.83it/s] 
corn_healthy: 100%|██████████| 200/200 [00:05<00:00, 37.94it/s] 
corn_northern_blight: 100%|██████████| 200/200 [00:04<00:00, 43.16it/s] 
grape_black_rot: 100%|██████████| 200/200 [00:04<00:00, 40.53it/s] 
grape_esca: 100%|██████████| 200/200 [00:04<00:00, 45.48it/s] 
grape_healthy: 100%|██████████| 200/200 [00:04<00:00, 41.16it/s] 
strawberry_healthy: 100%|██████████| 200/200 [00:05<00:00, 38.22it/s] 
strawberry_leaf_scorch: 100%|██████████| 200/200 [00:04<00:00, 40.57it/s] 
tomato_bacterial_spot: 100%|██████████| 200/200 [00:05<00:00, 39.43it/s] 
tomato_healthy: 100%|██████████| 200/20

In [None]:
# === Convert and Save to H5 ===
# Refuse to write an empty dataset: opening the file in "w" mode truncates
# it, so a failed load (e.g. a wrong base_path) would otherwise destroy a
# previously saved dataset.
if len(images) == 0:
    raise ValueError(
        f"No images were loaded from {base_path}; "
        f"refusing to overwrite {output_h5_file}"
    )

# Stack the per-image arrays into one uint8 array and the labels into int32.
X = np.array(images, dtype=np.uint8)
y = np.array(labels, dtype=np.int32)

# Class names ordered by their integer label, stored as fixed-length byte
# strings so the labels in "y" can be decoded from the file alone.
label_names = np.array(sorted(class_name_label, key=class_name_label.get), dtype="S")

print(f"✅ Loaded {len(X)} images. Saving to H5...")

with h5py.File(output_h5_file, "w") as h5f:
    x_dset = h5f.create_dataset("X", data=X)
    # Images were decoded with cv2.imread, which yields BGR channel order.
    x_dset.attrs["channel_order"] = "BGR"
    h5f.create_dataset("y", data=y)
    h5f.create_dataset("class_names", data=label_names)

print(f"✅ Saved to {output_h5_file} successfully. Classes: {len(class_name_label)}")

✅ Loaded 3200 images. Saving to H5...
✅ Saved to /content/drive/MyDrive/Marcus Colab/marcus_plant_disease_detection/dataset/dataset.h5 successfully. Classes: 16
