In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
def preprocess_images(input_dir, output_dir, target_size=(224, 224),
                      class_names=None, augmentations_per_image=5):
    """Write randomly augmented JPEG variants of every readable class image.

    For each class sub-folder of ``input_dir``, every file that OpenCV can
    decode is resized to ``target_size`` and passed through an
    ``ImageDataGenerator``. The augmented results are saved into the
    sub-folder of the same name under ``output_dir``. The source images
    themselves are not copied.

    Args:
        input_dir: Directory containing one sub-folder per class.
        output_dir: Directory that receives the augmented images. It and the
            per-class sub-folders are created when missing.
        target_size: Size handed to ``cv2.resize`` (OpenCV reads this as
            ``(width, height)``).
        class_names: Iterable of class sub-folder names to process. Defaults
            to the three rice-leaf disease classes used by this notebook.
        augmentations_per_image: Number of augmented files written for each
            source image.

    Raises:
        FileNotFoundError: Raised by ``os.listdir`` if a class sub-folder
            does not exist under ``input_dir``.
    """
    if class_names is None:
        class_names = ('bacterial_leaf_blight', 'brown_spot', 'leaf_smut')

    os.makedirs(output_dir, exist_ok=True)

    # No feature-wise statistics are enabled below, so datagen.fit() is not
    # required before calling flow().
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        rescale=1./255
    )

    for class_name in class_names:
        class_input = os.path.join(input_dir, class_name)
        class_output = os.path.join(output_dir, class_name)
        os.makedirs(class_output, exist_ok=True)

        # Sorted so that runs process (and report) files in a stable order.
        for img_name in sorted(os.listdir(class_input)):
            img_path = os.path.join(class_input, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Not an image OpenCV can decode (or not a file at all).
                print("Skipping unreadable image: {}".format(img_path))
                continue

            # cv2.imread yields BGR channel order, while the generator saves
            # through PIL, which interprets arrays as RGB. Convert up front so
            # the written files keep their true colours.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, target_size)
            batch = np.expand_dims(img, axis=0)

            # Encode the source file name and the augmentation index in the
            # prefix: with a shared prefix, uniqueness would rest solely on
            # the random suffix Keras appends, and colliding names would
            # silently overwrite earlier augmentations.
            source_tag = img_name.replace('.', '_')
            for aug_index in range(augmentations_per_image):
                flow = datagen.flow(
                    batch,
                    batch_size=1,
                    save_to_dir=class_output,
                    save_prefix='aug_{}_{}'.format(source_tag, aug_index),
                    save_format='jpg',
                )
                # Drawing one batch is what triggers the file to be written.
                next(flow)

In [None]:
def split_dataset(data_dir, train_dir, val_dir, test_dir, test_size=0.2, val_size=0.25,
                  class_names=None):
    """Move each class's files into train / validation / test folders.

    For every class sub-folder of ``data_dir`` the files are partitioned with
    two successive ``train_test_split`` calls and then *moved* (not copied)
    into ``<split_dir>/<class_name>/``. After the call the class folders
    under ``data_dir`` no longer contain the moved files.

    Files are split individually. If several files derive from the same
    source image (e.g. augmented variants), they may land in different
    subsets.

    Args:
        data_dir: Directory containing one sub-folder per class.
        train_dir: Destination root for the training subset.
        val_dir: Destination root for the validation subset.
        test_dir: Destination root for the test subset.
        test_size: Fraction of each class assigned to the test subset.
        val_size: Fraction of each class (of the whole class, not of the
            remainder) assigned to the validation subset.
        class_names: Iterable of class sub-folder names to process. Defaults
            to the three rice-leaf disease classes used by this notebook.

    Raises:
        ValueError: If the fractions are not in (0, 1) or leave nothing for
            training. ``train_test_split`` may also raise it when a class has
            too few files to split.
        FileNotFoundError: Raised by ``os.listdir`` if a class sub-folder
            does not exist under ``data_dir``.
    """
    if class_names is None:
        class_names = ('bacterial_leaf_blight', 'brown_spot', 'leaf_smut')

    if not (0 < test_size < 1 and 0 < val_size < 1 and test_size + val_size < 1):
        raise ValueError(
            "test_size and val_size must be fractions in (0, 1) whose sum is below 1; "
            "got test_size={!r}, val_size={!r}".format(test_size, val_size)
        )

    # The second split operates on what is left after removing the test
    # subset, so rescale val_size to be a fraction of that remainder.
    val_fraction = val_size / (1 - test_size)

    for class_name in class_names:
        class_path = os.path.join(data_dir, class_name)

        # os.listdir order is arbitrary, so sort to make the seeded split
        # reproducible; ignore anything that is not a regular file.
        candidates = (os.path.join(class_path, name) for name in os.listdir(class_path))
        images = sorted(path for path in candidates if os.path.isfile(path))

        train_val, test = train_test_split(images, test_size=test_size, random_state=42)
        train, val = train_test_split(train_val, test_size=val_fraction, random_state=42)

        for subset_dir, subset in ((train_dir, train), (val_dir, val), (test_dir, test)):
            destination = os.path.join(subset_dir, class_name)
            # os.replace does not create missing parent folders.
            os.makedirs(destination, exist_ok=True)
            for img in subset:
                os.replace(img, os.path.join(destination, os.path.basename(img)))

In [None]:
# Pipeline driver: augment the raw images, then distribute the augmented
# files over the train / validation / test folders.
#
# NOTE(review): augmentation happens before the split, so augmented variants
# of one source image can end up in different subsets (train vs. test).
# Consider splitting first if an unbiased evaluation is needed.
# NOTE(review): split_dataset moves files, so augmented_dir is emptied of the
# files it distributes.
raw_dir = "data/raw/mendeley"  # Adjust if using kaggle or combined data
augmented_dir = "data/augmented"
processed_dir = "data/processed"

preprocess_images(raw_dir, augmented_dir)

train_dir, validation_dir, test_dir = (
    os.path.join(processed_dir, subset) for subset in ("train", "validation", "test")
)
split_dataset(augmented_dir, train_dir, validation_dir, test_dir)

print("Preprocessing and splitting completed.")