# Download Data

In [None]:
# Fetch the dataset archive from Google Drive. The leading "!" is an IPython
# shell escape, so this needs the `gdown` command-line tool on the PATH.
# NOTE(review): the file id looks like a placeholder — confirm it points at
# the real archive.
!gdown 'https://drive.google.com/uc?id=Gknhsbdfyus'

# Extract the archive into ./PlantVillage.
# Presumably gdown saved the download as plantvillage.zip — verify the name.
!unzip plantvillage.zip -d PlantVillage


# Split the Data into Train, Validation, and Test

In [None]:
import os
import shutil
import random
from pathlib import Path

# Set paths
# dataset_dir holds one sub-folder per class; the split copies are written
# under output_base/{train,val,test}/<class name>/.
dataset_dir = Path("PlantVillage/PlantVillage")
output_base = Path("PlantVillage_Split")
train_dir = output_base / "train"
val_dir = output_base / "val"
test_dir = output_base / "test"

# Create output dirs
for split_dir in [train_dir, val_dir, test_dir]:
    split_dir.mkdir(parents=True, exist_ok=True)

# Split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15
# The test split receives whatever train and val leave over, so the three
# ratios must describe the whole dataset.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError("train_ratio + val_ratio + test_ratio must sum to 1")

# Suffixes are compared case-insensitively: glob("*.jpg") is case-sensitive on
# Linux and would silently skip files named *.JPG.
image_suffixes = {".jpg", ".jpeg", ".png"}

# A fixed seed plus sorted directory listings makes the split reproducible.
# Without this, re-running the cell copies a *different* random partition into
# the already-populated output folders, so the same image can end up in both
# train and test. If output_base was filled by an earlier unseeded run, delete
# it before running this cell.
split_seed = 42
rng = random.Random(split_seed)

# Process each class folder
for class_dir in sorted(dataset_dir.iterdir()):
    if not class_dir.is_dir():
        continue

    images = sorted(
        path
        for path in class_dir.iterdir()
        if path.is_file() and path.suffix.lower() in image_suffixes
    )
    rng.shuffle(images)

    n = len(images)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)

    # Contiguous, non-overlapping slices of the shuffled list; the test slice
    # takes the remainder so every image lands in exactly one split.
    split_data = {
        train_dir / class_dir.name: images[:n_train],
        val_dir / class_dir.name: images[n_train:n_train + n_val],
        test_dir / class_dir.name: images[n_train + n_val:],
    }

    for split_path, image_list in split_data.items():
        split_path.mkdir(parents=True, exist_ok=True)
        for img_path in image_list:
            shutil.copy(img_path, split_path / img_path.name)



# Data Preprocessing

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Locations of the train / validation / test image folders, given as plain
# strings relative to the working directory (adjust if the data lives elsewhere).
train_dir, val_dir, test_dir = (
    'PlantVillage_Split/train',
    'PlantVillage_Split/val',
    'PlantVillage_Split/test',
)

# Image preprocessing (resize, then EfficientNet input preprocessing)
def preprocess_image(image, target_size=(224, 224)):
    """Resize *image* and run it through EfficientNet's ``preprocess_input``.

    Args:
        image: Image data accepted by ``tf.image.resize``.
        target_size: ``(height, width)`` handed to ``tf.image.resize``.

    Returns:
        The result of ``tf.keras.applications.efficientnet.preprocess_input``
        applied to the resized image.

    NOTE(review): the Keras docs describe ``efficientnet.preprocess_input`` as
    a pass-through because the EfficientNet models rescale inputs internally —
    confirm against the installed version before relying on this step for
    normalization.
    """
    resized = tf.image.resize(image, target_size)
    return tf.keras.applications.efficientnet.preprocess_input(resized)

# Load and preprocess datasets
# One generator per split; each applies preprocess_image to every loaded image
# and performs no augmentation.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_image)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_image)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_image)

# Training batches keep flow_from_directory's default shuffling.
train_data = train_datagen.flow_from_directory(
    train_dir, target_size=(224, 224), batch_size=50, class_mode='categorical')

# flow_from_directory shuffles by default. Validation and test iterators are
# created with shuffle=False so batch order is fixed and predictions can be
# lined up with the iterator's .classes / .filenames when computing metrics.
val_data = val_datagen.flow_from_directory(
    val_dir, target_size=(224, 224), batch_size=50, class_mode='categorical',
    shuffle=False)
test_data = test_datagen.flow_from_directory(
    test_dir, target_size=(224, 224), batch_size=50, class_mode='categorical',
    shuffle=False)

# Data Augmentation

In [None]:
# Random transforms for the training images only. preprocess_image is passed
# as the generator's preprocessing_function, the same hook the
# non-augmented generators use.
augmentation_options = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'preprocessing_function': preprocess_image,
}
augmentation = ImageDataGenerator(**augmentation_options)

# Iterator over the training folder that yields augmented batches of 50 with
# one-hot ('categorical') labels.
augmented_train_data = augmentation.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=50,
    class_mode='categorical',
)