In [9]:
import kagglehub

# Fetch the latest version of the dataset; the value handed back by kagglehub
# is kept in `path` and printed so the storage location is visible.
path = kagglehub.dataset_download(
    "vipoooool/new-plant-diseases-dataset"
)
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'new-plant-diseases-dataset' dataset.
Path to dataset files: /kaggle/input/new-plant-diseases-dataset


In [11]:
!pip install -q kaggle "tensorflow>=2.17.0" gradio matplotlib

In [10]:
import os

# `path` is the dataset root produced by the download cell. The image folders
# sit two levels down, inside a pair of identically named
# "New Plant Diseases Dataset(Augmented)" directories.
base_path = os.path.join(path, *["New Plant Diseases Dataset(Augmented)"] * 2)

# Derive both split directories from the shared base in one step.
train_dir, valid_dir = (
    os.path.join(base_path, split) for split in ("train", "valid")
)

print("Train path:", train_dir)
print("Valid path:", valid_dir)

Train path: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train
Valid path: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid


In [12]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os # Import os for directory listing

# Filesystem diagnostics: echo the split paths that are about to be read and
# list the directories above them, so a wrong dataset layout is easy to spot.
print(f"Attempting to access train_dir: {train_dir}")
print(f"Attempting to access valid_dir: {valid_dir}")


def _print_dir_listing(directory, not_found_message):
    """Print the entries of `directory`, or `not_found_message` if it does not exist."""
    try:
        print(f"Contents of {directory}: {os.listdir(directory)}")
    except FileNotFoundError:
        print(not_found_message)


# Root of the downloaded dataset
_print_dir_listing(path, f"Error: The base path {path} was not found.")

# First wrapper directory expected directly below the root
intermediate_dir_candidate = os.path.join(path, "New Plant Diseases Dataset(Augmented)")
_print_dir_listing(
    intermediate_dir_candidate,
    f"Error: The intermediate directory {intermediate_dir_candidate} was not found. This is a likely cause of the FileNotFoundError.",
)

# Both splits only multiply pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)


def _categorical_flow(datagen, directory):
    """Create a directory iterator: 224x224 targets, batches of 32, categorical class mode."""
    return datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
    )


train_data = _categorical_flow(train_datagen, train_dir)
valid_data = _categorical_flow(valid_datagen, valid_dir)

Attempting to access train_dir: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train
Attempting to access valid_dir: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid
Contents of /kaggle/input/new-plant-diseases-dataset: ['New Plant Diseases Dataset(Augmented)', 'new plant diseases dataset(augmented)', 'test']
Contents of /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented): ['New Plant Diseases Dataset(Augmented)']
Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.


In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Baseline CNN: three Conv2D + MaxPooling2D stages, then a dense classifier head.
# The input shape is declared with an explicit Input object as the first entry;
# passing `input_shape=` to the first Conv2D makes Keras emit a UserWarning for
# Sequential models.
model = Sequential([
    tf.keras.Input(shape=(224, 224, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    # One softmax unit per class discovered by the training generator
    Dense(train_data.num_classes, activation='softmax')
])

# categorical_crossentropy pairs with the generators' class_mode='categorical'
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


The model is now defined and compiled. The next step is to train it on `train_data` and validate it on `valid_data`.

In [15]:
# User / experiment configuration

IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 5
LAST_LAYERS = 10 # Unfreeze last 10 layers for fine-tuning
SEED = 1337

# Dataset paths
# `train_dir` and `valid_dir` are defined by an earlier cell and already point
# at the downloaded dataset, so they are reused here instead of being redefined.

import os
import sys  # no longer used by this cell; kept because other cells may rely on it

# Fail fast when a split directory is missing. Raising (instead of sys.exit)
# produces a normal traceback in the notebook and reports every missing path.
missing_dirs = [p for p in (train_dir, valid_dir) if not os.path.isdir(p)]
if missing_dirs:
    raise FileNotFoundError(f"ERROR: path not found: {', '.join(missing_dirs)}")

print("train_dir:", train_dir)
print("valid_dir:", valid_dir)


train_dir: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train
valid_dir: /kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid


In [17]:
import random
import os
import matplotlib.pyplot as plt # Added import for matplotlib

from IPython.display import Image, display

# Function to display random samples from each class
def show_sample_images(base_path, num_classes=5, images_per_class=3):
    """Display a grid of randomly chosen images, one row per randomly chosen class.

    Args:
        base_path: Directory whose immediate sub-directories are the classes,
            each holding image files.
        num_classes: Maximum number of classes (rows) to show.
        images_per_class: Maximum number of images (columns) to show per class.

    Returns:
        None. The figure is rendered with ``plt.show()``.

    Only regular files ending in .png, .jpg or .jpeg (case-insensitive) are
    considered. If ``base_path`` contains no class sub-directories, a message
    is printed and nothing is drawn.
    """
    # Sorted so that a seeded `random` yields the same selection regardless of
    # the order in which the filesystem lists the entries.
    class_names = sorted(
        name for name in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, name))
    )

    # Never ask for more classes than exist
    random_classes = random.sample(class_names, min(num_classes, len(class_names)))
    if not random_classes:
        print(f"No class directories to display in {base_path}")
        return

    # One row per class actually selected (not per class requested);
    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
    n_rows = len(random_classes)
    fig, axes = plt.subplots(
        n_rows, images_per_class, figsize=(12, 3 * n_rows), squeeze=False
    )
    fig.suptitle('Sample Images from Dataset', fontsize=16)

    # Hide every axis frame up front so cells that receive no image stay blank
    for row in range(n_rows):
        for col in range(images_per_class):
            axes[row, col].axis('off')

    for row, class_name in enumerate(random_classes):
        class_path = os.path.join(base_path, class_name)

        # Keep regular image files only (skips sub-directories and other files)
        images = sorted(
            entry for entry in os.listdir(class_path)
            if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
            and os.path.isfile(os.path.join(class_path, entry))
        )
        random_images = random.sample(images, min(images_per_class, len(images)))

        for col, img_name in enumerate(random_images):
            axes[row, col].imshow(plt.imread(os.path.join(class_path, img_name)))

        # Label the row on its first cell: "Crop___Disease_name" -> "Crop\nDisease name"
        axes[row, 0].set_title(
            class_name.replace('___', '\n').replace('_', ' '), fontsize=10
        )

    plt.tight_layout()
    plt.show()

# Example usage (you can call this function after this cell is run)
# show_sample_images(train_dir) # Assuming train_dir is defined and correct

In [19]:
#Count images in each split

def count_images(directory, extensions=('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')):
    """Count image files in each class sub-directory of `directory`.

    Args:
        directory: Directory whose immediate sub-directories are the classes.
            Entries of `directory` that are not directories are ignored.
        extensions: Filename suffixes (matched case-insensitively) that count
            as images. The default is meant to mirror the formats accepted by
            Keras' ``flow_from_directory``, so the totals agree with its
            "Found N images" message.

    Returns:
        A ``(total_images, class_counts)`` tuple, where ``class_counts`` maps
        each class directory name to its number of image files and
        ``total_images`` is the sum over all classes.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    class_counts = {}
    for class_name in os.listdir(directory):
        class_path = os.path.join(directory, class_name)
        if not os.path.isdir(class_path):
            continue
        # Count regular files only, so nested folders and stray non-image
        # files (e.g. Thumbs.db) do not inflate the numbers.
        with os.scandir(class_path) as entries:
            class_counts[class_name] = sum(
                1 for entry in entries
                if entry.is_file() and entry.name.lower().endswith(suffixes)
            )
    return sum(class_counts.values()), class_counts

# Gather image totals and per-class counts for both splits
train_total, train_counts = count_images(train_dir)
valid_total, valid_counts = count_images(valid_dir)

print(f"Training images: {train_total}")
print(f"Validation images: {valid_total}")
print(f"Number of classes: {len(train_counts)}")

# Preview only the first ten classes (in the order count_images returned them)
# to keep the cell output short.
print("\nClass distribution (first 10):")
for class_name, count in list(train_counts.items())[:10]:
    print(f" {class_name}: {count} images")

Training images: 70295
Validation images: 17572
Number of classes: 38

Class distribution (first 10):
 Tomato___Late_blight: 1851 images
 Tomato___healthy: 1926 images
 Grape___healthy: 1692 images
 Orange___Haunglongbing_(Citrus_greening): 2010 images
 Soybean___healthy: 2022 images
 Squash___Powdery_mildew: 1736 images
 Potato___healthy: 1824 images
 Corn_(maize)___Northern_Leaf_Blight: 1908 images
 Tomato___Early_blight: 1920 images
 Tomato___Septoria_leaf_spot: 1745 images


In [21]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training pipeline: MobileNetV2 preprocessing plus light random geometric
# augmentation (flip, rotation, zoom, shifts) with reflected borders.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='reflect',
)

# Validation pipeline: same preprocessing, no augmentation.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


def _flow_from(datagen, directory, **options):
    """Build a directory iterator using IMG_SIZE, BATCH_SIZE and categorical class mode."""
    return datagen.flow_from_directory(
        directory,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        **options,
    )


# Training batches are shuffled with a fixed seed; validation is not shuffled.
train_gen = _flow_from(train_datagen, train_dir, shuffle=True, seed=SEED)
valid_gen = _flow_from(valid_datagen, valid_dir, shuffle=False)

Found 70295 images belonging to 38 classes.
Found 17572 images belonging to 38 classes.
