In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import load_img, img_to_array

IMAGE_SIZE = 224
BATCH_SIZE = 32
CHANNELS = 3
EPOCHS = 30
EXTRACT_PATH = "PlantVillage"




In [2]:
# Define tomato classes
TOMATO_CLASSES = [
    'Tomato_Bacterial_spot',
    'Tomato_Early_blight',
    'Tomato_Late_blight',
    'Tomato_Leaf_Mold',
    'Tomato_Septoria_leaf_spot',
    'Tomato_Spider_mites_Two_spotted_spider_mite',
    'Tomato__Target_Spot',
    'Tomato__Tomato_YellowLeaf__Curl_Virus',
    'Tomato__Tomato_mosaic_virus',
    'Tomato_healthy'
]

In [3]:
#STEP 1: DATASET PREPARATION 

def prepare_binary_dataset():
    """
    Prepares a dataset for binary classification (Tomato vs. Non-Tomato leaf).
    """
    all_images = []
    binary_labels = []
    
    for class_name in os.listdir(EXTRACT_PATH):
        image_dir = os.path.join(EXTRACT_PATH, class_name)
        if os.path.isdir(image_dir):
            is_tomato = 1 if class_name.startswith('Tomato') else 0
            for image_file in os.listdir(image_dir):
                if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    image_path = os.path.join(image_dir, image_file)
                    img = load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
                    img_array = img_to_array(img) / 255.0  
                    all_images.append(img_array)
                    binary_labels.append(is_tomato)

    X = np.array(all_images, dtype=np.float32)
    y = np.array(binary_labels, dtype=np.float32)

    dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000).batch(BATCH_SIZE)
    train_size = int(0.8 * len(y))
    
    train_ds = dataset.take(train_size)
    val_ds = dataset.skip(train_size)

    return train_ds, val_ds



In [4]:
def prepare_disease_dataset():
    """
    Prepares a dataset for tomato disease classification.
    """
    tomato_images = []
    disease_labels = []

    for idx, class_name in enumerate(TOMATO_CLASSES):
        image_dir = os.path.join(EXTRACT_PATH, class_name)
        if os.path.isdir(image_dir):
            for image_file in os.listdir(image_dir):
                if image_file.endswith(('.jpg', '.jpeg', '.png')):
                    image_path = os.path.join(image_dir, image_file)
                    img = load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
                    img_array = img_to_array(img) / 255.0  # Normalize
                    tomato_images.append(img_array)
                    disease_labels.append(idx)

    X = np.array(tomato_images, dtype=np.float32)
    y = np.array(disease_labels, dtype=np.float32)

    dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000).batch(BATCH_SIZE)
    train_size = int(0.8 * len(y))

    train_ds = dataset.take(train_size)
    val_ds = dataset.skip(train_size)

    return train_ds, val_ds



In [5]:
##STEP 2: CREATE MODELS

def create_binary_model(input_shape):
    """
    Creates a binary classification model to identify tomato vs non-tomato leaves.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

def create_disease_model(input_shape, num_classes):
    """
    Creates a multi-class classification model for tomato leaf diseases.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')  
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [28]:
def show_sample_images(dataset, class_names, num_images=9):
    """
    Display a grid of sample images from the dataset
    """
    plt.figure(figsize=(10, 10))
    for images, labels in dataset.take(1):
        for i in range(min(num_images, len(images))):
            plt.subplot(3, 3, i + 1)
            plt.imshow(images[i].numpy().astype("uint8"))
            plt.title(class_names[int(labels[i])])
            plt.axis("off")
    plt.tight_layout()
    plt.show()

In [6]:
input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)


In [8]:
# Load datasets
binary_train_ds, binary_val_ds = prepare_binary_dataset()
disease_train_ds, disease_val_ds = prepare_disease_dataset()

In [9]:
# Initialize models
binary_model = create_binary_model(input_shape)
disease_model = create_disease_model(input_shape, len(TOMATO_CLASSES))


In [10]:

# Train the binary model
binary_model.fit(binary_train_ds, validation_data=binary_val_ds, epochs=EPOCHS)

# Train the disease classification model
disease_model.fit(disease_train_ds, validation_data=disease_val_ds, epochs=EPOCHS)


Epoch 1/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9288 - loss: 0.3310



[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m768s[0m 1s/step - accuracy: 0.9289 - loss: 0.3307
Epoch 2/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m667s[0m 1s/step - accuracy: 0.7787 - loss: 2.0883
Epoch 3/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m694s[0m 1s/step - accuracy: 0.7924 - loss: 1.6742
Epoch 4/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m713s[0m 1s/step - accuracy: 0.7799 - loss: 0.9968
Epoch 5/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m662s[0m 1s/step - accuracy: 0.6413 - loss: 0.6718
Epoch 6/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m692s[0m 1s/step - accuracy: 0.6406 - loss: 0.7072
Epoch 7/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m553s[0m 973ms/step - accuracy: 0.6407 - loss: 0.7373
Epoch 8/30
[1m568/568[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m584s[0m 1s/step - accuracy: 0.6413 - loss: 0.7619
Epoch 9/30
[1m568/568[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x24bde8e56f0>

In [24]:
# Save the binary classification model
binary_model.save("../saved-models/binary_model.keras")

# Save the disease classification model
disease_model.save("../saved-models/disease_model.keras")


In [25]:
## STEP 4: PREDICTION PIPELINE

def classify_image(image_path):
    """
    Predicts whether the image is a tomato leaf and, if so, classifies its disease.
    """
    img = load_img(image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  

    # Step 1: Check if it's a tomato leaf
    tomato_prob = binary_model.predict(img_array)[0][0]

    if tomato_prob < 0.5:
        return "Non-tomato leaf"

    # Step 2: Predict disease classification
    disease_prediction = disease_model.predict(img_array)
    disease_class = np.argmax(disease_prediction)
    
    return TOMATO_CLASSES[disease_class]

In [26]:

image_path = './PlantVillage/Tomato_Late_blight/005a2c1f-4e15-49e4-9e5c-61dc3ecf9708___RS_Late.B 5096.JPG'
result = classify_image(image_path)
print("Prediction:", result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Prediction: Tomato_Late_blight


In [29]:
def plot_training_history(history, title):
    """
    Plot training & validation accuracy and loss
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs_range = range(len(acc))

    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.title(f'{title} - Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.title(f'{title} - Loss')
    plt.legend()
    
    plt.tight_layout()
    plt.show()

In [31]:
print("Binary Classification Model Summary:")
binary_model.summary()

print("\nDisease Classification Model Summary:")
disease_model.summary()

Binary Classification Model Summary:



Disease Classification Model Summary:
