# Disease detection from images using CNN

The dataset was retrieved from [PlantVillage on Kaggle](https://www.kaggle.com/datasets/emmarex/plantdisease).

In [None]:
# CELL 1
#Import necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

In [None]:
#CELL 2
!gdown --id 1NpuJFX3XqOVE69sO4OXKERgy8jueTXrO --output data.zip

# Unzip the dataset
!unzip -q data.zip

In [None]:
#CELL 3
# Image geometry and batching.
# Both sides are 150 px, so the order of the two values in target_size does
# not matter here.
img_width = 150
img_height = 150
target_size = (img_width, img_height)
batch_size = 32

# Locations of the extracted training and validation images
train_dir = '/content/train'
validation_dir = '/content/validation'

# Training images: rescale to [0, 1] and apply random augmentation
augmentation_options = dict(
    rescale=1./255,            # Map pixel values from [0, 255] to [0, 1]
    rotation_range=40,         # Random rotation of up to +/-40 degrees
    width_shift_range=0.2,     # Random horizontal shift (fraction of width)
    height_shift_range=0.2,    # Random vertical shift (fraction of height)
    shear_range=0.2,           # Random shear
    zoom_range=0.2,            # Random zoom in/out
    horizontal_flip=True,      # Random left-right flip
    fill_mode='nearest',       # Fill pixels exposed by a transform with the nearest value
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation images: rescale only, never augment
validation_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory iterators
flow_options = dict(
    target_size=target_size,
    batch_size=batch_size,
    class_mode='binary',  # Two classes -> a single 0/1 label per image
)

# Training batches (shuffled by default)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Validation batches: shuffle=False keeps the file order fixed so predictions
# can later be lined up with validation_generator.classes / .filepaths
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    shuffle=False,
    **flow_options
)

In [None]:
#CELL 4
# Visualize some healthy and diseased images
def plot_sample_images(generator, class_names, title):
    """Show up to nine images from the next batch of ``generator`` in a 3x3 grid.

    Args:
        generator: Iterator whose ``next()`` returns an ``(images, labels)``
            pair. Note that calling this function consumes one batch.
        class_names: Mapping from integer class index to the label text shown
            above each image.
        title: Figure-level title.
    """
    x_batch, y_batch = next(generator)

    # A batch can hold fewer than nine images (small batch_size or a short
    # final batch), so never index past what was actually returned.
    n_images = min(9, len(x_batch))

    plt.figure(figsize=(10, 10))
    for i in range(n_images):
        plt.subplot(3, 3, i + 1)
        plt.imshow(x_batch[i])
        class_idx = int(y_batch[i])  # labels may arrive as floats; dict keys are ints
        plt.title(class_names[class_idx])
        plt.axis('off')
    plt.suptitle(title)
    plt.show()

# Label -> index mapping as assigned by the training generator
class_indices = train_generator.class_indices
# Inverted lookup: class index -> class label
class_names = {index: label for label, index in class_indices.items()}

# Preview one batch of (augmented) training images
plot_sample_images(
    train_generator,
    class_names,
    'Sample Training Images',
)

In [None]:
#CELL 5
# Build the CNN one layer at a time
model = models.Sequential()

# Feature extractor: three convolution + max-pooling stages (32, 64, 128 filters)
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(img_width, img_height, 3)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden dense layer, single sigmoid output
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification

# Compile with binary cross-entropy and RMSprop at a small learning rate
rmsprop = tf.keras.optimizers.RMSprop(learning_rate=1e-4)
model.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Print the layer-by-layer summary
model.summary()

In [None]:
#CELL 6
# --------------------------------------------------
# Configure training
# --------------------------------------------------
# @title 🚀 Training settings
# EPOCHS is the number of training epochs; it is passed to model.fit in Cell 7.
# The trailing "@param" annotation declares a slider (1-50, step 1) for this
# value, so keep the assignment and the annotation on the same line.
EPOCHS = 10  # @param {type:"slider", min:1, max:50, step:1}
# Echo the chosen value so the setting is visible in the cell output
print(f"Training for {EPOCHS} epoch(s)…")

Training for 50 epoch(s)…


In [None]:
# CELL 7
# Fit the model.
# steps_per_epoch / validation_steps are intentionally not passed: both
# generators have a known length, so Keras runs each of them to the end of the
# data every epoch. The previous `samples // batch_size` value rounded down,
# which skipped the final partial batch (for the unshuffled validation
# generator that is always the same tail of images) and evaluated to 0 steps
# when a split held fewer images than one batch.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator
)

In [None]:
# CELL 8
# Plot training & validation accuracy/loss
def plot_training_history(history):
    """Plot per-epoch accuracy and loss curves side by side.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by ``model.fit``). The keys
            'accuracy' and 'loss' are required; 'val_accuracy' and 'val_loss'
            are plotted only when present, so a run without validation data
            can still be plotted.
    """
    metrics = history.history
    epochs_range = range(len(metrics['accuracy']))

    # (history key, display name, legend position) for each panel
    panels = [
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
    ]

    plt.figure(figsize=(12, 4))
    for position, (key, name, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs_range, metrics[key], label=f'Training {name}')

        val_key = f'val_{key}'
        if val_key in metrics:
            plt.plot(epochs_range, metrics[val_key], label=f'Validation {name}')

        plt.xlabel('Epoch')
        plt.ylabel(name)
        plt.legend(loc=legend_loc)
        plt.title(f'Training and Validation {name}')
    plt.show()

plot_training_history(history)

In [None]:
# CELL 9
# Evaluate on validation data
# (the printed "Test" figures are computed on the validation split)
validation_loss, validation_accuracy = model.evaluate(validation_generator)
print(f'Test loss: {validation_loss}')
print(f'Test accuracy: {validation_accuracy}')

# Make predictions on validation data
validation_generator.reset()  # Start again from the first batch
# No explicit `steps`: Keras iterates over the whole generator exactly once.
# The previous `samples // batch_size + 1` asked for one batch too many
# whenever the sample count was an exact multiple of batch_size.
predictions = model.predict(validation_generator)
predicted_classes = (predictions > 0.5).astype(int).reshape(-1)

# Get true labels (in file order, because the generator is not shuffled)
true_classes = validation_generator.classes
class_labels = list(validation_generator.class_indices.keys())

# Predictions and labels must line up one-to-one before they are compared
assert len(predicted_classes) == len(true_classes), (
    f'Got {len(predicted_classes)} predictions for {len(true_classes)} validation images'
)

# Identify correctly and incorrectly classified images
correct = np.where(predicted_classes == true_classes)[0]
incorrect = np.where(predicted_classes != true_classes)[0]

# Function to plot images
def plot_images(indices, generator, title):
    """Show up to nine images, selected by index, with true and predicted labels.

    Args:
        indices: Sequence of integer positions into ``generator.filepaths``;
            only the first nine are drawn.
        generator: Directory iterator whose ``filepaths`` list the image
            files. Images are read from this argument rather than from a
            global generator.
        title: Figure-level title.

    Also reads the notebook globals ``target_size``, ``class_names``,
    ``predicted_classes`` and ``true_classes``; the latter two must be in the
    same order as ``generator.filepaths``.
    """
    plt.figure(figsize=(15, 15))
    for i, idx in enumerate(indices[:9]):  # Plot up to 9 images
        plt.subplot(3, 3, i + 1)
        # Reload the file from disk: the generator is not indexed per image here
        img_path = generator.filepaths[idx]
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=target_size)
        img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0
        plt.imshow(img_array)
        pred_label = class_names[predicted_classes[idx]]
        true_label = class_names[true_classes[idx]]
        plt.title(f'True: {true_label}\nPredicted: {pred_label}')
        plt.axis('off')
    plt.suptitle(title)
    plt.show()

# Plot some correctly classified images
plot_images(correct, validation_generator, 'Correctly Classified Images')

In [None]:
# CELL 10
# Show a sample of the validation images the model got wrong
plot_images(
    indices=incorrect,
    generator=validation_generator,
    title='Incorrectly Classified Images',
)

### Hands-On Section: Choosing the Number of Epochs

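The notebook starts with 10 epochs. Try several different numbers of epochs (the `EPOCHS` setting in Cell 6), re-run the training cells, and see which value gives the best performance of the model on the test data.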

### Test your model!

Upload a leaf photo and get a prediction

In [None]:
# CELL 11
# --------------------------------------------------
# 🔍 Upload your own leaf image and predict
# --------------------------------------------------
from google.colab import files
from tensorflow.keras.preprocessing import image as keras_image

# Class index -> class name, taken from the training generator. Class indices
# are assigned from the dataset's folder names, so looking the label up here
# (instead of hard-coding which class a score above 0.5 means) keeps the
# displayed label in step with what the model was actually trained on.
index_to_class = {index: name for name, index in train_generator.class_indices.items()}

# 1. Upload
print("Select a JPG/PNG of a single leaf…")
uploaded = files.upload()   # opens a file-picker in the browser

# 2. Pre-process & predict
for fname in uploaded.keys():
    # Load -> resize -> scale (same preprocessing as the validation images)
    img = keras_image.load_img(fname, target_size=target_size)
    img_arr = keras_image.img_to_array(img) / 255.0
    img_arr = np.expand_dims(img_arr, axis=0)           # shape (1, H, W, 3)

    # Predict: the sigmoid output is the score for class index 1
    score = model.predict(img_arr)[0][0]
    label = index_to_class[int(score > 0.5)]

    # 3. Show result
    plt.figure(figsize=(4,4))
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"Predicted: {label}  (score={score:.2f})")
    plt.show()