In [None]:
import tensorflow as tf
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import sys

# Configuration
# Side length, in pixels, that input images are resized to before prediction.
IMG_SIZE = 256
# Saved Keras model that main() loads.
MODEL_PATH = "models/potato_disease_model.h5"
# Labels looked up by the argmax index of the model output.
# NOTE(review): the training code in this file takes its class order from
# `train_ds.class_names` (directory-derived; Keras documents that order as
# alphanumeric). Confirm this hard-coded order matches the order the saved
# model was trained with -- a sorted listing of these three names would put
# 'Healthy' before 'Late_Blight'.
CLASS_NAMES = ['Early_Blight', 'Late_Blight', 'Healthy']

def load_and_preprocess_image(image_path):
    """Load an image file and turn it into a one-image batch.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        tuple: ``(img_array, img)`` where ``img_array`` is a NumPy array of
        shape ``(1, IMG_SIZE, IMG_SIZE, 3)`` and ``img`` is the resized RGB
        PIL image that the array was built from.

    Pixel values are left unscaled; no normalisation is applied here.
    """
    # The context manager releases the underlying file handle as soon as the
    # pixel data has been copied into the converted image.
    with Image.open(image_path) as source:
        # Force exactly three channels: grayscale, palette and RGBA files would
        # otherwise yield arrays with 1, no, or 4 channels.
        img = source.convert("RGB").resize((IMG_SIZE, IMG_SIZE))

    # Add the leading batch dimension expected by model.predict().
    img_array = np.expand_dims(np.array(img), axis=0)
    return img_array, img

def predict_disease(image_path, model):
    """Classify a single image and report the winning label with its score.

    Args:
        image_path: Location of the image to classify.
        model: Object exposing ``predict(batch)``.

    Returns:
        tuple: ``(predicted_class, confidence, scores, original_img)`` where
        ``confidence`` is the top score multiplied by 100 and ``scores`` is the
        first row of the model output.
    """
    batch, original_img = load_and_preprocess_image(image_path)

    # The batch holds one image, so only the first row of the output matters.
    scores = model.predict(batch)[0]
    best_index = np.argmax(scores)
    predicted_class = CLASS_NAMES[best_index]
    confidence = np.max(scores) * 100

    return predicted_class, confidence, scores, original_img

def display_prediction(image_path, predicted_class, confidence, predictions, original_img):
    """Show the image beside a horizontal bar chart of the per-class scores.

    Args:
        image_path: Accepted for interface compatibility; not used for drawing.
        predicted_class: Label shown in the image title.
        confidence: Percentage shown in the image title.
        predictions: Per-class scores, plotted as percentages.
        original_img: Image drawn in the left panel.
    """
    fig = plt.figure(figsize=(12, 5))

    # Left panel: the classified image with the headline result.
    image_ax = fig.add_subplot(1, 2, 1)
    image_ax.imshow(original_img)
    image_ax.set_title(f"Predicted: {predicted_class}\nConfidence: {confidence:.2f}%")
    image_ax.axis('off')

    # Right panel: one bar per class, scaled to percent.
    bars_ax = fig.add_subplot(1, 2, 2)
    bars_ax.barh(CLASS_NAMES, predictions * 100)
    bars_ax.set_xlabel('Confidence (%)')
    bars_ax.set_title('Prediction Probabilities')

    fig.tight_layout()
    plt.show()

def main():
    """Command-line entry point: classify the image named by the first argument.

    Prints a usage message and returns when no image path is supplied;
    otherwise loads the model, prints the prediction with every class score,
    and opens the visualisation window.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python predict.py <image_path>")
        print("Example: python predict.py data/test/sample_leaf.jpg")
        return

    image_path = cli_args[0]

    print("Loading model...")
    model = tf.keras.models.load_model(MODEL_PATH)

    print(f"Predicting disease for: {image_path}")
    result = predict_disease(image_path, model)
    predicted_class, confidence, predictions, original_img = result

    # Textual report: headline first, then the full score table.
    rule = '=' * 50
    print(f"\n{rule}")
    print(f"Prediction: {predicted_class}")
    print(f"Confidence: {confidence:.2f}%")
    print(f"{rule}")
    print("\nAll class probabilities:")
    for label, score in zip(CLASS_NAMES, predictions):
        print(f"  {label}: {score*100:.2f}%")

    # Display visualization
    display_prediction(image_path, predicted_class, confidence, predictions, original_img)

# Run the prediction CLI only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os

# --- Configuration Constants ---
# These should match your training script
# NOTE(review): the training code in this file (load_data) passes seed=123 to
# image_dataset_from_directory, while SEED here is 42, so the two train/validation
# splits will presumably differ -- confirm which value is intended.
IMG_SIZE = 256
BATCH_SIZE = 32
SEED = 42  # for reproducibility
VAL_SPLIT = 0.2  # 20% of training data for validation
TRAIN_DIR = "data/train"
TEST_DIR = "data/test" # Optional: if you have a separate test set


def build_augmentation_layer():
    """
    Builds the random-augmentation pipeline as a Sequential model.

    The pipeline chains flip, rotation, zoom and contrast layers, each
    seeded with SEED, and is named "data_augmentation".
    """
    augmentation_steps = [
        tf.keras.layers.RandomFlip("horizontal_and_vertical", seed=SEED),
        tf.keras.layers.RandomRotation(0.2, seed=SEED),
        tf.keras.layers.RandomZoom(0.2, seed=SEED),
        tf.keras.layers.RandomContrast(0.2, seed=SEED),
    ]
    return tf.keras.Sequential(augmentation_steps, name="data_augmentation")


def load_datasets(train_dir=TRAIN_DIR, img_size=IMG_SIZE, batch_size=BATCH_SIZE, val_split=VAL_SPLIT):
    """
    Builds the training and validation datasets from one image directory.

    Both datasets come from `image_dataset_from_directory` called on the same
    folder with the same seed and split fraction, differing only in `subset`.
    Labels are requested in 'categorical' mode.

    Returns:
        tuple: (train_ds, val_ds, class_names)
    """
    print(f"Loading training and validation data from: {train_dir}")

    # Options that must be identical for both subsets so the split is consistent.
    shared_options = dict(
        label_mode='categorical',
        validation_split=val_split,
        seed=SEED,
        image_size=(img_size, img_size),
        batch_size=batch_size,
    )

    train_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir, subset="training", **shared_options
    )
    val_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir, subset="validation", **shared_options
    )

    class_names = train_ds.class_names
    print(f"Classes found: {class_names}")

    return train_ds, val_ds, class_names


def load_test_dataset(test_dir=TEST_DIR, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """
    Builds the (unshuffled) test dataset, or returns None when the
    directory does not exist.

    Labels are requested in 'categorical' mode; adjust the call if the test
    folder has no class subfolders (e.g. `labels=None`).
    """
    if os.path.exists(test_dir):
        print(f"Loading test data from: {test_dir}")
        return tf.keras.utils.image_dataset_from_directory(
            test_dir,
            label_mode='categorical',  # Change if your test set structure is different
            shuffle=False,  # Keep file order stable for evaluation
            image_size=(img_size, img_size),
            batch_size=batch_size,
        )

    print(f"Test directory not found: {test_dir}. Skipping test set.")
    return None


def configure_datasets(train_ds, val_ds, test_ds=None, augment=True):
    """
    Applies augmentation, rescaling and prefetching to the datasets.

    - Augmentation: random transformations, applied to the training set only
      and only when `augment` is true.
    - Rescaling: multiplies pixel values by 1/255 on every dataset.
    - Prefetching: lets the input pipeline prepare upcoming batches while the
      current one is being consumed.

    Args:
        train_ds: Training dataset yielding (images, labels) batches.
        val_ds: Validation dataset yielding (images, labels) batches.
        test_ds: Optional test dataset; pass None to skip it.
        augment: Whether to apply random augmentation to `train_ds`.

    Returns:
        tuple: (train_ds, val_ds) when `test_ds` is None, otherwise
        (train_ds, val_ds, test_ds).
    """
    autotune = tf.data.AUTOTUNE
    rescale_layer = tf.keras.layers.Rescaling(1./255)

    def _rescale(images, labels):
        """Scale a batch of images by 1/255, leaving labels untouched."""
        return rescale_layer(images), labels

    # Apply augmentation ONLY to the training dataset, and do it BEFORE
    # rescaling: RandomContrast clips its output against a 0-255 value range,
    # so running it on already-rescaled data could leave values outside [0, 1].
    if augment:
        # Built lazily so no augmentation model is created when it is unused.
        augmentation_layer = build_augmentation_layer()
        train_ds = train_ds.map(lambda x, y: (augmentation_layer(x, training=True), y),
                                num_parallel_calls=autotune)

    # Rescale every dataset, then prefetch for performance.
    train_ds = train_ds.map(_rescale, num_parallel_calls=autotune)
    val_ds = val_ds.map(_rescale, num_parallel_calls=autotune)
    train_ds = train_ds.prefetch(buffer_size=autotune)
    val_ds = val_ds.prefetch(buffer_size=autotune)

    # Explicit None check: do not rely on the truthiness of a dataset object.
    if test_ds is None:
        return train_ds, val_ds

    test_ds = test_ds.map(_rescale, num_parallel_calls=autotune)
    test_ds = test_ds.prefetch(buffer_size=autotune)
    return train_ds, val_ds, test_ds


def plot_sample_batch(dataset, class_names):
    """
    Visualizes up to nine images from the first batch of a dataset.
    Useful for checking if data augmentation is working.

    Args:
        dataset: Dataset yielding (images, labels) batches with one-hot labels.
        class_names: Sequence mapping a label index to its display name.

    Images are cast to uint8 for display, so this expects pixel values on a
    0-255 scale (not rescaled data).
    """
    plt.figure(figsize=(10, 10))
    for images, labels in dataset.take(1):  # Takes one batch
        # A batch may hold fewer than nine images (small dataset or small
        # batch size); never index past its end.
        shown = min(9, int(images.shape[0]))
        for i in range(shown):
            plt.subplot(3, 3, i + 1)
            plt.imshow(images[i].numpy().astype("uint8")) # Change to "float" if rescaled
            # Find the index of the '1' in the one-hot encoded label and turn
            # it into a plain int so it can index any sequence type.
            label_index = int(tf.argmax(labels[i]))
            plt.title(class_names[label_index])
            plt.axis("off")
    plt.suptitle("Sample Batch Visualization")
    plt.show()


# --- Main execution block (for testing) ---
if __name__ == "__main__":
    # This block runs ONLY when you execute this script directly
    # (e.g., `python src/preprocess.py`).
    # It's useful for testing the script independently.
    print("Testing preprocessing script...")

    # 1. Load the data
    train_dataset, val_dataset, class_names_list = load_datasets()

    # 2. Get a sample batch *before* augmentation to show
    # Note: We don't apply rescaling here, just for visualization
    print("Visualizing a sample batch *before* augmentation and rescaling...")
    plot_sample_batch(train_dataset, class_names_list)

    # 3. Configure datasets (apply rescaling and augmentation)
    # Set augment=True to see the augmented images
    train_dataset, val_dataset = configure_datasets(train_dataset, val_dataset, augment=True)

    # 4. Visualize a batch *after* augmentation
    # Note: Images will be rescaled (float 0-1), so we just show them.
    print("Visualizing a sample batch *after* augmentation and rescaling...")
    plt.figure(figsize=(10, 10))
    for images, labels in train_dataset.take(1):
        # The batch may contain fewer than nine images; stay within bounds.
        shown = min(9, int(images.shape[0]))
        for i in range(shown):
            plt.subplot(3, 3, i + 1)
            # Clip to [0, 1] for display: augmentation can push a few values
            # slightly outside the range imshow accepts for float images.
            plt.imshow(tf.clip_by_value(images[i], 0.0, 1.0).numpy())
            label_index = int(tf.argmax(labels[i]))
            plt.title(class_names_list[label_index])
            plt.axis("off")
    plt.suptitle("Sample Batch (After Augmentation)")
    plt.show()

    print("\nPreprocessing script test complete.")
    print(f"Class Names: {class_names_list}")
    print(f"Train dataset element spec: {train_dataset.element_spec}")
    print(f"Validation dataset element spec: {val_dataset.element_spec}")

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import os

# Configuration
IMG_SIZE = 256  # images are loaded at IMG_SIZE x IMG_SIZE; also the model input size
BATCH_SIZE = 32  # batch size passed to image_dataset_from_directory
EPOCHS = 20  # number of epochs passed to model.fit
DATA_DIR = "data/train"  # root folder the training/validation split is read from

# Load and preprocess data
def load_data():
    """Load the training and validation datasets from DATA_DIR.

    Both subsets come from the same directory with an 80/20 split and the
    same seed, so they do not overlap. No ``label_mode`` is passed, so the
    Keras default label encoding is used (main() compiles the model with
    ``sparse_categorical_crossentropy``).

    Returns:
        tuple: ``(train_ds, val_ds, class_names)`` where ``class_names`` is
        the list reported by the training dataset.
    """
    # Options shared by both subsets; they must match for a consistent split.
    shared_options = dict(
        validation_split=0.2,
        seed=123,
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
    )

    # tf.keras.utils is the current home of this loader (the
    # tf.keras.preprocessing alias is a legacy name) and matches the
    # preprocessing code elsewhere in this file.
    train_ds = tf.keras.utils.image_dataset_from_directory(
        DATA_DIR, subset="training", **shared_options
    )
    val_ds = tf.keras.utils.image_dataset_from_directory(
        DATA_DIR, subset="validation", **shared_options
    )

    class_names = train_ds.class_names
    print(f"Classes found: {class_names}")

    return train_ds, val_ds, class_names

# Create CNN model
def create_model(num_classes):
    """Build the (uncompiled) CNN classifier.

    The network rescales pixels by 1/255, applies three Conv2D + MaxPooling2D
    stages (32, 64, 128 filters), then a 128-unit dense layer with 50%
    dropout and a softmax output.

    Args:
        num_classes: Number of output units in the final softmax layer.

    Returns:
        keras.Sequential: The assembled model; the caller compiles it.
    """
    return keras.Sequential([
        # Declare the input explicitly rather than passing `input_shape` to
        # the first layer, which newer Keras versions warn about.
        keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
        layers.Rescaling(1./255),

        layers.Conv2D(32, 3, activation='relu'),
        layers.MaxPooling2D(),

        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),

        layers.Conv2D(128, 3, activation='relu'),
        layers.MaxPooling2D(),

        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

# Main training function
def main():
    """Train the classifier, save it, and save the training-history plot.

    Steps: load the datasets, build and compile the model, fit for EPOCHS
    epochs, write the model to ``models/potato_disease_model.h5`` and the
    accuracy/loss curves to ``models/training_history.png``.
    """
    model_path = 'models/potato_disease_model.h5'
    history_path = 'models/training_history.png'

    # Make sure the output folder exists BEFORE the long training run, so a
    # missing directory cannot cost a finished training session.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    print("Loading data...")
    train_ds, val_ds, class_names = load_data()

    print("\nBuilding model...")
    model = create_model(len(class_names))

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    print("\nModel Summary:")
    model.summary()

    print("\nTraining model...")
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS
    )

    # Save model
    model.save(model_path)
    print(f"\nModel saved to {model_path}")

    # Plot training history: one panel per metric, training vs. validation.
    plt.figure(figsize=(12, 4))
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    for position, (metric, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[metric], label=f'Training {label}')
        plt.plot(history.history[f'val_{metric}'], label=f'Validation {label}')
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend()
        plt.title(f'Model {label}')

    plt.tight_layout()
    plt.savefig(history_path)
    print(f"Training history plot saved to {history_path}")

# Start training only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()