# 🌿 Plant Disease Detection System - Training Notebook

This comprehensive Jupyter notebook covers the entire workflow for building a plant disease detection system using deep learning:

1. **Dataset Preparation**: Loading and preprocessing leaf images
2. **Model Training**: Training multiple CNN architectures (MobileNetV2, ResNet50, EfficientNetB0)
3. **Evaluation**: Analyzing model performance with metrics and visualizations
4. **Deployment**: Creating a Streamlit web application
5. **Integration**: Instructions for custom dataset integration and deployment

**Author**: AI Development Team  
**Date**: 2024  
**Framework**: TensorFlow/Keras

## Section 1: Import Required Libraries and Setup

In [None]:
# Import core deep learning libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
import os
import json
import warnings
warnings.filterwarnings('ignore')

# Import visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split

# Configure for reproducibility: seed the NumPy and TensorFlow random generators
np.random.seed(42)
tf.random.set_seed(42)

# Check TensorFlow version and GPU availability.
# The device list is queried once and reused. "GPU Available" is derived from
# the devices TensorFlow can actually see at runtime; tf.test.is_built_with_cuda()
# only tells whether the installed binary was compiled with CUDA support, which
# can be True on a machine that has no usable GPU.
gpus = tf.config.list_physical_devices('GPU')
print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {bool(gpus)}")
if gpus:
    print(f"GPUs detected: {len(gpus)}")
    for gpu in gpus:
        print(f"  - {gpu}")
else:
    print("No GPU detected. Using CPU.")

# Set up matplotlib style and the default seaborn colour palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

## Section 2: Dataset Preparation and Preprocessing

Dataset Structure Expected:
```
dataset/
├── Healthy/
│   ├── img1.jpg
│   ├── img2.jpg
│   └── ...
├── Powdery_Mildew/
│   ├── img1.jpg
│   ├── img2.jpg
│   └── ...
├── Leaf_Spot/
│   ├── img1.jpg
│   ├── img2.jpg
│   └── ...
```

In [None]:
# Configuration parameters
IMG_SIZE = 224
CHANNELS = 3
BATCH_SIZE = 32

# Set your data directory here
DATA_DIR = "../data/raw_data"  # Change this to your dataset path

# Check that the data directory exists.
# isdir() is used rather than exists() so that a regular file at DATA_DIR is
# reported as missing here instead of failing later when it is listed.
if not os.path.isdir(DATA_DIR):
    print(f"⚠️ Data directory not found at {DATA_DIR}")
    print("Please ensure your dataset is organized as shown above.")
else:
    print(f"✓ Data directory found at {DATA_DIR}")
# Function to load dataset from folder structure
def load_dataset_from_folder(data_dir, img_size=224):
    """
    Load all images from a nested folder structure (one folder per class).

    Every immediate, non-hidden sub-directory of ``data_dir`` is treated as a
    class; class indices follow the sorted folder names. Each image is read
    with OpenCV, converted from BGR to RGB, resized to ``img_size`` x
    ``img_size`` and scaled to float32 values in [0, 1]. Files that cannot be
    decoded or processed are skipped and reported.

    Args:
        data_dir: Directory containing one sub-folder per class.
        img_size: Side length in pixels of the square output images.

    Returns:
        Tuple ``(images, labels, class_names)`` where ``images`` is a float32
        array of shape (N, img_size, img_size, 3), ``labels`` is an integer
        array of shape (N,) with the class index of each image, and
        ``class_names`` is the sorted list of class folder names.
    """
    images = []
    labels = []

    # Get all disease class directories. Hidden folders such as
    # ".ipynb_checkpoints" are tooling artefacts, not classes.
    class_names = sorted(
        d for d in os.listdir(data_dir)
        if not d.startswith('.') and os.path.isdir(os.path.join(data_dir, d))
    )
    print(f"Found {len(class_names)} disease classes: {class_names}\n")

    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    skipped = 0

    # Load images from each disease folder
    for class_idx, disease_name in enumerate(class_names):
        disease_path = os.path.join(data_dir, disease_name)
        # os.listdir() order is arbitrary; sorting keeps the sample order
        # (and therefore any seeded split) reproducible across machines.
        image_files = sorted(
            f for f in os.listdir(disease_path)
            if f.lower().endswith(image_extensions)
        )

        print(f"Loading {len(image_files)} images from {disease_name}...")

        for img_file in image_files:
            img_path = os.path.join(disease_path, img_file)
            try:
                img = cv2.imread(img_path)
                if img is None:
                    # imread() returns None instead of raising when a file is
                    # corrupt or in a format it cannot decode.
                    skipped += 1
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                # Convert BGR to RGB
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                # Resize
                img = cv2.resize(img, (img_size, img_size))
                # Normalize to [0, 1]
                img = img.astype(np.float32) / 255.0
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                skipped += 1
                print(f"Error loading {img_file}: {e}")
                continue

            images.append(img)
            labels.append(class_idx)

    if images:
        images = np.array(images)
    else:
        # Keep rank and dtype consistent even when nothing could be loaded.
        images = np.empty((0, img_size, img_size, 3), dtype=np.float32)
    labels = np.array(labels, dtype=int)

    print(f"\n✓ Loaded {len(images)} images total")
    if skipped:
        print(f"Skipped {skipped} file(s) that could not be loaded")
    print(f"Image shape: {images.shape}")
    print(f"Label distribution:\n{pd.Series(labels).value_counts().sort_index()}\n")

    return images, labels, class_names

# Once DATA_DIR points at a real dataset, enable the call below to load it:
# images, labels, class_names = load_dataset_from_folder(DATA_DIR)
print(
    "Ready to load dataset. "
    "Update DATA_DIR path and uncomment the load_dataset_from_folder() call."
)

## Section 3: Data Augmentation and Train/Validation/Test Split