In [None]:
# Install the notebook's dependencies. `%pip` (instead of `!pip`) makes sure the
# packages are installed into the environment of the running kernel, so the
# imports in the next cell can find them. Run this cell before the import cell
# and restart the kernel if TensorFlow itself was replaced.
%pip install tensorflow==2.13.0
%pip install tensorflow_addons shap matplotlib seaborn pandas numpy scikit-learn
# TODO(review): "gradcam" was noted here without an install command - confirm
# whether a Grad-CAM package still needs to be installed.

In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import os
import shap
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# ---------------------------------------------------------------------------
# Run configuration (local machine).
#
# The three locations below default to the original hard-coded paths but can be
# redirected without editing the notebook by setting the environment variables
# COLOCASIA_DATASET_DIR, COLOCASIA_CHECKPOINT_DIR and COLOCASIA_RESULTS_DIR
# before the kernel starts.
# ---------------------------------------------------------------------------

# Root folder of the dataset; it is expected to contain one sub-folder per
# entry of CLASS_NAMES.
BASE_PATH = os.environ.get(
    "COLOCASIA_DATASET_DIR",
    "C:/Users/Hassan/Documents/Colocasia_Dataset/Dataset",
)

# Class folder names. The position of a name in this list is the integer label
# assigned to the images found in that folder.
CLASS_NAMES = [
    'Disease_Leaf_Blight_Dorsal',
    'Disease_Leaf_Blight_Ventral',
    'Disease_Mosaic_Dorsal',
    'Disease_Mosaic_Ventral',
    'Healthy_Dorsal',
    'Healthy_Ventral'
]

# Hyper-parameters shared by data loading, model construction and training.
CONFIG = {
    'BATCH_SIZE': 32,                  # samples per tf.data batch
    'IMAGE_SIZE': (224, 224),          # size every image is resized to on load
    'NUM_CLASSES': len(CLASS_NAMES),   # width of the softmax output layer
    'EPOCHS': 50,                      # upper bound; early stopping may end sooner
    'K_FOLDS': 6,                      # number of cross-validation folds
    'SEED': 42,                        # random_state of the K-fold split
    'LEARNING_RATE': 1e-4              # AdamW learning rate
}

# Time the configuration cell was executed, formatted as YYYYMMDD-HHMMSS.
TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Output folders for per-fold model checkpoints and metric plots; both are
# created on the spot if they do not exist yet.
CHECKPOINT_DIR = os.environ.get(
    "COLOCASIA_CHECKPOINT_DIR",
    "C:/Users/Hassan/Documents/colocasia_checkpoints",
)
RESULTS_DIR = os.environ.get(
    "COLOCASIA_RESULTS_DIR",
    "C:/Users/Hassan/Documents/colocasia_results",
)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

def get_all_image_paths():
    """Collect every image file of the dataset together with its class index.

    For each entry of ``CLASS_NAMES`` the folders
    ``BASE_PATH/<class>/augmented_images`` and
    ``BASE_PATH/<class>/original_images`` are scanned. Folders that do not
    exist are skipped silently.

    Returns:
        tuple[list[str], list[int]]: two parallel lists - the image file paths
        and, for each path, the index of its class in ``CLASS_NAMES``. Both
        lists are empty when nothing was found.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    # Two independent lists; unpacking a single empty list into two names
    # raises ValueError.
    image_paths = []
    labels = []

    for label, class_name in enumerate(CLASS_NAMES):
        class_path = os.path.join(BASE_PATH, class_name)
        for subdir in ('augmented_images', 'original_images'):
            subdir_path = os.path.join(class_path, subdir)
            if not os.path.isdir(subdir_path):
                continue
            # os.listdir() yields entries in arbitrary order; sorting keeps the
            # sample order (and therefore the seeded fold split) reproducible.
            for filename in sorted(os.listdir(subdir_path)):
                # Compare case-insensitively so files such as "IMG_01.JPG"
                # are not dropped.
                if filename.lower().endswith(image_extensions):
                    image_paths.append(os.path.join(subdir_path, filename))
                    labels.append(label)

    return image_paths, labels

def load_and_prepare_dataset():
    """Read every dataset image into memory as a scaled pixel array.

    The file list comes from ``get_all_image_paths``. Each image is loaded at
    ``CONFIG['IMAGE_SIZE']``, converted to an array and divided by 255.0.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays, or ``(None, None)`` when
        no image file was found.
    """
    paths, class_ids = get_all_image_paths()
    if not paths:
        print("❌ No images found! Check dataset structure.")
        return None, None

    keras_image = tf.keras.preprocessing.image
    target_size = CONFIG['IMAGE_SIZE']

    pixel_arrays = []
    for path in paths:
        pil_image = keras_image.load_img(path, target_size=target_size)
        pixel_arrays.append(keras_image.img_to_array(pil_image) / 255.0)

    return np.array(pixel_arrays), np.array(class_ids)

def create_convnext_model():
    """Build and compile the ConvNeXt-Base transfer-learning classifier.

    The backbone is loaded with ImageNet weights and global average pooling;
    all but its last 30 layers are frozen. On top of it sits a
    BatchNormalization -> Dense(512, swish) -> Dropout(0.4) -> softmax head
    with ``CONFIG['NUM_CLASSES']`` outputs.

    Input contract (unchanged): image batches of shape
    ``(batch, *CONFIG['IMAGE_SIZE'], 3)`` whose pixel values were divided by
    255, which is what ``load_and_prepare_dataset`` returns. Labels are integer
    class ids (the loss is sparse categorical cross-entropy).

    Returns:
        tf.keras.Sequential: the compiled model, reporting ``accuracy`` and a
        weighted ``f1_score``.
    """
    num_classes = CONFIG['NUM_CLASSES']
    input_shape = (*CONFIG['IMAGE_SIZE'], 3)

    class SparseF1Score(tfa.metrics.F1Score):
        """F1 score that accepts integer class ids instead of one-hot targets."""

        def update_state(self, y_true, y_pred, sample_weight=None):
            # tfa's F1Score multiplies y_true and y_pred element-wise, so it
            # needs one-hot targets; the labels fed to this model are sparse.
            class_ids = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
            one_hot_true = tf.one_hot(class_ids, depth=num_classes)
            return tfa.metrics.F1Score.update_state(
                self, one_hot_true, y_pred, sample_weight
            )

    base_model = tf.keras.applications.ConvNeXtBase(
        include_top=False, weights="imagenet",
        input_shape=input_shape, pooling='avg'
    )
    # Fine-tune only the last 30 backbone layers; everything before stays frozen.
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        # Keras' ConvNeXt normalizes its input internally and expects raw
        # 0-255 pixel values, while callers pass pixels divided by 255.
        # Undo that scaling here so the pretrained weights see the value
        # range they were trained on. The layer has no weights, so existing
        # checkpoints still load.
        tf.keras.layers.Rescaling(255.0),
        base_model,
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(512, activation="swish"),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=CONFIG['LEARNING_RATE'], weight_decay=0.0001
        ),
        loss="sparse_categorical_crossentropy",
        # The metric keeps tfa's default name "f1_score", so callbacks can keep
        # monitoring "val_f1_score".
        metrics=["accuracy", SparseF1Score(num_classes=num_classes, average="weighted")]
    )
    return model

def train_and_evaluate():
    """Run K-fold cross-validated training of the ConvNeXt classifier.

    The whole dataset is loaded into memory and split with a seeded, shuffled
    ``KFold``. For every fold a fresh model is built; if a checkpoint file for
    that fold already exists its weights are loaded before training continues.
    The training curves of each fold are written by ``plot_fold_results``.

    Returns:
        list[dict] | None: one Keras ``History.history`` dict per fold, or
        ``None`` when no images were found.
    """
    images, labels = load_and_prepare_dataset()
    if images is None:
        return None

    kf = KFold(n_splits=CONFIG['K_FOLDS'], shuffle=True, random_state=CONFIG['SEED'])
    histories = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
        print(f"\n🚀 Training Fold {fold + 1}/{CONFIG['K_FOLDS']}")
        model_path = os.path.join(CHECKPOINT_DIR, f'model_fold_{fold+1}.h5')

        model = create_convnext_model()
        if os.path.exists(model_path):
            print(f"🔄 Resuming training from {model_path}")
            model.load_weights(model_path)

        # KFold hands back the training indices in ascending order and the
        # images are stored class by class, so without this permutation every
        # batch would consist of a single class.
        train_idx = np.random.default_rng(CONFIG['SEED'] + fold).permutation(train_idx)

        train_ds = (
            tf.data.Dataset.from_tensor_slices((images[train_idx], labels[train_idx]))
            # Small buffer: varies batch composition between epochs without
            # holding a second copy of the training set in memory.
            .shuffle(4 * CONFIG['BATCH_SIZE'], seed=CONFIG['SEED'])
            .batch(CONFIG['BATCH_SIZE'])
        )
        val_ds = tf.data.Dataset.from_tensor_slices(
            (images[val_idx], labels[val_idx])
        ).batch(CONFIG['BATCH_SIZE'])

        # mode='max' is required: Keras' default mode='auto' only recognises
        # accuracy-like names and would treat "val_f1_score" as a quantity to
        # minimise, i.e. keep the worst epoch and stop when the score improves.
        callbacks = [
            ModelCheckpoint(model_path, monitor='val_f1_score', mode='max', save_best_only=True),
            EarlyStopping(monitor='val_f1_score', mode='max', patience=5, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_f1_score', mode='max', factor=0.2, patience=3)
        ]

        history = model.fit(train_ds, validation_data=val_ds, epochs=CONFIG['EPOCHS'], callbacks=callbacks, verbose=1)
        histories.append(history.history)
        plot_fold_results(history, fold)
        # Release the graph/state of this fold's model before building the next.
        tf.keras.backend.clear_session()

    return histories

def plot_fold_results(history, fold):
    """Save a three-panel figure with the training curves of one fold.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists; the keys ``accuracy``, ``loss``,
            ``f1_score`` and their ``val_`` counterparts are read.
        fold: zero-based fold index; titles and the file name use ``fold + 1``.

    The figure is written to ``RESULTS_DIR/metrics_fold_<fold+1>.png`` and then
    closed.
    """
    curves = history.history
    fold_number = fold + 1

    _, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, metric in zip(axes, ('accuracy', 'loss', 'f1_score')):
        ax.plot(curves[metric], label=f'Training {metric}')
        ax.plot(curves[f'val_{metric}'], label=f'Validation {metric}')
        ax.set_title(f'Model {metric} - Fold {fold_number}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric)
        ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'metrics_fold_{fold_number}.png'))
    plt.close()

# Entry point: announce the run, then start the K-fold training.
if __name__ == "__main__":
    print("🌿 Starting Colocasia Plant Disease Classification Training")
    train_and_evaluate()


ModuleNotFoundError: No module named 'tensorflow_addons'

In [None]:

from google.colab import drive

# Mount Google Drive so the dataset and the output folders below are reachable.
drive.mount('/content/drive')

# ---------------------------------------------------------------------------
# Run configuration (Google Colab).
#
# The three locations below default to the original Drive paths but can be
# redirected without editing the notebook by setting the environment variables
# COLOCASIA_DATASET_DIR, COLOCASIA_CHECKPOINT_DIR and COLOCASIA_RESULTS_DIR.
# ---------------------------------------------------------------------------

# Root folder of the dataset; it is expected to contain one sub-folder per
# entry of CLASS_NAMES.
BASE_PATH = os.environ.get(
    "COLOCASIA_DATASET_DIR",
    "/content/drive/MyDrive/Colocasia Dataset/Dataset",
)

# Class folder names. The position of a name in this list is the integer label
# assigned to the images found in that folder.
CLASS_NAMES = [
    'Disease_Leaf_Blight_Dorsal',
    'Disease_Leaf_Blight_Ventral',
    'Disease_Mosaic_Dorsal',
    'Disease_Mosaic_Ventral',
    'Healthy_Dorsal',
    'Healthy_Ventral'
]

# Hyper-parameters shared by data loading, model construction and training.
CONFIG = {
    'BATCH_SIZE': 32,                  # samples per tf.data batch
    'IMAGE_SIZE': (224, 224),          # size every image is resized to on load
    'NUM_CLASSES': len(CLASS_NAMES),   # width of the softmax output layer
    'EPOCHS': 50,                      # upper bound; early stopping may end sooner
    'K_FOLDS': 6,                      # number of cross-validation folds
    'SEED': 42,                        # random_state of the K-fold split
    'LEARNING_RATE': 1e-4              # AdamW learning rate
}

# Time this cell was executed (YYYYMMDD-HHMMSS), for unique run identification.
TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Output folders for per-fold model checkpoints and metric plots; both are
# created on the spot if they do not exist yet.
CHECKPOINT_DIR = os.environ.get(
    "COLOCASIA_CHECKPOINT_DIR",
    '/content/drive/MyDrive/colocasia_checkpoints',
)
RESULTS_DIR = os.environ.get(
    "COLOCASIA_RESULTS_DIR",
    '/content/drive/MyDrive/colocasia_results',
)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

def get_all_image_paths():
    """Retrieve all image paths inside the augmented_images and original_images folders.

    For each entry of ``CLASS_NAMES`` the folders
    ``BASE_PATH/<class>/augmented_images`` and
    ``BASE_PATH/<class>/original_images`` are scanned. Folders that do not
    exist are skipped silently.

    Returns:
        tuple[list[str], list[int]]: two parallel lists - the image file paths
        and, for each path, the index of its class in ``CLASS_NAMES``. Both
        lists are empty when nothing was found.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    labels = []

    for label, class_name in enumerate(CLASS_NAMES):
        class_path = os.path.join(BASE_PATH, class_name)
        for subdir in ('augmented_images', 'original_images'):
            subdir_path = os.path.join(class_path, subdir)
            if not os.path.isdir(subdir_path):
                continue
            # os.listdir() yields entries in arbitrary order; sorting keeps the
            # sample order (and therefore the seeded fold split) reproducible.
            for filename in sorted(os.listdir(subdir_path)):
                # Compare case-insensitively so files such as "IMG_01.JPG"
                # are not dropped.
                if filename.lower().endswith(image_extensions):
                    image_paths.append(os.path.join(subdir_path, filename))
                    labels.append(label)

    return image_paths, labels

def load_and_prepare_dataset():
    """Read every dataset image into memory as a scaled pixel array.

    The file list comes from ``get_all_image_paths``. Each image is loaded at
    ``CONFIG['IMAGE_SIZE']``, converted to an array and divided by 255.0.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays, or ``(None, None)`` when
        no image file was found.
    """
    print("\nLoading dataset...")

    paths, class_ids = get_all_image_paths()
    if not paths:
        print("❌ No images found! Check dataset structure.")
        return None, None

    def _read_scaled(path):
        """Load one image at the configured size and divide its pixels by 255."""
        pil_image = tf.keras.preprocessing.image.load_img(
            path, target_size=CONFIG['IMAGE_SIZE']
        )
        return tf.keras.preprocessing.image.img_to_array(pil_image) / 255.0

    return np.array([_read_scaled(path) for path in paths]), np.array(class_ids)

def create_convnext_model():
    """Create and compile the ConvNeXt-Base transfer-learning classifier.

    The backbone is loaded with ImageNet weights and global average pooling;
    all but its last 30 layers are frozen. On top of it sits a
    BatchNormalization -> Dense(512, swish) -> Dropout(0.4) -> softmax head
    with ``CONFIG['NUM_CLASSES']`` outputs.

    Input contract (unchanged): image batches of shape
    ``(batch, *CONFIG['IMAGE_SIZE'], 3)`` whose pixel values were divided by
    255, which is what ``load_and_prepare_dataset`` returns. Labels are integer
    class ids (the loss is sparse categorical cross-entropy).

    Returns:
        tf.keras.Sequential: the compiled model, reporting ``accuracy`` and a
        weighted ``f1_score``.
    """
    num_classes = CONFIG['NUM_CLASSES']
    input_shape = (*CONFIG['IMAGE_SIZE'], 3)

    class SparseF1Score(tfa.metrics.F1Score):
        """F1 score that accepts integer class ids instead of one-hot targets."""

        def update_state(self, y_true, y_pred, sample_weight=None):
            # tfa's F1Score multiplies y_true and y_pred element-wise, so it
            # needs one-hot targets; the labels fed to this model are sparse.
            class_ids = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
            one_hot_true = tf.one_hot(class_ids, depth=num_classes)
            return tfa.metrics.F1Score.update_state(
                self, one_hot_true, y_pred, sample_weight
            )

    base_model = tf.keras.applications.ConvNeXtBase(
        include_top=False,
        weights="imagenet",
        input_shape=input_shape,
        pooling='avg'
    )

    # Fine-tune only the last 30 backbone layers; everything before stays frozen.
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        # Keras' ConvNeXt normalizes its input internally and expects raw
        # 0-255 pixel values, while callers pass pixels divided by 255.
        # Undo that scaling here so the pretrained weights see the value
        # range they were trained on. The layer has no weights, so existing
        # checkpoints still load.
        tf.keras.layers.Rescaling(255.0),
        base_model,
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(512, activation="swish"),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=CONFIG['LEARNING_RATE'],
            weight_decay=0.0001
        ),
        loss="sparse_categorical_crossentropy",
        # The metric keeps tfa's default name "f1_score", so callbacks can keep
        # monitoring "val_f1_score".
        metrics=[
            "accuracy",
            SparseF1Score(num_classes=num_classes, average="weighted")
        ]
    )

    return model

def train_and_evaluate():
    """Main training and evaluation function (K-fold cross-validation).

    The whole dataset is loaded into memory and split with a seeded, shuffled
    ``KFold``. A fresh model is built and trained for every fold, its training
    curves are written by ``plot_fold_results``, and after the last fold
    ``generate_shap_analysis`` is called with that fold's model and the first
    100 images.

    Returns:
        list[dict] | None: one Keras ``History.history`` dict per fold, or
        ``None`` when no images were found.
    """
    images, labels = load_and_prepare_dataset()
    if images is None:
        return None

    print(f"\n✓ Loaded {len(images)} images")
    print(f"✓ Class distribution: {np.bincount(labels)}")

    kf = KFold(n_splits=CONFIG['K_FOLDS'], shuffle=True, random_state=CONFIG['SEED'])
    histories = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(images)):
        print(f"\n🚀 Training Fold {fold + 1}/{CONFIG['K_FOLDS']}")

        # KFold hands back the training indices in ascending order and the
        # images are stored class by class, so without this permutation every
        # batch would consist of a single class.
        train_idx = np.random.default_rng(CONFIG['SEED'] + fold).permutation(train_idx)

        train_ds = (
            tf.data.Dataset.from_tensor_slices((images[train_idx], labels[train_idx]))
            # Small buffer: varies batch composition between epochs without
            # holding a second copy of the training set in memory.
            .shuffle(4 * CONFIG['BATCH_SIZE'], seed=CONFIG['SEED'])
            .batch(CONFIG['BATCH_SIZE'])
        )

        val_ds = tf.data.Dataset.from_tensor_slices(
            (images[val_idx], labels[val_idx])
        ).batch(CONFIG['BATCH_SIZE'])

        model = create_convnext_model()

        # mode='max' is required: Keras' default mode='auto' only recognises
        # accuracy-like names and would treat "val_f1_score" as a quantity to
        # minimise, i.e. keep the worst epoch and stop when the score improves.
        callbacks = [
            ModelCheckpoint(
                os.path.join(CHECKPOINT_DIR, f'model_fold_{fold+1}.h5'),
                monitor='val_f1_score',
                mode='max',
                save_best_only=True
            ),
            EarlyStopping(monitor='val_f1_score', mode='max', patience=5, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_f1_score', mode='max', factor=0.2, patience=3)
        ]

        history = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=CONFIG['EPOCHS'],
            callbacks=callbacks,
            verbose=1
        )

        histories.append(history.history)
        plot_fold_results(history, fold)

        # Explain only the model of the final fold.
        # NOTE(review): generate_shap_analysis is not defined in this part of
        # the notebook - confirm it exists before the last fold is reached.
        if fold == CONFIG['K_FOLDS'] - 1:
            generate_shap_analysis(model, images[:100])

        # Release the graph/state of this fold's model before building the next.
        tf.keras.backend.clear_session()

    return histories

def plot_fold_results(history, fold):
    """Plot and save the training curves of one fold.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists; the keys ``accuracy``, ``loss``,
            ``f1_score`` and their ``val_`` counterparts are read.
        fold: zero-based fold index; titles and the file name use ``fold + 1``.

    The figure is written to ``RESULTS_DIR/metrics_fold_<fold+1>.png`` and then
    closed.
    """
    recorded = history.history
    output_file = os.path.join(RESULTS_DIR, f'metrics_fold_{fold+1}.png')

    plt.figure(figsize=(15, 5))
    # One panel per metric, left to right.
    for panel, metric in enumerate(('accuracy', 'loss', 'f1_score'), start=1):
        plt.subplot(1, 3, panel)
        plt.plot(recorded[metric], label=f'Training {metric}')
        plt.plot(recorded[f'val_{metric}'], label=f'Validation {metric}')
        plt.title(f'Model {metric} - Fold {fold+1}')
        plt.xlabel('Epoch')
        plt.ylabel(metric)
        plt.legend()

    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()

# Entry point: announce the run, then start the K-fold training.
if __name__ == "__main__":
    print("🌿 Starting Colocasia Plant Disease Classification Training")
    train_and_evaluate()
