In [4]:
# ==============================================================================
# STANDALONE SCRIPT FOR PAPAYA LEAF DISEASE CLASSIFICATION (Vision Transformer)
#
# Final Fix: Force-upgrades keras-cv to the latest version to resolve
# the AttributeError and then uses the modern 'VisionTransformer' class name.
# ==============================================================================

# --- 0. INSTALL LIBRARIES ---
# The '--upgrade' flag is the critical fix. It ensures we have the latest
# version of the library, aligning the environment with the code.
# print("--- Force-upgrading KerasCV to the latest version ---")
# !pip install --upgrade keras-cv -q

import os
import shutil
import random
import pathlib
import numpy as np
import tensorflow as tf
import keras_cv
import keras_hub
import tensorflow_text
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

print(f"\n--- Using TensorFlow Version: {tf.__version__} ---")
print(f"--- Using KerasCV Version: {keras_cv.__version__} ---") # Should now be a newer version


# --- 1. SETUP AND CONSTANTS ---
print("\n--- Section 1: Initializing Setup ---")
SEED = 42
tf.keras.utils.set_random_seed(SEED)
ORIGINAL_DATA_DIR = pathlib.Path('BDPapayaLeaf/Original Images')
SPLIT_BASE_DIR = pathlib.Path('papaya_data_split_vit')
CLASS_NAMES = sorted([item.name for item in ORIGINAL_DATA_DIR.glob('*') if item.is_dir()])
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 25
LEARNING_RATE = 0.0001
NUM_CLASSES = len(CLASS_NAMES)


# --- 2. DATA PREPARATION: STRATIFIED SPLIT ---
print("\n--- Section 2: Splitting Data ---")
if SPLIT_BASE_DIR.exists():
    shutil.rmtree(SPLIT_BASE_DIR)
train_dir, val_dir, test_dir = (SPLIT_BASE_DIR / d for d in ['train', 'validation', 'test'])
for d in [train_dir, val_dir, test_dir]: os.makedirs(d)
for class_name in CLASS_NAMES:
    for d in [train_dir, val_dir, test_dir]: os.makedirs(d / class_name)
    image_files = list((ORIGINAL_DATA_DIR / class_name).glob('*.jpg'))
    random.Random(SEED).shuffle(image_files)
    train_split, val_split = int(len(image_files) * 0.70), int(len(image_files) * 0.85)
    for i, f in enumerate(image_files):
        if i < train_split: shutil.copy(f, train_dir / class_name)
        elif i < val_split: shutil.copy(f, val_dir / class_name)
        else: shutil.copy(f, test_dir / class_name)
print("Data splitting complete.")


# --- 3. DATA PIPELINES (tf.data) ---
print("\n--- Section 3: Creating Data Pipelines ---")
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE, label_mode='categorical', shuffle=True, seed=SEED
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE, label_mode='categorical', shuffle=False
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir, image_size=IMG_SIZE, batch_size=BATCH_SIZE, label_mode='categorical', shuffle=False
)
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds, test_ds = (ds.cache().prefetch(buffer_size=AUTOTUNE) for ds in [train_ds, val_ds, test_ds])
print("Data pipelines created.")


# --- 4. VISUALIZATION HELPER FUNCTIONS ---
def plot_history(history, model_name):
    acc, val_acc, loss, val_loss = history.history['accuracy'], history.history['val_accuracy'], history.history['loss'], history.history['val_loss']
    plt.figure(figsize=(12, 5)); plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy'); plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(); plt.title(f'{model_name} Accuracy')
    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss'); plt.plot(val_loss, label='Validation Loss')
    plt.legend(); plt.title(f'{model_name} Loss'); plt.show()

def plot_confusion_matrix(y_true, y_pred, class_names):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6)); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Label'); plt.ylabel('True Label'); plt.title('Confusion Matrix'); plt.show()


# --- 5. MODEL BUILDING (ViT-Base) ---
print("\n--- Section 5: Building ViT-Base Model ---")
tf.keras.backend.clear_session()

# Reverting to the modern, correct name now that the library is upgraded.
model_vit = keras_hub.models.ImageClassifier.from_preset(
    "vit_base_patch16_224_imagenet",
    num_classes=NUM_CLASSES
)

model_vit.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("ViT-Base model built and compiled.")
model_vit.summary()


# --- 6. MODEL TRAINING & SAVING ---
print("\n--- Section 6: Starting Model Training ---")
history_vit = model_vit.fit(
    train_ds, epochs=EPOCHS, validation_data=val_ds,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]
)
print("\n--- Saving ViT-Base model ---")
model_vit.save("papaya_disease_vit_base.keras")
print("Model saved to papaya_disease_vit_base.keras")


# --- 7. MODEL EVALUATION ---
print("\n--- Section 7: Evaluating Model Performance ---")
plot_history(history_vit, "ViT-Base")
y_pred_probs = model_vit.predict(test_ds)
y_pred_labels = np.argmax(y_pred_probs, axis=1)
y_true_labels = np.argmax(np.concatenate([y for x, y in test_ds]), axis=1)
print("\n--- ViT-Base Classification Report ---\n", classification_report(y_true_labels, y_pred_labels, target_names=CLASS_NAMES))
print("\n--- Confusion Matrix ---")
plot_confusion_matrix(y_true_labels, y_pred_labels, CLASS_NAMES)
print("\n--- Script Finished ---")

ModuleNotFoundError: No module named 'tensorflow_text'

In [4]:

import tensorflow
import keras_cv
print(f"Using TensorFlow Version: {tensorflow.__version__}")
print(f"Using KerasCV Version: {keras_cv.__version__}")


Using TensorFlow Version: 2.20.0
Using KerasCV Version: 0.9.0
