# In [None]:
import os
import numpy as np
import pandas as pd
import kagglehub
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Download the dataset through kagglehub; the call returns the local directory path.
dataset_path = kagglehub.dataset_download("nirmalsankalana/rice-leaf-disease-image")

# Check if dataset path is valid
if not os.path.exists(dataset_path):
    raise ValueError("Dataset path does not exist.")

# Define parameters
image_size = (224, 224)
batch_size = 32

# Expected class folders (images are assumed to be organized in one folder per disease).
# NOTE(review): these names must match the dataset's folder names exactly -- confirm
# against the downloaded directory; a missing folder is only reported and skipped.
diseases = ['Bacterial_Blight', 'Blast', 'Brown_Spot', 'Tungro']

# Only regular files with one of these extensions are treated as images, so stray
# sub-folders or metadata files never end up in the image table.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

# (image_path, label) pairs collected across all class folders.
data = []

for disease in diseases:
    disease_path = os.path.join(dataset_path, disease)

    # Check that the class folder exists (and really is a directory).
    if not os.path.isdir(disease_path):
        print(f"Warning: Disease directory '{disease_path}' does not exist.")
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the row order,
    # and therefore the position-based train/test split below, reproducible.
    images = sorted(os.listdir(disease_path))

    for img in images:
        img_path = os.path.join(disease_path, img)
        if os.path.isfile(img_path) and img.lower().endswith(image_extensions):
            data.append((img_path, disease))

# Create a DataFrame with one row per image.
df = pd.DataFrame(data, columns=['image_path', 'label'])

# Print total images and class distribution for debugging
print("Total images loaded:", len(df))
print("Class distribution:\n", df['label'].value_counts())

# Train/test split, done per disease: every second row of the class (positions
# 0, 2, 4, ...) is a test candidate and the first 100 candidates become the test
# set; all remaining rows of that class go to the training set.
test_data = []
train_data = []

for disease in diseases:
    disease_images = df.loc[df['label'] == disease]

    # A class with fewer than 100 images is reported and left out of BOTH sets.
    if not len(disease_images) >= 100:
        print(f"Warning: Not enough images for {disease}. Available: {len(disease_images)}")
        continue

    # Even positions first, then cap at 100 rows (a class with fewer than 199
    # images therefore yields fewer than 100 test rows).
    even_images = disease_images.iloc[0::2].iloc[:100]
    is_held_out = disease_images.index.isin(even_images.index)

    test_data.append(even_images)
    train_data.append(disease_images.loc[~is_held_out])

# Abort when no class had enough images to contribute to the split.
if not (test_data and train_data):
    raise ValueError("No valid data found for training or testing.")

test_df = pd.concat(test_data)
train_df = pd.concat(train_data)

# Data Generators
# Both generators only rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Pin one explicit class list so the train and test generators are guaranteed to
# map every label to the same one-hot index.
class_names = sorted(train_df['label'].unique().tolist())

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='image_path',
    y_col='label',
    target_size=image_size,
    class_mode='categorical',
    classes=class_names,
    batch_size=batch_size)

test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='image_path',
    y_col='label',
    target_size=image_size,
    class_mode='categorical',
    classes=class_names,
    batch_size=batch_size,
    # Fixed order: validation metrics do not depend on it, and predictions can be
    # aligned with test_generator.classes afterwards.
    shuffle=False)

# Number of classes the models must predict. It is taken from the generator rather
# than from the full DataFrame because the split drops classes with too few images;
# counting those would make the model output wider than the one-hot labels.
num_classes = len(train_generator.class_indices)

# Transfer Learning with VGG16
def create_vgg16_model():
    """Build a transfer-learning classifier on top of a frozen VGG16 backbone.

    The backbone is VGG16 with ImageNet weights and without its original top,
    for 224x224 RGB inputs. On top of it sit global average pooling, a
    256-unit ReLU layer, 50% dropout and a softmax layer whose width is the
    module-level ``num_classes``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze every backbone layer so that only the new head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    return Sequential([
        backbone,
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),  # one output per class
    ])

# Build the transfer-learning model and configure it for multi-class training.
vgg16_model = create_vgg16_model()
vgg16_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Train VGG16 Model: a single epoch, with the test generator as validation data.
history_vgg16 = vgg16_model.fit(
    train_generator,
    epochs=1,
    validation_data=test_generator,
)

# Custom CNN Model
def create_custom_cnn():
    """Build a small convolutional classifier for 224x224 RGB inputs.

    Three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32, 64 and 128
    filters are followed by a flatten, a 128-unit ReLU layer, 50% dropout and
    a softmax layer whose width is the module-level ``num_classes``.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # Convolutional stages; only the first one declares the input shape.
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D(pool_size=(2, 2)),
    ]
    for filters in (64, 128):
        stack.append(Conv2D(filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    # Fully connected head.
    stack.append(Flatten())
    stack.append(Dense(128, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(num_classes, activation='softmax'))  # one output per class

    return Sequential(stack)

# Build the from-scratch CNN and configure it for multi-class training.
custom_cnn_model = create_custom_cnn()
custom_cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Train Custom CNN Model: a single epoch, with the test generator as validation data.
history_custom_cnn = custom_cnn_model.fit(
    train_generator,
    epochs=1,
    validation_data=test_generator,
)

# Plotting Results
def plot_history(history_vgg16, history_custom_cnn):
    """Plot accuracy and loss curves of both training runs side by side.

    The left panel shows training/validation accuracy and the right panel
    training/validation loss, each with one pair of curves per model. The
    figure is displayed before the function returns.

    Args:
        history_vgg16: Result of the VGG16 ``fit`` call; its ``history`` dict
            must contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        history_custom_cnn: Result of the custom CNN ``fit`` call, with the
            same keys.
    """
    runs = (('VGG16', history_vgg16), ('Custom CNN', history_custom_cnn))
    # (history key, panel title, y-axis label) for each subplot, left to right.
    panels = (('accuracy', 'Model Accuracy', 'Accuracy'),
              ('loss', 'Model Loss', 'Loss'))

    plt.figure(figsize=(12, 4))

    for position, (metric, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)

        for model_name, run in runs:
            # Markers keep the curves visible even for a single-epoch history,
            # where a plain line through one point draws nothing.
            plt.plot(run.history[metric], marker='o',
                     label=f'{model_name} Train')
            plt.plot(run.history[f'val_{metric}'], marker='o',
                     label=f'{model_name} Validation')

        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('Epochs')
        # The legend is added per panel, while that panel is the current axes.
        plt.legend()

    # Show the figure only after both panels have been drawn.
    plt.show()


plot_history(history_vgg16, history_custom_cnn)