In [1]:
import os
import zipfile
import shutil
from sklearn.model_selection import train_test_split



In [2]:
extract_path = "/kaggle/input/plant-leaf/Plant_leave_diseases_dataset_without_augmentation"
output_dir = '/kaggle/working/'

In [3]:
# Create output subdirectories for train, valid, and test
subdirs = ['train', 'valid', 'test']
for subdir in subdirs:
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

# Split ratios
train_ratio = 0.7
valid_ratio = 0.2
test_ratio = 0.1

# Process each class folder
for class_name in os.listdir(extract_path):
    class_path = os.path.join(extract_path, class_name)
    
    # Check if it's a directory
    if os.path.isdir(class_path):
        # Get all image filenames in the folder
        images = [f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
        
        if not images:
            print(f"No images found in {class_name}, skipping...")
            continue
        
        # Split the dataset into train, valid, and test
        train_files, temp_files = train_test_split(images, test_size=(1 - train_ratio), random_state=42)
        valid_files, test_files = train_test_split(temp_files, test_size=(test_ratio / (valid_ratio + test_ratio)), random_state=42)
        
        # Copy files to respective directories
        for split, split_files in zip(subdirs, [train_files, valid_files, test_files]):
            split_class_dir = os.path.join(output_dir, split, class_name)
            os.makedirs(split_class_dir, exist_ok=True)
            
            for file_name in split_files:
                src = os.path.join(class_path, file_name)
                dest = os.path.join(split_class_dir, file_name)
                shutil.copy(src, dest)

print("Dataset has been successfully split into train, valid, and test sets!")




Dataset has been successfully split into train, valid, and test sets!


In [4]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Paths to split dataset directories
base_dir = "/kaggle/working/"
train_dir = os.path.join(base_dir, 'train')
valid_dir = os.path.join(base_dir, 'valid')
test_dir = os.path.join(base_dir, 'test')

# Image dimensions and batch size
img_height = 224
img_width = 224
batch_size = 32

# Data Generators for Training, Validation, and Test Sets (Only Rescaling)
data_gen = ImageDataGenerator(rescale=1.0/255.0)

# Loading Data with Data Generators
train_generator = data_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical' 
)

valid_generator = data_gen.flow_from_directory(
    valid_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = data_gen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False  
    
)

# Display Class Indices
class_indices = train_generator.class_indices
print("Class Indices:", class_indices)


Found 38791 images belonging to 39 classes.
Found 11087 images belonging to 39 classes.
Found 5570 images belonging to 39 classes.
Class Indices: {'Apple___Apple_scab': 0, 'Apple___Black_rot': 1, 'Apple___Cedar_apple_rust': 2, 'Apple___healthy': 3, 'Background_without_leaves': 4, 'Blueberry___healthy': 5, 'Cherry___Powdery_mildew': 6, 'Cherry___healthy': 7, 'Corn___Cercospora_leaf_spot Gray_leaf_spot': 8, 'Corn___Common_rust': 9, 'Corn___Northern_Leaf_Blight': 10, 'Corn___healthy': 11, 'Grape___Black_rot': 12, 'Grape___Esca_(Black_Measles)': 13, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 14, 'Grape___healthy': 15, 'Orange___Haunglongbing_(Citrus_greening)': 16, 'Peach___Bacterial_spot': 17, 'Peach___healthy': 18, 'Pepper,_bell___Bacterial_spot': 19, 'Pepper,_bell___healthy': 20, 'Potato___Early_blight': 21, 'Potato___Late_blight': 22, 'Potato___healthy': 23, 'Raspberry___healthy': 24, 'Soybean___healthy': 25, 'Squash___Powdery_mildew': 26, 'Strawberry___Leaf_scorch': 27, 'Strawberry

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

In [6]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation='softmax')  # Number of classes
])

In [7]:
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [8]:
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=10,  
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
test_loss, test_accuracy = model.evaluate(test_generator, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9294


In [10]:
# Predictions and true labels
y_pred_probs = model.predict(test_generator, verbose=1)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true = test_generator.classes

# Metrics: Classification Report
print("\nClassification Report:")
class_labels = list(test_generator.class_indices.keys())
print(classification_report(y_true, y_pred_classes, target_names=class_labels))


Classification Report:
                                               precision    recall  f1-score   support

                           Apple___Apple_scab       0.79      0.91      0.85        64
                            Apple___Black_rot       0.91      0.97      0.94        63
                     Apple___Cedar_apple_rust       1.00      0.75      0.86        28
                              Apple___healthy       0.80      0.98      0.88       165
                    Background_without_leaves       0.94      0.99      0.97       115
                          Blueberry___healthy       0.89      0.98      0.93       151
                      Cherry___Powdery_mildew       0.92      0.92      0.92       106
                             Cherry___healthy       0.94      0.94      0.94        86
   Corn___Cercospora_leaf_spot Gray_leaf_spot       0.76      0.75      0.76        52
                           Corn___Common_rust       0.98      0.99      0.99       120
                  

In [11]:
# Compute precision, recall, F1-score, and AUC
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_true, y_pred_classes, average='weighted')
recall = recall_score(y_true, y_pred_classes, average='weighted')
f1 = f1_score(y_true, y_pred_classes, average='weighted')
try:
    auc = roc_auc_score(tf.keras.utils.to_categorical(y_true), y_pred_probs, multi_class='ovr', average='weighted')
except ValueError:
    auc = None  # If AUC computation fails for single-class datasets

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
if auc is not None:
    print(f"AUC Score: {auc:.4f}")
else:
    print("AUC Score: Not applicable for this dataset")

Precision: 0.9316
Recall: 0.9294
F1 Score: 0.9280
AUC Score: 0.9988
