# Processing Images

In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

# Directory where the dataset is stored
dataset_dir = 'PlantVillage'

# Define image size for resizing
img_size = (128, 128)

# Initialize lists to hold image data and labels
images = []
labels = []

# Loop through each directory (class)
for class_name in os.listdir(dataset_dir):
    class_dir = os.path.join(dataset_dir, class_name)
    if os.path.isdir(class_dir):
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            if os.path.isfile(img_path):  # Ensure the path is a file
                try:
                    img = Image.open(img_path).convert('RGB')  # Convert image to RGB
                    img = img.resize(img_size)  # Resize image
                    img_array = np.array(img) / 255.0  # Normalize pixel values
                    images.append(img_array)
                    labels.append(class_name)
                except Exception as e:
                    print(f"Error loading image {img_path}: {e}")

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

print(f"Total images: {len(images)}")
print(f"Total labels: {len(labels)}")


In [14]:
from sklearn.preprocessing import LabelEncoder

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Print class names and their corresponding labels
for class_name, label in zip(label_encoder.classes_, range(len(label_encoder.classes_))):
    print(f"{class_name}: {label}")


Pepper__bell___Bacterial_spot: 0
Pepper__bell___healthy: 1
Potato___Early_blight: 2
Potato___Late_blight: 3
Potato___healthy: 4
Tomato_Bacterial_spot: 5
Tomato_Early_blight: 6
Tomato_Late_blight: 7
Tomato_Leaf_Mold: 8
Tomato_Septoria_leaf_spot: 9
Tomato_Spider_mites_Two_spotted_spider_mite: 10
Tomato__Target_Spot: 11
Tomato__Tomato_YellowLeaf__Curl_Virus: 12
Tomato__Tomato_mosaic_virus: 13
Tomato_healthy: 14


In [15]:
# Split the dataset into training and remaining (validation + test) sets
X_train, X_rem, y_train, y_rem = train_test_split(images, labels_encoded, test_size=0.3, random_state=42)

# Further split the remaining set into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)


In [16]:
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 14446
Validation set size: 3096
Test set size: 3096


In [17]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator()  # No augmentation for validation data

# Create generators
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32)


In [18]:
import tensorflow as tf

# Define the CNN model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator
)

# Save the model
model.save('leaf_classification_model.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 115ms/step - accuracy: 0.3500 - loss: 2.0071 - val_accuracy: 0.7006 - val_loss: 0.8870
Epoch 2/20
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 112ms/step - accuracy: 0.7146 - loss: 0.8513 - val_accuracy: 0.8110 - val_loss: 0.5367
Epoch 3/20
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 112ms/step - accuracy: 0.7968 - loss: 0.5946 - val_accuracy: 0.8563 - val_loss: 0.4224
Epoch 4/20
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 116ms/step - accuracy: 0.8337 - loss: 0.4819 - val_accuracy: 0.8908 - val_loss: 0.3165
Epoch 5/20
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 113ms/step - accuracy: 0.8690 - loss: 0.3815 - val_accuracy: 0.9002 - val_loss: 0.2985
Epoch 6/20
[1m452/452[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 109ms/step - accuracy: 0.8898 - loss: 0.3339 - val_accuracy: 0.9086 - val_loss: 0.2615
Epoch 7/20
[1m452/45



In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

# Get predictions
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)

# Calculate additional metrics
test_precision = precision_score(y_test, y_pred, average='weighted')
test_recall = recall_score(y_test, y_pred, average='weighted')
test_f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Test Precision: {test_precision}")
print(f"Test Recall: {test_recall}")
print(f"Test F1-Score: {test_f1}")

[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.9285 - loss: 0.2194
Test Accuracy: 0.9250646233558655
[1m97/97[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step
Test Precision: 0.9316222443292221
Test Recall: 0.9250645994832042
Test F1-Score: 0.9231219580438275
