In [1]:
# !pip install tensorflow
# !pip install matplotlib
# !pip install scikit-learn
# !pip install opencv-python-headless

In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and model paths used by
# the later cells all start with this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img

In [None]:
# Function to split data into training, validation, and test sets
def split_data(source_dir, train_dir, val_dir, test_dir, split_size=0.15):
    """Copy every class folder of ``source_dir`` into train/val/test folders.

    Each immediate sub-directory of ``source_dir`` is treated as one class.
    A ``split_size`` fraction of its files is held out and then divided
    50/50 between validation and test; the remaining files go to training.
    Files are copied, not moved, so ``source_dir`` keeps its content.

    Args:
        source_dir: Directory containing one sub-directory per class.
        train_dir: Root directory receiving the training copies.
        val_dir: Root directory receiving the validation copies.
        test_dir: Root directory receiving the test copies.
        split_size: Fraction of each class held out for validation + test
            combined (the default 0.15 gives roughly 7.5% each).

    Notes:
        ``os.listdir`` returns names in arbitrary order, so the fixed
        ``random_state`` only reproduces the same split when the directory
        listing order is stable between runs.
    """
    # The destination roots may live inside source_dir (e.g. <source>/train).
    # They must never be mistaken for class folders, otherwise a second run
    # creates bogus classes such as train/train, train/val, train/test.
    destination_roots = {
        os.path.realpath(root) for root in (train_dir, val_dir, test_dir)
    }

    for class_folder in os.listdir(source_dir):
        class_source_dir = os.path.join(source_dir, class_folder)
        if not os.path.isdir(class_source_dir):
            continue
        if os.path.realpath(class_source_dir) in destination_roots:
            continue

        print(f'Processing class folder: {class_folder}')

        file_list = [f for f in os.listdir(class_source_dir) if os.path.isfile(os.path.join(class_source_dir, f))]
        print(f'Found {len(file_list)} files in {class_folder}')
        if not file_list:
            # Nothing to copy: do not create empty class folders, which
            # directory-based loaders would count as extra classes.
            print(f'No files found in {class_folder}, skipping.')
            continue

        # train_test_split cannot divide fewer than two items into two
        # non-empty parts, so tiny classes are handled explicitly.
        if len(file_list) < 2:
            train_files, holdout_files = file_list, []
        else:
            train_files, holdout_files = train_test_split(file_list, test_size=split_size, random_state=42)
        if len(holdout_files) < 2:
            val_files, test_files = holdout_files, []
        else:
            val_files, test_files = train_test_split(holdout_files, test_size=0.5, random_state=42)

        # The class folder is created in all three destinations (even when a
        # subset is empty) so every split exposes the same set of classes.
        subsets = (
            (train_dir, train_files),
            (val_dir, val_files),
            (test_dir, test_files),
        )
        for destination_root, subset_files in subsets:
            class_dest_dir = os.path.join(destination_root, class_folder)
            os.makedirs(class_dest_dir, exist_ok=True)
            for file in subset_files:
                shutil.copy(os.path.join(class_source_dir, file), os.path.join(class_dest_dir, file))

# Define dataset paths
# NOTE: train/val/test are sub-folders of base_dir, which is also the folder
# that split_data scans for class folders.
base_dir = '/content/drive/My Drive/Dataset/Tomato/'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Splitting the dataset
# The call is unconditional: files are copied again every time this cell runs.
split_data(base_dir, train_dir, val_dir, test_dir)

Processing class folder: Tomato___healthy
Found 800 files in Tomato___healthy


In [None]:
# base_dir = '/content/drive/My Drive/Crop disease detection Dataset/'
# train_dir = os.path.join(base_dir, 'train')
# val_dir = os.path.join(base_dir, 'val')
# test_dir = os.path.join(base_dir, 'test')

In [None]:
# Image Data Generators with Augmentation
# Training images are rescaled to [0, 1] and randomly rotated, shifted,
# sheared, zoomed and flipped; validation/test images are only rescaled so
# that the metrics are computed on unmodified data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Batch size
batch_size = 32

# Spatial size every image is resized to (height, width)
target_size = (150, 150)

# Generate batches of data
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical'
)

# shuffle=False keeps the batches in directory order, so predictions can be
# matched against test_generator.classes afterwards.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# All three splits must map class names to the same indices, otherwise the
# one-hot labels of validation/test would not line up with the model output.
assert val_generator.class_indices == train_generator.class_indices, \
    "validation classes differ from training classes"
assert test_generator.class_indices == train_generator.class_indices, \
    "test classes differ from training classes"

# Number of classes
# Taken from the generator itself (not from a raw directory listing) so it
# always equals the width of the one-hot labels, even if train_dir contains
# stray files.
num_classes = train_generator.num_classes
print(f"Number of classes: {num_classes}")

In [None]:
# Feature Extraction Model using VGG16
# Convolutional base with ImageNet weights and without its original
# classifier head; it expects 150x150 RGB inputs.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the base so that only the new classification head is trained.
base_model.trainable = False

# Classification head: pooled features -> 512-unit hidden layer -> dropout
# -> one softmax output per class.
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Number of passes over the training data
epochs = 40

# steps_per_epoch / validation_steps are intentionally not passed: Keras then
# uses the generators' own length, i.e. every batch including the last
# partial one. The previous `samples // batch_size` dropped that partial
# batch and evaluated to 0 steps for a split smaller than one batch.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator
)

In [None]:
# Save the model
# Writes the model to a file under the mounted Drive folder; the message
# below is printed once save() has returned.
model.save('/content/drive/My Drive/Crop_disease_model.h5')
print("Model saved successfully")

In [None]:
# Load the model and store in a new variable
# load_model is imported from tensorflow.keras.models in the notebook's import
# cell, i.e. from the same package that built and saved the model, instead of
# from the standalone `keras` package.
loaded_model = load_model('/content/drive/My Drive/Crop_disease_model.h5')
print("Model loaded successfully")

In [None]:
# Evaluate the model
# No `steps` argument: the whole test generator is consumed. Limiting it to
# `samples // batch_size` skipped the last partial batch, and because the
# test generator is not shuffled those skipped images were always the same.
test_loss, test_acc = model.evaluate(test_generator)
print('Test accuracy:', test_acc)

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:


def predict_image(model, img_path, class_labels):
    """Display one image and print the model's prediction for it.

    Args:
        model: Object exposing ``predict``; it receives a batch holding the
            single image resized to 150x150 and scaled to [0, 1].
        img_path: Path of the image file. The name of its parent folder
            (underscores replaced by spaces) is printed as the actual class.
        class_labels: Sequence of class names, indexed by the position of
            the corresponding model output.

    Returns:
        None. The image is shown and the results are printed.
    """
    # Extract actual class from image path
    actual_class = os.path.basename(os.path.dirname(img_path)).replace('_', ' ')

    # Load and display the image
    # plt.imread is used because pyplot is already imported by the notebook;
    # the `mpimg` alias is not imported anywhere and raised NameError.
    img = plt.imread(img_path)
    plt.imshow(img)
    plt.axis('off')
    plt.show()

    # Preprocess the image for prediction: same size and 1/255 scaling as the
    # arrays built here for display are NOT reused; the file is reloaded at
    # the model's input resolution.
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=(150, 150))
    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension

    # Make prediction
    prediction = model.predict(img_array)
    predicted_class_index = np.argmax(prediction)

    # Display prediction result
    predicted_class_label = class_labels[predicted_class_index]
    predicted_class_probability = np.max(prediction)

    print('Predicted Class:', predicted_class_label)
    print('Predicted Class Probability:', round(predicted_class_probability * 100, 2), '%')
    print('Actual Class:', actual_class, '\n')
    print('Prediction Array:', prediction)
    print('Predicted Class Index:', predicted_class_index)
    print('Image Path:', img_path)

# Automatically define class labels from directories
def get_class_labels(base_dir):
    """Return the sorted names of the sub-directories of ``base_dir``.

    Only directories are returned: plain files lying next to the class
    folders are ignored, because an extra entry would shift the position of
    every label that sorts after it.

    Args:
        base_dir: Directory containing one sub-directory per class.

    Returns:
        list[str]: Sub-directory names in ascending order.
    """
    return sorted(
        entry
        for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )

# Define dataset paths
# NOTE: this rebinds the global base_dir (set to the dataset root in the
# splitting cell) to the test folder.
base_dir = '/content/drive/My Drive/Dataset/Tomato/test'

# Get class labels
class_labels = get_class_labels(base_dir)
print(f"Class Labels: {class_labels}")

# Example usage
# One hard-coded test image from each of four class folders; the parent
# folder name of each path is what predict_image prints as the actual class.
image_paths = [
    '/content/drive/My Drive/Dataset/Tomato/test/Tomato___Bacterial_spot/image (262).JPG',
    '/content/drive/My Drive/Dataset/Tomato/test/Tomato___healthy/image (74).JPG',
    '/content/drive/My Drive/Dataset/Tomato/test/Tomato___Early_blight/image (324).JPG',
    '/content/drive/My Drive/Dataset/Tomato/test/Tomato___Late_blight/image (205).JPG',
]

# Uses the in-memory `model`, not the `loaded_model` read back from disk.
for image_path in image_paths:
    predict_image(model, image_path, class_labels)

In [None]:
# Evaluate the model on the test set
# No `steps` argument is passed here, unlike the earlier evaluation cell.
# This rebinds test_loss and stores the accuracy as test_accuracy.
test_loss, test_accuracy = model.evaluate(test_generator, verbose=2)

# Print the test accuracy
# test_accuracy is multiplied by 100 and shown with two decimals.
print(f'Test accuracy: {test_accuracy * 100:.2f}%')


In [None]:
# Confusion matrix and classification report
# y_true / y_pred_classes are computed here so the cell works on a fresh
# kernel. test_generator was created with shuffle=False, so the order of the
# predictions matches the order of test_generator.classes.
test_generator.reset()
y_pred_probs = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true = test_generator.classes

# Class names ordered by their index, plus the matching index list, so the
# matrix rows/columns and the report lines cover every class even when one
# of them has no test samples.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
class_ids = list(range(len(class_names)))

print("Confusion Matrix")
print(confusion_matrix(y_true, y_pred_classes, labels=class_ids))

print("Classification Report")
print(classification_report(y_true, y_pred_classes, labels=class_ids, target_names=class_names))