In [6]:
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [10]:
def extract_histogram_features(directory, bins=32):
    """
    Extract pixel intensity histograms from images.

    Args:
        directory (str): Path to the dataset directory.
        bins (int): Number of bins for the histogram.

    Returns:
        features (ndarray): Feature matrix.
        labels (ndarray): Corresponding labels.
    """
    features = []
    labels = []
    classes = os.listdir(directory)
    class_map = {cls: idx for idx, cls in enumerate(classes)}  # Map class names to numerical labels

    for cls in classes:
        class_dir = os.path.join(directory, cls)
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            # Load and resize the image
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img).astype('uint8')
            
            # Compute histograms for each channel
            hist_r = cv2.calcHist([img_array], [0], None, [bins], [0, 256]).flatten()
            hist_g = cv2.calcHist([img_array], [1], None, [bins], [0, 256]).flatten()
            hist_b = cv2.calcHist([img_array], [2], None, [bins], [0, 256]).flatten()
            
            # Concatenate histograms to form the feature vector
            feature_vector = np.hstack([hist_r, hist_g, hist_b])
            features.append(feature_vector)
            labels.append(class_map[cls])
    
    return np.array(features), np.array(labels)


In [11]:
# Paths to dataset directories
train_dir = 'papaya_dataset/train'
val_dir = 'papaya_dataset/val'

# Extract features and labels
X_train, y_train = extract_histogram_features(train_dir)
X_val, y_val = extract_histogram_features(val_dir)


In [12]:
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)


In [13]:
# Initialize and train the model
nb_model = GaussianNB()
nb_model.fit(X_train_split, y_train_split)


In [14]:
# Predict on the test set
y_pred = nb_model.predict(X_test_split)

# Calculate accuracy
accuracy = accuracy_score(y_test_split, y_pred)
print(f"Accuracy on Test Set: {accuracy * 100:.2f}%")

# Generate classification report
print("Classification Report:")
print(classification_report(y_test_split, y_pred))


Accuracy on Test Set: 97.30%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        24
           1       0.93      1.00      0.96        13

    accuracy                           0.97        37
   macro avg       0.96      0.98      0.97        37
weighted avg       0.97      0.97      0.97        37



In [16]:
# Predict on the validation set
y_val_pred = nb_model.predict(X_val)

# Calculate validation accuracy
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Accuracy on Validation Set: {val_accuracy * 100:.2f}%")


Accuracy on Validation Set: 92.31%


In [17]:
def predict_image_label(image_path, model, bins=32):
    """
    Predict if the given image is infected with Mite or Mealy Bug.

    Args:
        image_path (str): Path to the image file.
        model (GaussianNB): Trained Naive Bayes model.
        bins (int): Number of bins for histogram features.

    Returns:
        str: Predicted label ("Mite" or "Mealy Bug").
    """
    # Load and preprocess the image
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img).astype('uint8')
    
    # Calculate histograms for each color channel
    hist_r = cv2.calcHist([img_array], [0], None, [bins], [0, 256]).flatten()
    hist_g = cv2.calcHist([img_array], [1], None, [bins], [0, 256]).flatten()
    hist_b = cv2.calcHist([img_array], [2], None, [bins], [0, 256]).flatten()
    
    # Concatenate histograms to form a feature vector
    feature_vector = np.hstack([hist_r, hist_g, hist_b]).reshape(1, -1)
    
    # Predict using the trained model
    predicted_label_idx = model.predict(feature_vector)[0]
    
    # Map numerical labels back to class names
    class_map = {0: "Mite", 1: "Mealy Bug"}  # Adjust based on your dataset
    predicted_label = class_map[predicted_label_idx]
    
    return predicted_label


In [19]:
import os

image_path = os.path.join('papaya_dataset', 'test', 'Mite_Bug', '20240717_132415.jpg')

# Predict the label for the given image
predicted_label = predict_image_label(image_path, nb_model)
print(f"The predicted label for the image is: {predicted_label}")


The predicted label for the image is: Mealy Bug
