In [94]:
import os
import cv2
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE


In [95]:
def extract_enhanced_features(directory, bins=32):
    features = []
    labels = []
    classes = os.listdir(directory)
    class_map = {cls: idx for idx, cls in enumerate(classes)}

    for cls in classes:
        class_dir = os.path.join(directory, cls)
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (224, 224))
            
            # Color histograms
            hist_r = cv2.calcHist([img], [0], None, [bins], [0, 256]).flatten()
            hist_g = cv2.calcHist([img], [1], None, [bins], [0, 256]).flatten()
            hist_b = cv2.calcHist([img], [2], None, [bins], [0, 256]).flatten()
            
            # Edge detection
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            edges = cv2.Canny(gray, 100, 200).flatten()
            
            # Combine features
            feature_vector = np.hstack([hist_r, hist_g, hist_b, edges])
            features.append(feature_vector)
            labels.append(class_map[cls])

    return np.array(features), np.array(labels)


In [96]:
# Paths to dataset directories
train_dir = 'papaya_dataset/train'
val_dir = 'papaya_dataset/val'

# Extract features and labels
X_train, y_train = extract_enhanced_features(train_dir)
X_val, y_val = extract_enhanced_features(val_dir)

# Split training set for testing
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)


In [97]:
# Balance data with SMOTE
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_split, y_train_split)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test_split)
X_val_scaled = scaler.transform(X_val)


In [98]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Update model training
nb_model.fit(X_train_scaled, y_train)


In [99]:
# Test set predictions
y_test_pred = nb_model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test_split, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print("Test Classification Report:")
print(classification_report(y_test_split, y_test_pred))

# Validation set predictions
y_val_pred = nb_model.predict(X_val_scaled)
val_accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print("Validation Classification Report:")
print(classification_report(y_val, y_val_pred))


Test Accuracy: 35.14%
Test Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        24
           1       0.35      1.00      0.52        13

    accuracy                           0.35        37
   macro avg       0.18      0.50      0.26        37
weighted avg       0.12      0.35      0.18        37

Validation Accuracy: 92.31%
Validation Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.88      0.93        16
           1       0.83      1.00      0.91        10

    accuracy                           0.92        26
   macro avg       0.92      0.94      0.92        26
weighted avg       0.94      0.92      0.92        26



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [100]:
def predict_label(image_path, model, scaler, bins=32):
    """
    Predict the label of an image using a trained Naive Bayes model.
    
    Parameters:
        image_path (str): Path to the input image.
        model (GaussianNB): Trained Naive Bayes model.
        scaler (StandardScaler): Scaler used for feature normalization.
        bins (int): Number of bins for histogram feature extraction.
    
    Returns:
        str: Predicted label ('Mite' or 'Mealy Bug').
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image not found at {image_path}")
    
    # Resize the image
    img = cv2.resize(img, (224, 224))
    
    # Extract features (color histograms and edges)
    # Color histograms
    hist_r = cv2.calcHist([img], [0], None, [bins], [0, 256]).flatten()
    hist_g = cv2.calcHist([img], [1], None, [bins], [0, 256]).flatten()
    hist_b = cv2.calcHist([img], [2], None, [bins], [0, 256]).flatten()
    
    # Edge detection
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200).flatten()
    
    # Combine features
    feature_vector = np.hstack([hist_r, hist_g, hist_b, edges])
    
    # Normalize features
    feature_vector_scaled = scaler.transform([feature_vector])
    
    # Predict label
    predicted_class = model.predict(feature_vector_scaled)[0]
    
    # Map class index to label
    class_map = {0: 'Mite', 1: 'Mealy Bug'}
    predicted_label = class_map[predicted_class]
    
    return predicted_label


In [101]:
import os

# Path to the test image
image_path = os.path.join('papaya_dataset', 'test', 'Mite_Bug', '20240717_132340.jpg')

# Predict the label
predicted_label = predict_label(
    image_path=image_path,
    model=nb_model,  # Trained Naive Bayes model
    scaler=scaler    # Scaler used during training
)

print(f"Predicted Label: {predicted_label}")


Predicted Label: Mealy Bug
