In [10]:
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from tqdm import tqdm
import os

# Dataset locations, relative to the notebook's working directory:
# one root for the training split and one for the held-out test split.
train_dir, test_dir = "./train", "./test"

# Function to extract color histogram (HSV-based)
def extract_color_histogram_hsv(image, bins=(8, 8, 8)):
    """Return a flattened, normalized joint colour histogram of ``image`` in HSV space.

    Args:
        image: Image array in RGB channel order (it is converted with
            ``cv2.COLOR_RGB2HSV``).
        bins: Number of histogram bins for the H, S and V channels, in that order.

    Returns:
        A 1-D array with one entry per (H, S, V) bin combination, normalized
        in place by ``cv2.normalize`` with its default settings.
    """
    all_channels = [0, 1, 2]
    # Histogram range per channel: H is binned over [0, 180), S and V over [0, 256).
    channel_ranges = [0, 180, 0, 256, 0, 256]

    as_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    joint_hist = cv2.calcHist([as_hsv], all_channels, None, bins, channel_ranges)

    # Source and destination are the same array, so normalization happens in place.
    cv2.normalize(joint_hist, joint_hist)
    return joint_hist.flatten()

# Function to preprocess image with color-based feature extraction
def preprocess_image_with_color(img_path, target_size=(64, 64)):
    """Load an image and build its combined colour + texture feature vector.

    The image is read from disk, converted from BGR to RGB, resized to
    ``target_size``, and described by the concatenation of an HSV colour
    histogram and a HOG descriptor computed on the grayscale version.

    Args:
        img_path: Path to the image file.
        target_size: Size passed to ``cv2.resize`` before any feature is
            extracted, so every image yields a vector of the same length.

    Returns:
        A 1-D array: the colour histogram followed by the HOG features.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        raise ValueError(f"Could not read image file: {img_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
    img_resized = cv2.resize(img, target_size)

    # Colour features: HSV histogram of the resized RGB image
    color_hist = extract_color_histogram_hsv(img_resized)

    # Texture features: HOG on the grayscale image. The HOG visualisation
    # image is not requested because only the descriptor is used.
    gray_resized = cv2.cvtColor(img_resized, cv2.COLOR_RGB2GRAY)
    hog_features = hog(
        gray_resized,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm="L2-Hys",
    )

    # Combine color histogram and HOG features
    return np.hstack([color_hist, hog_features])

# Load dataset with feature extraction
def load_dataset(directory):
    """Build feature and label arrays from a folder-per-class image directory.

    Every sub-directory of ``directory`` is treated as one class whose name is
    the label; every regular file inside it is passed to
    ``preprocess_image_with_color``.

    Args:
        directory: Root folder containing one sub-folder per class.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays, where ``data[i]`` is the
        feature vector of the i-th image and ``labels[i]`` its class name.
    """
    data = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and anything seeded downstream) the same on every run.
    for label in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, label)
        if not os.path.isdir(class_dir):
            continue
        # Only regular files can be images; skip nested folders.
        img_files = [
            name
            for name in sorted(os.listdir(class_dir))
            if os.path.isfile(os.path.join(class_dir, name))
        ]
        for img_file in tqdm(img_files, desc=f"Loading {label}"):
            img_path = os.path.join(class_dir, img_file)
            data.append(preprocess_image_with_color(img_path))
            labels.append(label)
    return np.array(data), np.array(labels)

# One seed shared by every stochastic step so a re-run reproduces the same numbers
RANDOM_SEED = 42
# Upper bound on the number of principal components (you can experiment with this)
MAX_PCA_COMPONENTS = 50

# Load training and test datasets
print("Loading training data...")
X_train, y_train = load_dataset(train_dir)

print("Loading test data...")
X_test, y_test = load_dataset(test_dir)

# Handle class imbalance using SMOTE (fitted on the training split only)
smote = SMOTE(random_state=RANDOM_SEED)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Perform PCA for dimensionality reduction.
# PCA rejects n_components above min(n_samples, n_features), so clamp the
# requested value to the shape of the resampled training matrix.
n_components = min(MAX_PCA_COMPONENTS, *X_train_res.shape)
# random_state matters when PCA selects a randomized SVD solver; fixing it
# keeps the projection (and therefore the metrics below) reproducible.
pca = PCA(n_components=n_components, random_state=RANDOM_SEED)
X_train_res_pca = pca.fit_transform(X_train_res)
X_test_pca = pca.transform(X_test)  # reuse the projection fitted on training data

# Initialize KNN classifier
knn = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='euclidean')

# Train the KNN model
knn.fit(X_train_res_pca, y_train_res)

# Predict on the test set
y_pred = knn.predict(X_test_pca)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Loading training data...


Loading apple:   0%|          | 0/69 [00:00<?, ?it/s]

Loading apple: 100%|██████████| 69/69 [00:05<00:00, 12.61it/s]
Loading banana: 100%|██████████| 75/75 [00:04<00:00, 16.55it/s]
Loading beetroot: 100%|██████████| 88/88 [00:04<00:00, 20.94it/s]
Loading bell pepper: 100%|██████████| 89/89 [00:05<00:00, 14.95it/s]
Loading cabbage: 100%|██████████| 93/93 [00:04<00:00, 19.93it/s]
Loading capsicum: 100%|██████████| 89/89 [00:03<00:00, 23.26it/s]
Loading carrot: 100%|██████████| 82/82 [00:03<00:00, 26.69it/s]
Loading cauliflower: 100%|██████████| 79/79 [00:03<00:00, 21.64it/s]
Loading chilli pepper: 100%|██████████| 87/87 [00:03<00:00, 26.97it/s]
Loading corn: 100%|██████████| 87/87 [00:05<00:00, 14.89it/s]
Loading cucumber: 100%|██████████| 94/94 [00:07<00:00, 13.16it/s]
Loading eggplant: 100%|██████████| 84/84 [00:05<00:00, 14.17it/s]
Loading garlic: 100%|██████████| 92/92 [00:05<00:00, 18.15it/s]
Loading ginger: 100%|██████████| 68/68 [00:03<00:00, 20.42it/s]
Loading grapes: 100%|██████████| 100/100 [00:07<00:00, 12.66it/s]
Loading jalepen

Loading test data...


Loading apple: 100%|██████████| 10/10 [00:01<00:00,  9.18it/s]
Loading banana: 100%|██████████| 9/9 [00:00<00:00, 28.92it/s]
Loading beetroot: 100%|██████████| 10/10 [00:00<00:00, 13.86it/s]
Loading bell pepper: 100%|██████████| 10/10 [00:02<00:00,  4.78it/s]
Loading cabbage: 100%|██████████| 11/11 [00:00<00:00, 12.31it/s]
Loading capsicum: 100%|██████████| 10/10 [00:00<00:00, 10.90it/s]
Loading carrot: 100%|██████████| 10/10 [00:00<00:00, 21.57it/s]
Loading cauliflower: 100%|██████████| 10/10 [00:00<00:00, 12.77it/s]
Loading chilli pepper: 100%|██████████| 10/10 [00:00<00:00, 17.24it/s]
Loading corn: 100%|██████████| 10/10 [00:00<00:00, 10.01it/s]
Loading cucumber: 100%|██████████| 10/10 [00:01<00:00,  5.53it/s]
Loading eggplant: 100%|██████████| 10/10 [00:00<00:00, 12.05it/s]
Loading garlic: 100%|██████████| 10/10 [00:00<00:00, 14.05it/s]
Loading ginger: 100%|██████████| 10/10 [00:00<00:00, 12.15it/s]
Loading grapes: 100%|██████████| 10/10 [00:00<00:00, 19.57it/s]
Loading jalepeno: 1

Classification Report:
               precision    recall  f1-score   support

        apple       1.00      0.80      0.89        10
       banana       1.00      0.78      0.88         9
     beetroot       1.00      1.00      1.00        10
  bell pepper       1.00      1.00      1.00        10
      cabbage       0.92      1.00      0.96        11
     capsicum       0.91      1.00      0.95        10
       carrot       1.00      1.00      1.00        10
  cauliflower       0.91      1.00      0.95        10
chilli pepper       0.90      0.90      0.90        10
         corn       0.83      1.00      0.91        10
     cucumber       1.00      1.00      1.00        10
     eggplant       1.00      1.00      1.00        10
       garlic       1.00      1.00      1.00        10
       ginger       0.91      1.00      0.95        10
       grapes       0.91      1.00      0.95        10
     jalepeno       1.00      1.00      1.00        10
         kiwi       1.00      1.00      1