In [1]:
import numpy as np
import os
import pathlib
import cv2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential

In [3]:
# Define constants
IMAGE_SHAPE = (200, 200)  # Resize all images to this shape for MobileNetV2
BATCH_SIZE = 100
DATA_DIR = 'F:/indhu/Datasets/Agricultural_crops_30/'  # Update with your dataset path

In [4]:
# Load data
data_dir = pathlib.Path(DATA_DIR)
breeds = os.listdir(DATA_DIR)
dogs_images_dict = {}
dogs_labels_dict = {}
X, y = [], []
for index, category in enumerate(breeds):
    dogs_images_dict[category] = list(data_dir.glob(category + '/*'))
    dogs_labels_dict[category] = index

for breed_name, images in dogs_images_dict.items():
    for image in images:
        img = cv2.imread(str(image))
        resized_img = cv2.resize(img, IMAGE_SHAPE)
        X.append(resized_img)
        y.append(dogs_labels_dict[breed_name])

X = np.array(X) / 255.0  # Normalize images
y = np.array(y)


In [5]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Create MobileNetV2 feature extractor
base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')
base_model_mobilenet.trainable = False  # Freeze the convolutional base

feature_extractor_mobilenet = Sequential([
    base_model_mobilenet,
    GlobalAveragePooling2D()
])

  base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')


In [7]:
# Extract features using MobileNetV2
X_train_features = feature_extractor_mobilenet.predict(X_train, batch_size=BATCH_SIZE, verbose=1)
X_test_features = feature_extractor_mobilenet.predict(X_test, batch_size=BATCH_SIZE, verbose=1)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 843ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 466ms/step


In [8]:
# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=200, random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Machine": SVC(kernel='rbf', probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB()
}

In [9]:
# Evaluate classifiers using test set
for model_name, model in classifiers.items():
    print(f"Evaluating {model_name}...")
    model.fit(X_train_features, y_train)
    y_pred = model.predict(X_test_features)
    print(f"{model_name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred))


Evaluating Random Forest...
Random Forest Accuracy: 0.5000
              precision    recall  f1-score   support

           0       1.00      0.25      0.40         4
           1       0.25      0.29      0.27         7
           2       1.00      0.33      0.50         6
           3       0.57      0.44      0.50         9
           4       0.40      0.50      0.44         4
           5       0.40      0.67      0.50         3
           6       1.00      1.00      1.00         2
           7       0.33      0.14      0.20         7
           8       0.33      0.60      0.43         5
           9       0.50      0.43      0.46         7
          10       0.50      0.25      0.33         4
          11       0.00      0.00      0.00         3
          12       0.56      1.00      0.71         5
          13       1.00      0.71      0.83         7
          14       0.29      0.50      0.36         4
          15       0.40      0.22      0.29         9
          16       0.6

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression Accuracy: 0.6867
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         4
           1       0.50      0.43      0.46         7
           2       0.67      0.67      0.67         6
           3       0.83      0.56      0.67         9
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         3
           6       1.00      1.00      1.00         2
           7       0.43      0.43      0.43         7
           8       0.67      0.80      0.73         5
           9       0.60      0.86      0.71         7
          10       0.67      1.00      0.80         4
          11       0.25      0.33      0.29         3
          12       0.71      1.00      0.83         5
          13       0.67      0.57      0.62         7
          14       0.50      0.75      0.60         4
          15       0.60      0.67      0.63         9
          16       0.88      0.88      0.88 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Support Vector Machine Accuracy: 0.6928
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         4
           1       0.60      0.43      0.50         7
           2       0.80      0.67      0.73         6
           3       0.75      0.67      0.71         9
           4       1.00      1.00      1.00         4
           5       1.00      1.00      1.00         3
           6       1.00      0.50      0.67         2
           7       0.50      0.29      0.36         7
           8       0.57      0.80      0.67         5
           9       0.60      0.86      0.71         7
          10       0.67      1.00      0.80         4
          11       0.33      0.67      0.44         3
          12       0.71      1.00      0.83         5
          13       0.80      0.57      0.67         7
          14       0.60      0.75      0.67         4
          15       0.78      0.78      0.78         9
          16       0.88      0.88      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
# Evaluate classifiers using k-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for clf_name, clf in classifiers.items():
    print(f"K-Fold Cross Validation for {clf_name}...")
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []

    for train_index, val_index in kf.split(X_train_features, y_train):
        X_train_fold, X_val_fold = X_train_features[train_index], X_train_features[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        clf.fit(X_train_fold, y_train_fold)
        y_val_pred = clf.predict(X_val_fold)

        accuracy_scores.append(accuracy_score(y_val_fold, y_val_pred))
        precision_scores.append(precision_score(y_val_fold, y_val_pred, average='weighted'))
        recall_scores.append(recall_score(y_val_fold, y_val_pred, average='weighted'))
        f1_scores.append(f1_score(y_val_fold, y_val_pred, average='weighted'))

    print(f"Average Accuracy: {np.mean(accuracy_scores):.4f}")
    print(f"Average Precision: {np.mean(precision_scores):.4f}")
    print(f"Average Recall: {np.mean(recall_scores):.4f}")
    print(f"Average F1 Score: {np.mean(f1_scores):.4f}\n")


K-Fold Cross Validation for Random Forest...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.5385
Average Precision: 0.5445
Average Recall: 0.5385
Average F1 Score: 0.5144

K-Fold Cross Validation for Logistic Regression...
Average Accuracy: 0.7029
Average Precision: 0.7344
Average Recall: 0.7029
Average F1 Score: 0.6979

K-Fold Cross Validation for K-Nearest Neighbors...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.5445
Average Precision: 0.6061
Average Recall: 0.5445
Average F1 Score: 0.5396

K-Fold Cross Validation for Support Vector Machine...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.6743
Average Precision: 0.7116
Average Recall: 0.6743
Average F1 Score: 0.6666

K-Fold Cross Validation for Decision Tree...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.1991
Average Precision: 0.2310
Average Recall: 0.1991
Average F1 Score: 0.2003

K-Fold Cross Validation for Naive Bayes...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.4707
Average Precision: 0.5736
Average Recall: 0.4707
Average F1 Score: 0.4630



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
