In [2]:
import numpy as np
import os
import pathlib
import cv2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential

In [4]:
# Define constants
IMAGE_SHAPE = (200, 200)  # Resize all images to this shape for MobileNetV2
BATCH_SIZE = 100
DATA_DIR = 'G:/Datasets/Medicinal Leaf dataset_30/'  # Update with your dataset path

In [8]:
# Load data
data_dir = pathlib.Path(DATA_DIR)
categories = os.listdir(DATA_DIR)
mleaf_images_dict = {}
mleaf_labels_dict = {}
X, y = [], []
for index, category in enumerate(categories):
   mleaf_images_dict[category] = list(data_dir.glob(category + '/*'))
   mleaf_labels_dict[category] = index

for category_name, images in mleaf_images_dict.items():
    for image in images:
        img = cv2.imread(str(image))
        resized_img = cv2.resize(img, IMAGE_SHAPE)
        X.append(resized_img)
        y.append(mleaf_labels_dict[category_name])

X = np.array(X) / 255.0  # Normalize images
y = np.array(y)

In [12]:
# Count images per class
class_counts = {category: len(images) for category, images in mleaf_images_dict.items()}

# Print counts for each class
for category, count in class_counts.items():
    print(f"Class '{category}': {count} images")

Class 'Aloevera': 118 images
Class 'Amla': 67 images
Class 'Amruthaballi': 91 images
Class 'Arali': 89 images
Class 'ashoka': 81 images
Class 'Astma_weed': 82 images
Class 'Badipala': 76 images
Class 'Balloon_Vine': 61 images
Class 'Bamboo': 118 images
Class 'Beans': 97 images
Class 'Betel': 114 images
Class 'Bhrami': 104 images
Class 'Bringaraja': 73 images
Class 'camphor': 66 images
Class 'Caricature': 76 images
Class 'Castor': 129 images
Class 'Catharanthus': 134 images
Class 'Chakte': 68 images
Class 'Chilly': 69 images
Class 'Citron lime (herelikai)': 99 images
Class 'Coffee': 83 images
Class 'Common rue(naagdalli)': 67 images
Class 'Coriender': 115 images
Class 'Curry': 168 images
Class 'Doddpathre': 142 images
Class 'Drumstick': 56 images
Class 'Ekka': 81 images
Class 'Eucalyptus': 80 images
Class 'Ganigale': 75 images
Class 'Ganike': 63 images
Class 'Gasagase': 79 images
Class 'Ginger': 82 images


In [14]:
print(f"Loaded {len(X)} images with shape {X.shape} and {len(y)} labels.")
print(f"X shape: {X.shape}, y shape: {y.shape}")

Loaded 2903 images with shape (2903, 200, 200, 3) and 2903 labels.
X shape: (2903, 200, 200, 3), y shape: (2903,)


In [16]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
# Calculate the percentages
train_percentage = len(X_train) / len(X) * 100
test_percentage = len(X_test) / len(X) * 100

# Print the results
print(f"Training data: {len(X_train)} {X_train.shape} samples ({train_percentage:.2f}%)")
print(f"Testing data: {len(X_test)} {X_test.shape} samples ({test_percentage:.2f}%)")

Training data: 2322 (2322, 200, 200, 3) samples (79.99%)
Testing data: 581 (581, 200, 200, 3) samples (20.01%)


In [20]:
base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')
base_model_mobilenet.trainable = False  # Freeze the convolutional base

feature_extractor_mobilenet = Sequential([
    base_model_mobilenet,
    GlobalAveragePooling2D()
])

  base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')


In [22]:
# Extract features using MobileNetV2
import time
start_trainfeatures_time = time.time()
X_train_features = feature_extractor_mobilenet.predict(X_train, batch_size=BATCH_SIZE, verbose=1)  
end_trainfeatures_time = time.time()
start_testfeatures_time = time.time()
X_test_features = feature_extractor_mobilenet.predict(X_test, batch_size=BATCH_SIZE, verbose=1) 
end_testfeatures_time = time.time()

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2s/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step


In [24]:
# Calculate time taken for training and testing
train_time = end_trainfeatures_time - start_trainfeatures_time
test_time = end_testfeatures_time - start_testfeatures_time
print(f"Training time: {train_time} seconds")
print(f"Testing time: {test_time} seconds")

Training time: 83.82622647285461 seconds
Testing time: 12.70110011100769 seconds


In [26]:
# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=200, random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Machine": SVC(kernel='rbf', probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB()
}

In [28]:
# Evaluate classifiers using test set
for model_name, model in classifiers.items():
    print(f"Evaluating {model_name}...")
    start_train_time = time.time()
    model.fit(X_train_features, y_train)
    end_train_time = time.time()
    start_test_time = time.time() 
    y_pred = model.predict(X_test_features)
    end_test_time = time.time()
    print(f"{model_name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred))

Evaluating Random Forest...
Random Forest Accuracy: 0.8055
              precision    recall  f1-score   support

           0       0.80      0.91      0.85        22
           1       0.88      0.54      0.67        13
           2       0.86      0.86      0.86        22
           3       0.94      1.00      0.97        16
           4       0.90      0.83      0.86        23
           5       0.82      0.67      0.74        21
           6       0.79      0.79      0.79        19
           7       1.00      0.56      0.71         9
           8       0.65      0.77      0.71        22
           9       0.91      0.87      0.89        23
          10       0.74      1.00      0.85        17
          11       1.00      0.86      0.93        22
          12       0.80      0.73      0.76        11
          13       1.00      0.88      0.94        17
          14       0.94      1.00      0.97        16
          15       0.85      1.00      0.92        28
          16       0.6

[WinError 2] The system cannot find the file specified
  File "C:\Users\Indhu\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\Indhu\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


K-Nearest Neighbors Accuracy: 0.8451
              precision    recall  f1-score   support

           0       1.00      0.82      0.90        22
           1       0.62      0.77      0.69        13
           2       0.95      0.82      0.88        22
           3       1.00      1.00      1.00        16
           4       0.81      0.91      0.86        23
           5       0.70      0.76      0.73        21
           6       0.84      0.84      0.84        19
           7       0.80      0.89      0.84         9
           8       0.86      0.86      0.86        22
           9       0.92      1.00      0.96        23
          10       0.94      1.00      0.97        17
          11       0.82      0.82      0.82        22
          12       0.50      0.82      0.62        11
          13       0.94      1.00      0.97        17
          14       0.94      1.00      0.97        16
          15       1.00      0.96      0.98        28
          16       0.57      0.91      0.70 

In [30]:
# Calculate time taken for training and testing
train_time = end_train_time - start_train_time
test_time = end_test_time - start_test_time
print(f"Training time: {train_time} seconds")
print(f"Testing time: {test_time} seconds")

Training time: 0.04194474220275879 seconds
Testing time: 0.422954797744751 seconds


In [32]:
# Evaluate classifiers using k-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for clf_name, clf in classifiers.items():
    print(f"K-Fold Cross Validation for {clf_name}...")
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []

    for train_index, val_index in kf.split(X_train_features, y_train):
        X_train_fold, X_val_fold = X_train_features[train_index], X_train_features[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        clf.fit(X_train_fold, y_train_fold)
        y_val_pred = clf.predict(X_val_fold)

        accuracy_scores.append(accuracy_score(y_val_fold, y_val_pred))
        precision_scores.append(precision_score(y_val_fold, y_val_pred, average='weighted'))
        recall_scores.append(recall_score(y_val_fold, y_val_pred, average='weighted'))
        f1_scores.append(f1_score(y_val_fold, y_val_pred, average='weighted'))

    print(f"Average Accuracy: {np.mean(accuracy_scores):.4f}")
    print(f"Average Precision: {np.mean(precision_scores):.4f}")
    print(f"Average Recall: {np.mean(recall_scores):.4f}")
    print(f"Average F1 Score: {np.mean(f1_scores):.4f}\n")

K-Fold Cross Validation for Random Forest...
Average Accuracy: 0.7705
Average Precision: 0.7844
Average Recall: 0.7705
Average F1 Score: 0.7631

K-Fold Cross Validation for Logistic Regression...
Average Accuracy: 0.9031
Average Precision: 0.9100
Average Recall: 0.9031
Average F1 Score: 0.9024

K-Fold Cross Validation for K-Nearest Neighbors...
Average Accuracy: 0.8157
Average Precision: 0.8307
Average Recall: 0.8157
Average F1 Score: 0.8123

K-Fold Cross Validation for Support Vector Machine...
Average Accuracy: 0.8893
Average Precision: 0.8993
Average Recall: 0.8893
Average F1 Score: 0.8877

K-Fold Cross Validation for Decision Tree...
Average Accuracy: 0.3532
Average Precision: 0.3608
Average Recall: 0.3532
Average F1 Score: 0.3493

K-Fold Cross Validation for Naive Bayes...
Average Accuracy: 0.7446
Average Precision: 0.7674
Average Recall: 0.7446
Average F1 Score: 0.7436

