## Importing Libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import cv2

In [2]:
# List the GPU devices TensorFlow can see and report how many there are.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


## Load Dataset

In [3]:
# Dataset locations, all derived from one root folder.
# `data` is the source folder read by the (commented-out) split script below;
# `train`, `val` and `test` are the folders that script copies images into.
dataset_root = r'D:\Projects\ML-projects\Medicinal Plants Classification\Dataset'
dataset_path = dataset_root + r'\data'
train_path = dataset_root + r'\train'
val_path = dataset_root + r'\val'
test_path = dataset_root + r'\test'


In [28]:
# Build an index -> class-name lookup from the training generator and list it.
# NOTE: this cell reads `train_generator`, so the cell that creates the
# generators must have been executed before this one.
class_indices = train_generator.class_indices
class_names = dict(zip(class_indices.values(), class_indices.keys()))

print("Class Names:")
for idx, label_name in class_names.items():
    print(f"Class {idx}: {label_name}")

Class Names:
Class 0: Arive-Dantu
Class 1: Basale
Class 2: Betel
Class 3: Curry
Class 4: Drumstick
Class 5: Fenugreek
Class 6: Guava
Class 7: Hibiscus
Class 8: Indian_Mustard
Class 9: Jackfruit
Class 10: Jamun
Class 11: Karanda
Class 12: Lemon
Class 13: Mango
Class 14: Mint
Class 15: Neem
Class 16: Parijata
Class 17: Peepal
Class 18: Rasna
Class 19: Tulsi


In [4]:
# import os
# import shutil

# # Define paths
# dataset_path = r'D:\Projects\ML-projects\Medicinal Plants Classification\Dataset\data'
# train_path = r'D:\Projects\ML-projects\Medicinal Plants Classification\Dataset\train'
# val_path = r'D:\Projects\ML-projects\Medicinal Plants Classification\Dataset\val'
# test_path = r'D:\Projects\ML-projects\Medicinal Plants Classification\Dataset\test'

# # Create directories if they don't exist
# os.makedirs(train_path, exist_ok=True)
# os.makedirs(val_path, exist_ok=True)
# os.makedirs(test_path, exist_ok=True)

# # Split ratio
# train_ratio = 0.7
# val_ratio = 0.15
# test_ratio = 0.15

# # Loop through each plant folder
# for plant_folder in os.listdir(dataset_path):
#     plant_folder_path = os.path.join(dataset_path, plant_folder)
#     if os.path.isdir(plant_folder_path):
#         # Get all images in the folder
#         images = os.listdir(plant_folder_path)
#         images = [img for img in images if img.endswith(('.jpg', '.jpeg', '.png'))]  # Filter images

#         # Split images into train, val, and test
#         train_images, test_images = train_test_split(images, test_size=1 - train_ratio, random_state=42)
#         val_images, test_images = train_test_split(test_images, test_size=test_ratio/(test_ratio + val_ratio), random_state=42)

#         # Create subfolders for each plant in train, val, and test
#         os.makedirs(os.path.join(train_path, plant_folder), exist_ok=True)
#         os.makedirs(os.path.join(val_path, plant_folder), exist_ok=True)
#         os.makedirs(os.path.join(test_path, plant_folder), exist_ok=True)

#         # Copy images to respective folders
#         for img in train_images:
#             shutil.copy(os.path.join(plant_folder_path, img), os.path.join(train_path, plant_folder, img))
#         for img in val_images:
#             shutil.copy(os.path.join(plant_folder_path, img), os.path.join(val_path, plant_folder, img))
#         for img in test_images:
#             shutil.copy(os.path.join(plant_folder_path, img), os.path.join(test_path, plant_folder, img))

## Define Data Generators:

In [5]:
# Training-time preprocessing: pixel values are multiplied by 1/255 and the
# images are randomly rotated, shifted, sheared, zoomed and flipped.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [6]:
# Validation and test images only get the 1/255 rescaling (no augmentation).
# Two independent generator objects are created, one per split.
val_datagen, test_datagen = (ImageDataGenerator(rescale=1./255) for _ in range(2))

In [7]:
# Settings shared by all three directory iterators.
flow_options = dict(target_size=(224, 224), batch_size=16, class_mode='categorical')

train_generator = train_datagen.flow_from_directory(train_path, **flow_options)
val_generator = val_datagen.flow_from_directory(val_path, **flow_options)
# Shuffling is switched off for the test iterator only.
test_generator = test_datagen.flow_from_directory(test_path, shuffle=False, **flow_options)

Found 6560 images belonging to 20 classes.
Found 1411 images belonging to 20 classes.
Found 1429 images belonging to 20 classes.


## Load a Pre-trained Model

In [8]:
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model

In [9]:
from tensorflow.keras.applications import VGG16
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# VGG16 with ImageNet weights and without its top (classifier) layers,
# taking 224x224 RGB input.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)


In [10]:

# Mark every layer of the VGG16 base as non-trainable.
for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

In [11]:


def extract_features(generator, num_samples, model=None):
    """Run batches from ``generator`` through a feature extractor and collect the results.

    The output buffers are sized from the first batch, so the function is not
    tied to a particular feature-map shape or number of classes.

    Args:
        generator: Iterable yielding ``(inputs_batch, labels_batch)`` pairs.
            It may loop forever; iteration stops once ``num_samples`` rows
            have been collected.
        num_samples: Number of samples (rows) to collect.
        model: Optional object exposing a Keras-style
            ``predict(batch, verbose=0)``. Defaults to the notebook-level
            ``base_model``.

    Returns:
        Tuple ``(features, labels)``. Both arrays have ``num_samples`` rows;
        the trailing dimensions and dtypes follow the predicted batches and
        the label batches. If the generator runs out early, the remaining
        rows stay zero.

    Raises:
        ValueError: If the generator yields no batch at all.
    """
    extractor = base_model if model is None else model
    features = None
    labels = None
    filled = 0

    for inputs_batch, labels_batch in generator:
        # verbose=0 avoids one progress bar per batch in the cell output.
        features_batch = np.asarray(extractor.predict(inputs_batch, verbose=0))
        labels_batch = np.asarray(labels_batch)

        if features is None:
            # Allocate lazily with the batch dtype instead of a hard-coded
            # float64 (num_samples, 7, 7, 512) / (num_samples, 20) buffer.
            features = np.zeros((num_samples,) + features_batch.shape[1:],
                                dtype=features_batch.dtype)
            labels = np.zeros((num_samples,) + labels_batch.shape[1:],
                              dtype=labels_batch.dtype)

        # Never write past the buffer: trim the batch when fewer rows remain.
        take = min(len(features_batch), num_samples - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take

        if filled >= num_samples:
            break

    if features is None:
        raise ValueError("generator yielded no batches; cannot extract features")
    return features, labels

In [12]:
# Run each split through `extract_features`, collecting as many rows as the
# generator reports in `.samples`.
# (Optional tweak kept from earlier experiments: set the generators'
# `batch_size` to 8 before this step, e.g. `train_generator.batch_size = 8`.)
train_features, train_labels = extract_features(
    generator=train_generator, num_samples=train_generator.samples)
val_features, val_labels = extract_features(
    generator=val_generator, num_samples=val_generator.samples)
test_features, test_labels = extract_features(
    generator=test_generator, num_samples=test_generator.samples)



In [13]:

# Collapse each 7x7x512 feature map into one row vector per image.
flat_feature_len = 7 * 7 * 512
train_features = train_features.reshape((train_generator.samples, flat_feature_len))
val_features = val_features.reshape((val_generator.samples, flat_feature_len))
test_features = test_features.reshape((test_generator.samples, flat_feature_len))


In [14]:

# Reduce each per-class label row to the index of its largest entry.
# NOTE: the names are overwritten in place, so running this cell a second
# time fails (the arrays are 1-D by then and have no axis 1).
train_labels = train_labels.argmax(axis=1)
val_labels = val_labels.argmax(axis=1)
test_labels = test_labels.argmax(axis=1)


In [15]:

# Wrap the flattened features and integer labels as LightGBM datasets.
# The validation dataset is created with the training dataset as its reference.
lgb_train = lgb.Dataset(data=train_features, label=train_labels)
lgb_val = lgb.Dataset(data=val_features, label=val_labels, reference=lgb_train)

In [16]:


# LightGBM settings: 20-class multiclass objective evaluated with multi_logloss.
params = dict(
    objective='multiclass',
    num_class=20,
    metric='multi_logloss',
    boosting_type='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,
)
evals_result = {}  # Filled in by the record_evaluation callback during training

training_callbacks = [
    lgb.early_stopping(stopping_rounds=10),
    lgb.record_evaluation(evals_result),
]

# Train for up to 100 rounds, evaluating on both the training and validation sets.
model = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_train, lgb_val],
    valid_names=['train', 'val'],
    callbacks=training_callbacks,
)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 3.919807 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3729148
[LightGBM] [Info] Number of data points in the train set: 6560, number of used features: 21345
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -2.974615
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -3.263293
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -3.084963
[LightGBM] [Info] Start training from score -3.320686
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -3.182944
[LightGBM] [Info] Start training from score -2.933674
[LightGBM] [Info] Start training from score -2.933674
[Lig

In [17]:
# Score the test features and keep, per row, the index of the highest score.
test_preds = np.argmax(model.predict(test_features), axis=1)

In [18]:
# Accuracy of the predicted class ids against the true test labels.
test_accuracy = accuracy_score(y_true=test_labels, y_pred=test_preds)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.9629111266620014


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming you have:
# y_true = true labels (shape: [n_samples])
# y_pred = predicted labels (shape: [n_samples])
# y_probs = predicted probabilities (shape: [n_samples, n_classes])

def calculate_metrics(y_true, y_pred, y_probs, class_names):
    """
    Calculate classification metrics, print a per-class report and save a
    confusion-matrix heatmap.

    Args:
        y_true: True labels (1D array).
        y_pred: Predicted labels (1D array).
        y_probs: Predicted probabilities (2D array, one row per sample).
        class_names: Class names, either a sequence ordered by class index or
            an ``{index: name}`` mapping (the form built earlier in this
            notebook).

    Returns:
        Dictionary with 'accuracy', weighted 'precision', 'recall' and 'f1',
        'confusion_matrix', and 'top2_accuracy' when ``y_true`` holds more
        than two distinct labels.

    Side effects:
        Prints the classification report and writes 'confusion_matrix.png'
        to the current working directory.
    """
    # Normalise class_names to a list ordered by class index; a raw dict
    # passed as target_names would be iterated by its keys, not its names.
    if isinstance(class_names, dict):
        class_names = [class_names[idx] for idx in sorted(class_names)]
    else:
        class_names = list(class_names)

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # With integer class ids, pin the label set to 0..n_classes-1 so the
    # report rows and the matrix axes stay aligned with class_names even when
    # a class is missing from y_true/y_pred. Other label types keep
    # scikit-learn's default label inference.
    if np.issubdtype(y_true.dtype, np.integer) and np.issubdtype(y_pred.dtype, np.integer):
        label_ids = np.arange(len(class_names))
    else:
        label_ids = None

    # 1. Basic Metrics
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='weighted'),
        'recall': recall_score(y_true, y_pred, average='weighted'),
        'f1': f1_score(y_true, y_pred, average='weighted'),
    }

    # 2. Class-wise Metrics
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    # 3. Confusion Matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    metrics['confusion_matrix'] = cm

    # Draw on an explicit figure so nothing depends on pyplot's current figure,
    # and close exactly that figure afterwards.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names,
                ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.savefig('confusion_matrix.png', bbox_inches='tight')
    plt.close(fig)

    # 4. Top-2 accuracy (only for multi-class): a sample counts as correct
    # when its true label is one of the two highest-probability column indices.
    if len(np.unique(y_true)) > 2:
        top2_correct = sum(
            true_class in np.argsort(sample_probs)[-2:]
            for true_class, sample_probs in zip(y_true, y_probs)
        )
        metrics['top2_accuracy'] = top2_correct / len(y_true)

    return metrics

# Example Usage:
# Assuming you have:
# - test_labels: True labels for test set
# - predictions: Model predictions (class indices)
# - probabilities: Prediction probabilities from LightGBM
# - class_names: List like ['Basale', 'Curry', 'Mint', ...]

# metrics = calculate_metrics(test_labels, predictions, probabilities, class_names)
# print(f"Overall Accuracy: {metrics['accuracy']:.2%}")
# print(f"Weighted F1-Score: {metrics['f1']:.4f}")

In [4]:
# Per-class precision / recall / F1 table.
# NOTE(review): `y_true` and `y_pred` are not assigned in any cell visible
# here — confirm where they come from before relying on this cell.
print("\nClassification Report:")
class_report = classification_report(y_true, y_pred, target_names=class_names)
print(class_report)


Classification Report:
              precision    recall  f1-score   support

      Basale       1.00      1.00      1.00         2
       Curry       0.50      0.50      0.50         2
        Mint       0.50      0.50      0.50         2

    accuracy                           0.67         6
   macro avg       0.67      0.67      0.67         6
weighted avg       0.67      0.67      0.67         6



In [None]:
def convert_labels_to_class_names(labels, class_names):
    """Return the entry of ``class_names`` for each label, in order.

    Args:
        labels: Iterable of labels used to index ``class_names``.
        class_names: Indexable container (e.g. list or dict) mapping a label
            to its class name.

    Returns:
        List with one looked-up name per label.
    """
    resolved_names = []
    for label in labels:
        resolved_names.append(class_names[label])
    return resolved_names

# Look up the class name for every label in each split.
train_class_names, val_class_names, test_class_names = (
    convert_labels_to_class_names(split_labels, class_names)
    for split_labels in (train_labels, val_labels, test_labels)
)

# Show the first ten names of each split.
print("Train Class Names:", train_class_names[:10])
print("Validation Class Names:", val_class_names[:10])
print("Test Class Names:", test_class_names[:10])

Train Class Names: ['Neem', 'Jackfruit', 'Peepal', 'Curry', 'Hibiscus', 'Mint', 'Peepal', 'Karanda', 'Neem', 'Basale']
Validation Class Names: ['Basale', 'Mango', 'Betel', 'Guava', 'Mango', 'Drumstick', 'Mint', 'Hibiscus', 'Hibiscus', 'Arive-Dantu']
Test Class Names: ['Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu', 'Arive-Dantu']


In [32]:
from sklearn.metrics import classification_report

# NOTE: this repeats a definition from an earlier cell; the later definition is
# the one in effect once this cell has run.
def convert_labels_to_class_names(labels, class_names):
    """Map each label to its entry in ``class_names`` and return the names as a list."""
    return list(map(lambda label: class_names[label], labels))

# Class names as a list, taken from the keys of the name -> index mapping
# (this replaces the earlier dict-valued `class_names`).
class_names = list(class_indices)

# Turn the integer test labels and predictions into class-name strings.
y_true_names = convert_labels_to_class_names(labels=test_labels, class_names=class_names)
y_pred_names = convert_labels_to_class_names(labels=test_preds, class_names=class_names)

# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true_names, y_pred_names, target_names=class_names)

print("Classification Report:")
print(report)

Classification Report:
                precision    recall  f1-score   support

   Arive-Dantu       0.95      0.99      0.97        76
        Basale       1.00      1.00      1.00        76
         Betel       0.97      1.00      0.99        73
         Curry       0.97      0.93      0.95        76
     Drumstick       0.93      0.92      0.93        76
     Fenugreek       0.96      0.91      0.93        55
         Guava       0.97      1.00      0.99        76
      Hibiscus       1.00      0.91      0.95        65
Indian_Mustard       1.00      1.00      1.00        52
     Jackfruit       0.87      0.99      0.93        76
         Jamun       0.95      0.90      0.92        59
       Karanda       0.90      0.95      0.92        76
         Lemon       0.99      0.87      0.92        76
         Mango       0.99      0.99      0.99        76
          Mint       0.95      0.97      0.96        76
          Neem       0.96      1.00      0.98        76
      Parijata       1.0

In [25]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Classify one image from disk with the VGG16 + LightGBM pipeline.
img_path = r'D:\Projects\ML-projects\Medicinal Plants Classification\testing\__0_2979262.png'  # Replace with the path to your image

# Load at 224x224, add a leading batch axis, and apply the same 1/255 scaling
# the data generators use.
img = image.load_img(img_path, target_size=(224, 224))
img_array = np.expand_dims(image.img_to_array(img), axis=0)
img_array /= 255.0

# VGG16 features, flattened to a single row for LightGBM.
features = base_model.predict(img_array).reshape(1, 7 * 7 * 512)

# Per-class scores from LightGBM; keep the index of the best one.
preds = model.predict(features)
pred_class = preds.argmax(axis=1)

print(f"Predicted class: {pred_class[0]}")

Predicted class: 14


In [26]:
# Invert the generator's name -> index mapping into index -> name, so a
# numeric prediction can be translated back to its class name.
class_indices = train_generator.class_indices
class_dict = dict((index, name) for name, index in class_indices.items())

print(class_dict)

{0: 'Arive-Dantu', 1: 'Basale', 2: 'Betel', 3: 'Curry', 4: 'Drumstick', 5: 'Fenugreek', 6: 'Guava', 7: 'Hibiscus', 8: 'Indian_Mustard', 9: 'Jackfruit', 10: 'Jamun', 11: 'Karanda', 12: 'Lemon', 13: 'Mango', 14: 'Mint', 15: 'Neem', 16: 'Parijata', 17: 'Peepal', 18: 'Rasna', 19: 'Tulsi'}


In [27]:
# Persist the trained LightGBM booster under final_model/.
import os
import pickle

# Create the output folder first: open() does not create missing directories,
# so the saves below would fail on a fresh checkout without it.
os.makedirs('final_model', exist_ok=True)

# Save LightGBM model in its native text format (reloadable with lgb.Booster(model_file=...))
model.save_model('final_model/lightgbm_model.txt')

# Alternative: Save as pickle (for Python-only use)
# NOTE: unpickling executes arbitrary code — only load this file from a trusted source.
with open('final_model/lightgbm_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [None]:
from tensorflow.keras.models import save_model

# Persist the VGG16 feature extractor twice: the complete model, then its
# weights on their own.
vgg_model_file = 'final_model/vgg16_feature_extractor.h5'
vgg_weights_file = 'final_model/vgg16_weights.h5'

save_model(base_model, vgg_model_file)     # architecture + weights
base_model.save_weights(vgg_weights_file)  # weights only

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x000001AE00CD1EE0>>
Traceback (most recent call last):
  File "c:\Users\mdsah\anaconda3\envs\tf\lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# 'balanced' class weights computed from `train_labels`, stored as a
# {class label: weight} dictionary.
classes = np.unique(train_labels)
balanced_weights = compute_class_weight(class_weight='balanced', classes=classes, y=train_labels)
class_weights = {cls: weight for cls, weight in zip(classes, balanced_weights)}


In [None]:
import json
from sklearn.preprocessing import LabelEncoder

# Fit a label encoder on the training labels so its `classes_` can be exported.
le = LabelEncoder().fit(train_labels)

# Write the class metadata to class_info.json. Values are converted to plain
# Python int/float/str before serialising.
# Note: 'class_names' holds the keys of `class_weights` cast to int (class ids),
# not the plant names.
with open('class_info.json', 'w') as info_file:
    class_info = {
        'class_names': [int(class_id) for class_id in class_weights.keys()],
        'class_weights': [float(weight) for weight in class_weights.values()],
        'label_encoder': [str(encoded_class) for encoded_class in le.classes_],
    }
    json.dump(class_info, info_file)

In [None]:
import lightgbm as lgb
from tensorflow.keras.models import load_model
# Reload both halves of the pipeline from final_model/.

# Load LightGBM model
model = lgb.Booster(model_file='final_model/lightgbm_model.txt')

# Load VGG16 feature extractor.
# compile=False skips restoring a training configuration from the file, which
# avoids the "compile it manually" warning at load time.
base_model = load_model('final_model/vgg16_feature_extractor.h5', compile=False)

# Compile only after loading, so the call applies to the loaded model and the
# cell also works on a fresh kernel where `base_model` does not exist yet.
base_model.compile()

