# 🌿 Medicinal Plant Identification - Model Training

This notebook provides an interactive environment for training and evaluating the **AI-Based Medicinal Plant Detection System**. 

### Objectives:
1. **Explore** the Indian Medicinal Leaves dataset.
2. **Preprocess** and augment leaf images.
3. **Train** a MobileNetV2 architecture using transfer learning and class weighting.
4. **Evaluate** model performance on imbalanced classes.
5. **Visualize** model interpretations (Explainable AI).

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers, models, applications, callbacks
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
from PIL import Image

# Report the TensorFlow build and whether at least one GPU device is visible.
print(f"TensorFlow Version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {bool(gpu_devices)}")

## 1. Configuration & Data Loading

In [None]:
# Dataset root; the cells below list its sub-folders as classes and hand it to
# flow_from_directory. The path is relative to the working directory.
DATA_DIR = '../dataset/Indian Medicinal Leaves Image Datasets/Medicinal Leaf dataset'
# Image size passed to the generators as target_size.
IMG_SIZE = (224, 224)
# Number of images per batch produced by the generators.
BATCH_SIZE = 32
# NOTE(review): EPOCHS is not referenced by the training cells visible in this
# notebook (both fit() calls pass epochs=5 directly) — confirm the intended value.
EPOCHS = 20

def get_dataset_summary(data_dir):
    """Count the entries inside each class folder of a dataset directory.

    Args:
        data_dir: Path to a directory whose sub-directories are the classes.

    Returns:
        dict mapping each class folder name to the number of entries it
        contains, inserted in sorted name order.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist.
    """
    counts = {}
    for cls in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, cls)
        # Stray files next to the class folders (.DS_Store, README, ...) are
        # not classes; calling os.listdir() on them raises NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        counts[cls] = len(os.listdir(class_dir))
    return counts

# Print one line per class folder with the number of entries it holds.
summary = get_dataset_summary(DATA_DIR)
print("Class Distribution:")
for class_label, image_total in summary.items():
    print(f"{class_label:20}: {image_total} images")

## 2. Data Generators & Augmentation

In [None]:
# Fraction of each class held out for validation. Both generators below must
# use the same value so that they partition the files consistently.
VALIDATION_SPLIT = 0.2

# Training pipeline: rescale pixels to [0, 1] and apply random augmentation.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

# Validation pipeline: rescale only. Reusing the augmenting generator for the
# validation subset would randomly distort the held-out images, making the
# validation metrics noisy and different from one evaluation to the next.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# shuffle=False keeps predictions in the same order as val_generator.classes,
# which the evaluation cell relies on.
val_generator = val_datagen.flow_from_directory(
    DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Class names in the order of class_indices, as reported by the generator.
class_names = list(train_generator.class_indices.keys())
NUM_CLASSES = len(class_names)

## 3. Visualize Samples

In [None]:
def plot_samples(generator, class_names, num_samples=9):
    """Show the first images of the next batch, titled with their class names.

    Args:
        generator: Iterator whose ``next()`` yields ``(images, labels)``; the
            label of each image is read with ``np.argmax``.
        class_names: Sequence of class names indexed by label position.
        num_samples: Maximum number of images to display.
    """
    images, labels = next(generator)
    n_shown = min(num_samples, len(images))
    if n_shown <= 0:
        # Nothing to draw; avoid opening an empty figure.
        return

    # Size the grid from the number of images actually shown. A fixed 3x3
    # grid makes plt.subplot raise as soon as more than 9 images are requested.
    n_cols = int(np.ceil(np.sqrt(n_shown)))
    n_rows = int(np.ceil(n_shown / n_cols))

    plt.figure(figsize=(12, 12))
    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(images[i])
        plt.title(class_names[np.argmax(labels[i])])
        plt.axis('off')
    plt.show()

plot_samples(train_generator, class_names)

## 4. Model Architecture (MobileNetV2)

In [None]:
def build_model(num_classes, input_shape=(224, 224, 3)):
    """Build and compile a MobileNetV2-based classifier with a frozen backbone.

    Args:
        num_classes: Number of units in the final softmax layer.
        input_shape: Shape of one input image passed to MobileNetV2.
            Defaults to ``(224, 224, 3)``.

    Returns:
        Tuple ``(model, base_model)``: the compiled Sequential model and the
        MobileNetV2 backbone inside it (returned so it can be unfrozen later).
    """
    # NOTE(review): the data generators in this notebook rescale pixels to
    # [0, 1], while the Keras documentation for MobileNetV2 describes
    # mobilenet_v2.preprocess_input as scaling inputs to [-1, 1] — consider
    # aligning the two; left unchanged here to keep the saved model's input
    # contract intact.
    base_model = applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet'
    )
    # Freeze the backbone so that only the new head is trained at first.
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3, name='top_3_accuracy')]
    )
    return model, base_model

# Derive the model input shape from IMG_SIZE so the generators and the model
# cannot drift apart.
model, base_model = build_model(NUM_CLASSES, input_shape=IMG_SIZE + (3,))
model.summary()

## 5. Calculate Class Weights

In [None]:
# Integer class index of every training sample, as exposed by the generator.
labels = train_generator.classes
present_classes = np.unique(labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=labels
)
# Key each weight by its actual class index. enumerate() would shift the
# mapping whenever some class index has no training samples, silently
# assigning weights to the wrong classes.
class_weights_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}
print("Calculated Class Weights:")
print(class_weights_dict)

## 6. Training - Phase 1 (Feature Extraction)

In [None]:
# Phase 1: train with the backbone frozen (see build_model), weighting the
# loss per class with the weights computed above.
print("Starting Training Phase 1...")
history1 = model.fit(
    train_generator,
    epochs=5,
    class_weight=class_weights_dict,
    validation_data=val_generator
)

## 7. Training - Phase 2 (Fine-tuning)

In [None]:
print("Unfreezing base model layers for fine-tuning...")
# Index of the first backbone layer that is fine-tuned; earlier layers stay frozen.
FINE_TUNE_AT = 100

base_model.trainable = True
for layer in base_model.layers[:FINE_TUNE_AT]:
    layer.trainable = False

# Keep BatchNormalization layers frozen in the unfrozen part as well. With
# trainable=False a Keras BatchNormalization layer runs in inference mode, so
# its moving statistics are not overwritten by the small fine-tuning batches.
for layer in base_model.layers[FINE_TUNE_AT:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Re-compile so the trainable changes take effect; a low learning rate keeps
# the pretrained weights from being changed too aggressively.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(k=3, name='top_3_accuracy')]
)

history2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    class_weight=class_weights_dict
)

## 8. Final Evaluation

In [None]:
def evaluate_performance(model, generator, names):
    """Print a classification report and plot a confusion matrix.

    Args:
        model: Model whose ``predict(generator)`` returns one score per class
            for every sample.
        generator: Data generator exposing ``classes`` (true class indices).
            It must not shuffle, otherwise predictions and ``classes`` are
            not in the same order.
        names: Class names, indexed by class index.
    """
    if getattr(generator, 'shuffle', False):
        import warnings
        warnings.warn(
            "evaluate_performance received a shuffling generator; predictions "
            "will not line up with generator.classes.",
            stacklevel=2,
        )

    Y_pred = model.predict(generator)
    y_pred = np.argmax(Y_pred, axis=1)
    y_true = generator.classes

    # Pass the full label set explicitly: without it, a class that is absent
    # from both y_true and y_pred makes classification_report raise because
    # the number of labels no longer matches target_names, and the confusion
    # matrix would no longer match the tick labels.
    label_ids = np.arange(len(names))

    print("Classification Report:")
    print(classification_report(
        y_true, y_pred, labels=label_ids, target_names=names, zero_division=0
    ))

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(15, 12))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=names, yticklabels=names, cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()

evaluate_performance(model, val_generator, class_names)

## 9. Save Model

In [None]:
# Persist the trained model; the path is relative to the working directory.
model_path = 'models/medicinal_plant_model_v2.h5'
model.save(model_path)
print("Model saved to " + model_path)