In [1]:
import os
import shutil
from pathlib import Path
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split



In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [13]:
# Input paths.
# NOTE(review): these are absolute paths on a local Windows drive (S:), not Kaggle
# input paths, while the output folders are rooted at /kaggle/working -- confirm the
# intended environment and adjust these before running the notebook elsewhere.
leaf_path = Path(r"S:\archive\Indian Medicinal Leaves Image Datasets\Medicinal Leaf dataset")
plant_path = Path(r"S:\archive\Indian Medicinal Leaves Image Datasets\Medicinal plant dataset")



In [14]:

# Output folders: one sub-directory per split under a common root
base_dir = Path("/kaggle/working/combined_dataset")
train_dir, val_dir, test_dir = (base_dir / split for split in ("train", "val", "test"))






In [15]:
# Create every split root up front. test_dir is included so that all three
# directories exist even when no class ends up being copied into them.
for d in [train_dir, val_dir, test_dir]:
    d.mkdir(parents=True, exist_ok=True)



In [16]:
def prepare_dataset(source_dir, label_prefix):
    """Split every class folder of ``source_dir`` and copy it into the split folders.

    Each immediate sub-directory of ``source_dir`` is treated as one class. Its
    files are split roughly 70/15/15 into train/validation/test subsets and
    copied to the module-level ``train_dir``, ``val_dir`` and ``test_dir``
    under a folder named ``"<label_prefix>_<class>"``.

    Args:
        source_dir: Directory containing one sub-directory per class
            (``str`` or ``Path``).
        label_prefix: Prefix for the output class folder names; also shown in
            the progress-bar description.

    A class with too few files to populate all three subsets is skipped with a
    message instead of aborting the whole run.
    """
    source_dir = Path(source_dir)
    classes = sorted(entry.name for entry in source_dir.iterdir() if entry.is_dir())
    for cls in tqdm(classes, desc=f"Processing {label_prefix}"):
        img_dir = source_dir / cls
        # glob() order depends on the filesystem; sorting makes the seeded split
        # reproducible. is_file() drops sub-directories whose name contains a dot.
        images = sorted(p for p in img_dir.glob("*.*") if p.is_file())
        label = f"{label_prefix}_{cls}"

        try:
            train_imgs, temp_imgs = train_test_split(images, test_size=0.3, random_state=42)
            val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)
        except ValueError as err:
            # train_test_split raises ValueError when a resulting subset would be empty.
            tqdm.write(f"Skipping {label}: cannot split {len(images)} image(s) ({err})")
            continue

        for subset, imgs in zip([train_dir, val_dir, test_dir], [train_imgs, val_imgs, test_imgs]):
            cls_dir = subset / label
            cls_dir.mkdir(parents=True, exist_ok=True)
            for img in imgs:
                shutil.copy(img, cls_dir / img.name)


In [17]:
# Combine both datasets: each source gets its own label prefix
for source, prefix in ((leaf_path, "leaf"), (plant_path, "plant")):
    prepare_dataset(source, prefix)


Processing leaf: 100%|██████████| 80/80 [02:12<00:00,  1.65s/it]
Processing plant: 100%|██████████| 40/40 [01:31<00:00,  2.29s/it]


In [18]:
# Parameters
batch_size = 32         # images per batch yielded by the directory iterators
img_size = (224, 224)   # passed as target_size when loading images



In [19]:
# Data generators: one independent generator per split, each only multiplying
# pixel values by 1/255 (no augmentation)
train_gen, val_gen, test_gen = (ImageDataGenerator(rescale=1./255) for _ in range(3))



In [20]:
# Options shared by all three directory iterators
flow_kwargs = dict(target_size=img_size, batch_size=batch_size, class_mode='categorical')

train_data = train_gen.flow_from_directory(train_dir, **flow_kwargs)
val_data = val_gen.flow_from_directory(val_dir, **flow_kwargs)
# The test iterator is not shuffled, so its batches follow directory order
test_data = test_gen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)



Found 8947 images belonging to 120 classes.
Found 1921 images belonging to 120 classes.
Found 1977 images belonging to 120 classes.


In [21]:
# VGG16 Transfer Learning
# The input shape is derived from img_size so the model input always matches the
# target_size used by the data iterators (3 = RGB channels).
base_model = VGG16(include_top=False, input_shape=(*img_size, 3), weights='imagenet')
# Freeze the pretrained convolutional base; setting trainable on the model
# propagates to all of its layers.
base_model.trainable = False



In [22]:
# Classification head stacked on the VGG16 feature maps
flat_features = Flatten()(base_model.output)
hidden = Dense(256, activation='relu')(flat_features)
regularised = Dropout(0.4)(hidden)
# One softmax unit per class discovered in the training directory
output = Dense(train_data.num_classes, activation='softmax')(regularised)



In [23]:
# Assemble the full network and configure it for multi-class training
model = Model(inputs=base_model.input, outputs=output)
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)



In [24]:
# Train, reporting validation metrics after each epoch
history = model.fit(
    x=train_data,
    validation_data=val_data,
    epochs=10,
)



  self._warn_if_super_not_called()


Epoch 1/10
[1m175/280[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m10:38[0m 6s/step - accuracy: 0.0356 - loss: 4.7051

KeyboardInterrupt: 

In [None]:

# Save the trained model to a single HDF5 file
model_path = "/kaggle/working/medicinal_model.h5"
model.save(model_path)


In [None]:
# Evaluate on test set
# evaluate() returns the loss followed by the metrics given to compile()
eval_results = model.evaluate(test_data)
loss, acc = eval_results
print(f"Test Accuracy: {acc:.2f}")



In [None]:
# View some predictions
# (pyplot and numpy are already imported in the first cell, so no re-import here.)

x_test, y_test = next(test_data)
predictions = model.predict(x_test)
pred_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

# Build the index -> name lookup from the index values themselves rather than
# relying on the dict's key order.
index_to_label = {idx: name for name, idx in test_data.class_indices.items()}
class_labels = [index_to_label[i] for i in range(len(index_to_label))]

# Never index past the end of the batch (it can hold fewer than 6 images).
n_show = min(6, len(x_test))

plt.figure(figsize=(12, 8))
for i in range(n_show):
    plt.subplot(2, 3, i+1)
    plt.imshow(x_test[i])
    plt.title(f"True: {class_labels[true_classes[i]]}\nPred: {class_labels[pred_classes[i]]}")
    plt.axis('off')
plt.tight_layout()
plt.show()


In [None]:
# Pick a distribution strategy: TPU when one can be initialised, otherwise the
# default strategy (GPU/CPU).
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print("Using TPU")
except Exception as err:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still propagate; report why the TPU path was not taken.
    print(f"TPU initialisation failed: {err!r}")
    strategy = tf.distribute.get_strategy()
    print("Using GPU/CPU")

# NOTE(review): the model must be built and compiled inside this scope for the
# strategy to apply; the model created earlier in the notebook is not.
with strategy.scope():
    # define model here
    ...
