In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import mahotas as mt

  from pandas.core import (


In [2]:
def _extract_leaf_features(main_img):
    """Compute the numeric shape, colour and texture features of one image.

    Parameters
    ----------
    main_img : numpy.ndarray
        Image as returned by ``cv2.imread`` with default flags (BGR order).

    Returns
    -------
    list or None
        ``[area, perimeter, width, height, aspect_ratio, rectangularity,
        circularity, mean_r, mean_g, mean_b, stddev_r, stddev_g, stddev_b,
        contrast, correlation, inverse_difference_moments, entropy]``,
        or ``None`` when no contour with a non-zero area is found.
    """
    # Preprocessing: grayscale -> blur -> inverted Otsu threshold -> closing.
    img = cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)
    gs = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gs, (25, 25), 0)
    _, im_bw_otsu = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    kernel = np.ones((50, 50), np.uint8)
    closing = cv2.morphologyEx(im_bw_otsu, cv2.MORPH_CLOSE, kernel)

    # Shape features, measured on the largest contour of the binary mask.
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; [-2] picks the contours in all cases.
    contours = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return None
    cnt = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(cnt)
    if area == 0:
        # Degenerate contour: the ratios below would divide by zero.
        return None
    perimeter = cv2.arcLength(cnt, True)
    _, _, w, h = cv2.boundingRect(cnt)
    aspect_ratio = float(w) / h
    rectangularity = w * h / area
    circularity = (perimeter ** 2) / area

    # Colour features: per-channel mean / std after replacing every 255 value
    # with 0. The channel slices are views into ``img``, so the assignment
    # modifies ``img`` in place; ``gs`` was computed earlier and is unaffected.
    channels = [img[:, :, idx] for idx in range(3)]  # R, G, B
    for channel in channels:
        channel[channel == 255] = 0
    channel_means = [np.mean(channel) for channel in channels]
    channel_stds = [np.std(channel) for channel in channels]

    # Texture features: Haralick descriptors averaged over all directions.
    textures = mt.features.haralick(gs)
    ht_mean = textures.mean(axis=0)
    contrast = ht_mean[1]
    correlation = ht_mean[2]
    inverse_diff_moments = ht_mean[4]
    entropy = ht_mean[8]

    return [area, perimeter, w, h, aspect_ratio, rectangularity, circularity,
            *channel_means, *channel_stds,
            contrast, correlation, inverse_diff_moments, entropy]


def extract_features_from_directory(root_dir):
    """Build a feature table from a directory tree of labelled leaf images.

    Every immediate sub-directory of ``root_dir`` is treated as one class; its
    name is stored in the ``leaf_name`` column. Files ending in ``.jpg``,
    ``.jpeg`` or ``.png`` (matched case-insensitively) are processed.

    Sub-directories and files are visited in sorted order so the row order is
    reproducible. Entries that are not regular files, images OpenCV cannot
    decode, and images without a non-zero-area contour are skipped instead of
    aborting the whole run.

    Parameters
    ----------
    root_dir : str
        Path of the directory containing one sub-directory per class.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed image, with 17 feature columns
        followed by ``leaf_name``. Empty (columns only) if nothing is processed.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if ``root_dir`` cannot be listed.
    """
    # Define the column names for the dataframe
    names = ['area', 'perimeter', 'physiological_length', 'physiological_width', 'aspect_ratio',
             'rectangularity', 'circularity', 'mean_r', 'mean_g', 'mean_b', 'stddev_r', 'stddev_g', 'stddev_b',
             'contrast', 'correlation', 'inverse_difference_moments', 'entropy', 'leaf_name']
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Feature vectors are collected here and turned into a DataFrame once.
    data = []

    for subdir in sorted(os.listdir(root_dir)):
        subdir_path = os.path.join(root_dir, subdir)

        # Skip if it's not a directory
        if not os.path.isdir(subdir_path):
            continue

        print("Processing images in:", subdir)

        for filename in sorted(os.listdir(subdir_path)):
            if not filename.lower().endswith(image_extensions):
                continue
            imgpath = os.path.join(subdir_path, filename)
            # A directory can carry an image-like name; only read real files.
            if not os.path.isfile(imgpath):
                continue

            main_img = cv2.imread(imgpath)
            # imread signals failure by returning None rather than raising.
            if main_img is None:
                print("Skipping unreadable image:", imgpath)
                continue

            features = _extract_leaf_features(main_img)
            if features is not None:
                data.append(features + [subdir])

    # Create a DataFrame from the list of feature vectors
    return pd.DataFrame(data, columns=names)

In [3]:
# Root folder of the segmented leaf images; each sub-folder is one class.
ds_path = "../Medicinal Leaf Dataset/Segmented Medicinal Leaves"

# Walk the class folders and collect the feature rows into a single DataFrame.
dataset = extract_features_from_directory(root_dir=ds_path)


Processing images in: Alpinia Galanga (Rasna)
Processing images in: Amaranthus Viridis (Arive-Dantu)
Processing images in: Artocarpus Heterophyllus (Jackfruit)
Processing images in: Azadirachta Indica (Neem)
Processing images in: Basella Alba (Basale)
Processing images in: Brassica Juncea (Indian Mustard)
Processing images in: Carissa Carandas (Karanda)
Processing images in: Citrus Limon (Lemon)
Processing images in: Ficus Auriculata (Roxburgh fig)
Processing images in: Ficus Religiosa (Peepal Tree)
Processing images in: Hibiscus Rosa-sinensis
Processing images in: Jasminum (Jasmine)
Processing images in: Mangifera Indica (Mango)
Processing images in: Mentha (Mint)
Processing images in: Moringa Oleifera (Drumstick)
Processing images in: Muntingia Calabura (Jamaica Cherry-Gasagase)
Processing images in: Murraya Koenigii (Curry)
Processing images in: Nerium Oleander (Oleander)
Processing images in: Nyctanthes Arbor-tristis (Parijata)
Processing images in: Ocimum Tenuiflorum (Tulsi)
Proce

In [4]:
# Persist the feature table as CSV without writing the row index.
dataset.to_csv(path_or_buf="../Medicinal_leaf.csv", index=False)