In [28]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from skimage import morphology
from rembg import remove  # Correct import for rembg
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from skimage.morphology import skeletonize
from skimage.util import img_as_ubyte
import rembg

In [29]:
# Where the raw leaf images are read from, and where the processed
# (background-removed, edge-detected, augmented) images are written.
input_dir = r"D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset"
output_dir = r"D:\MED_LEAF_ID\data\glcm"

# Create the output folder (and any missing parents); a no-op if it exists.
os.makedirs(output_dir, exist_ok=True)


In [30]:
# Image file suffixes that are accepted as input
allowed_extensions = {
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
}

# Pixel-pair offsets for the GLCM (example values)
distances = [1, 2, 3, 4, 5]

# GLCM directions in radians (example values): 0, 45, 90 and 135 degrees
angles = [
    0,
    np.pi / 4,
    np.pi / 2,
    3 * np.pi / 4,
]

In [31]:
def preprocess_image(image):
    """Strip the background from an image and return its Canny edge map.

    Pipeline: rembg background removal -> CLAHE on the Lab lightness
    channel -> Canny edge detection.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image in OpenCV's BGR channel order (for example the result
        of ``cv2.imread``).

    Returns
    -------
    tuple
        ``(edges, None)``. ``edges`` is the array produced by ``cv2.Canny``.
        The second element is always ``None``; it is kept so existing
        callers can continue to unpack two values.
    """
    # Encode the array exactly as received. cv2.imencode and cv2.imdecode
    # both assume BGR order, so converting to RGB beforehand would hand
    # rembg a red/blue-swapped picture and make the BGR2Lab step below
    # operate on swapped channels as well.
    input_data = cv2.imencode('.png', image)[1].tobytes()
    output_data = rembg.remove(input_data)
    image_no_bg = cv2.imdecode(np.frombuffer(output_data, np.uint8), cv2.IMREAD_UNCHANGED)

    # IMREAD_UNCHANGED keeps an alpha channel when the PNG has one; drop it
    # explicitly so the colour conversions below see a plain BGR image.
    if image_no_bg.ndim == 3 and image_no_bg.shape[2] == 4:
        image_no_bg = cv2.cvtColor(image_no_bg, cv2.COLOR_BGRA2BGR)

    # CLAHE (Contrast Limited Adaptive Histogram Equalization) is applied to
    # the lightness channel only, leaving the a/b colour channels untouched.
    lab = cv2.cvtColor(image_no_bg, cv2.COLOR_BGR2Lab)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    limg = cv2.merge((cl, a, b))
    processed_img = cv2.cvtColor(limg, cv2.COLOR_Lab2BGR)

    # Edge detection (Canny) with lower/upper hysteresis thresholds 100/200
    processed_img = cv2.Canny(processed_img, 100, 200)

    return processed_img, None

In [36]:
def extract_glcm_features(image, distances, angles):
    """Compute GLCM texture descriptors for every (distance, angle) pair.

    Parameters
    ----------
    image : numpy.ndarray
        Either a single-channel image, or a 3-dimensional colour array that
        is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` first.
    distances : sequence of int
        Pixel-pair distances passed to ``graycomatrix``.
    angles : sequence of float
        Pixel-pair angles in radians passed to ``graycomatrix``.

    Returns
    -------
    list
        Flat list holding seven values per (distance, angle) pair, iterating
        over distances in the outer loop and angles in the inner loop. Each
        group is ordered: contrast, energy, homogeneity, correlation,
        dissimilarity, ASM, entropy.

    NOTE(review): the 'entropy' property is only accepted by newer
    scikit-image releases -- confirm the installed version supports it.
    """
    property_names = (
        'contrast',
        'energy',
        'homogeneity',
        'correlation',
        'dissimilarity',
        'ASM',
        'entropy',
    )

    # Check if the image is already in grayscale (single channel)
    if len(image.shape) == 3:  # Color image (3 channels)
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray_image = image  # If already grayscale, no need to convert

    distances = list(distances)
    angles = list(angles)
    if not distances or not angles:
        # No (distance, angle) pairs requested -> no features.
        return []

    # graycomatrix and graycoprops are vectorised over distances and angles:
    # build one 4-D matrix and one (n_distances, n_angles) table per
    # property instead of recomputing the matrix for every pair.
    glcm = graycomatrix(gray_image, distances=distances, angles=angles, symmetric=True, normed=True)
    tables = [graycoprops(glcm, name) for name in property_names]

    features = []
    for d_idx in range(len(distances)):
        for a_idx in range(len(angles)):
            features.extend(table[d_idx, a_idx] for table in tables)

    return features


In [38]:
def skeletonize_image(image):
    """Return a 0/255 ``uint8`` skeleton of ``image``.

    A 3-dimensional input is converted to grayscale first; the grayscale
    image is then binarised at a fixed threshold of 127 and thinned with
    ``skeletonize``.
    """
    is_color = len(image.shape) == 3
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image

    # THRESH_BINARY: pixels above 127 become 255, everything else 0.
    binary = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)[1]

    # skeletonize receives a 0/1 image; scale its result back to 0/255.
    thinned = skeletonize(binary // 255)
    return thinned.astype(np.uint8) * 255


In [34]:
def augmentations(image):
    """Run ``image`` through a torchvision augmentation chain and return it.

    The chain is: random horizontal flip, random rotation (``30``), random
    resized crop to ``224``, and colour jitter on brightness, contrast,
    saturation and hue (each with factor ``0.5``).

    NOTE(review): every step is random and no seed is set in this function,
    so repeated calls on the same input can give different results.
    """
    steps = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(image)


In [39]:
# Walk the dataset, and for every image: preprocess, augment, extract GLCM
# features, skeletonize, and save both the augmented image and its skeleton
# under output_dir (mirroring the input folder layout). One feature row per
# image is collected in features_list.
features_list = []
for root, _, files in os.walk(input_dir):
    for filename in files:
        if os.path.splitext(filename)[1].lower() in allowed_extensions:
            img_path = os.path.join(root, filename)

            # Read Image
            image = cv2.imread(img_path)

            if image is None:
                print(f"❌ Skipping unreadable image: {img_path}")
                continue

            # Apply preprocessing (background removal, CLAHE, edge detection)
            processed_img, mask = preprocess_image(image)

            # Convert to PIL for augmentation
            processed_pil = Image.fromarray(processed_img)

            # Apply Augmentation using Torchvision
            augmented_pil = augmentations(processed_pil)

            # Convert back to NumPy
            augmented_np = np.array(augmented_pil)

            # Extract GLCM features
            glcm_features = extract_glcm_features(augmented_np, distances, angles)

            # Perform Skeletonization
            skeleton_image = skeletonize_image(augmented_np)

            # Extract class name from the folder structure (parent directory)
            class_name = os.path.basename(root)

            # Record class name, image name and GLCM features for this image
            # (the skeleton is saved to disk below, not stored in the list)
            features_list.append([class_name, filename] + glcm_features)

            # Generate output path preserving folder structure
            relative_path = os.path.relpath(img_path, input_dir)
            save_path = os.path.join(output_dir, relative_path)

            # Ensure subdirectory exists
            os.makedirs(os.path.dirname(save_path), exist_ok=True)

            # Save Processed and Augmented Images
            augmented_pil.save(save_path)

            # Save Skeleton Image.
            # Build the name from the path stem: str.replace() on the
            # extension would rewrite every occurrence of e.g. ".jpg" in the
            # path (including directory names), not just the trailing suffix.
            skeleton_pil = Image.fromarray(skeleton_image)
            skeleton_path = os.path.splitext(save_path)[0] + '_skeleton.png'
            skeleton_pil.save(skeleton_path)

            print(f"✅ Saved: {save_path}, Skeleton: {skeleton_path}")

✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\10.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\10_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\100.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\100_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\102.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\102_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\104.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\104_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\106.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\106_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\108.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\108_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\110.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\110_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\112.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\112_skeleton.png
✅ Saved: D:\MED_LEAF_ID\data\glcm\Aloevera\114.jpg, Skeleton: D:\MED_LEAF_ID\data\glcm\Aloevera\11

In [40]:
# Header for the feature table: two identifier columns, then seven GLCM
# property columns for each (distance, angle) pair, distances outermost.
columns = ['Class_Name', 'Image_Name']
for d in distances:
    for angle in angles:
        suffix = f'_d{d}_a{angle}'
        columns.extend(
            prop + suffix
            for prop in (
                'contrast',
                'energy',
                'homogeneity',
                'correlation',
                'dissimilarity',
                'asm',
                'entropy',
            )
        )

In [41]:
# Assemble one row per processed image into a table.
df = pd.DataFrame(features_list, columns=columns)

# Use a dedicated name for the CSV folder. Rebinding `output_dir` here would
# silently redirect the image-saving loop (which also reads `output_dir`)
# into this folder if that cell were re-run after this one.
csv_dir = r'D:\MED_LEAF_ID\data'
os.makedirs(csv_dir, exist_ok=True)

# Save the DataFrame to a CSV file (without the index column)
output_file = os.path.join(csv_dir, 'glcm_features.csv')
df.to_csv(output_file, index=False)

print(f"Features extracted and saved to {output_file}")

Features extracted and saved to D:\MED_LEAF_ID\data\glcm_features.csv
