In [4]:
import os
import cv2
import numpy as np
import pandas as pd
import skimage.feature as feature
from PIL import Image
from rembg import remove
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE, ADASYN
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.ensemble import VotingClassifier

In [5]:
# Source dataset root and the destination folder for preprocessed images and
# the exported feature table. The destination is created up front.
input_dir = r"D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset"
output_dir = r"D:/MED_LEAF_ID-1/preprocessed_glcm"
os.makedirs(output_dir, exist_ok=True)

# File suffixes (lower-case, including the dot) that are treated as images
allowed_extensions = set((".jpg", ".jpeg", ".png", ".bmp"))

# GLCM sampling grid: pixel offsets 1..5 along 0, 45, 90 and 135 degrees.
# The first angle is deliberately the int 0 (not 0.0): the value is
# interpolated verbatim into the feature column names.
distances = list(range(1, 6))
angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]

In [6]:
def preprocess_for_glcm(image):
    """
    Turn a colour leaf photo into the single-channel image used for GLCM extraction.

    Pipeline:
    1. Background removal (rembg)
    2. Grayscale conversion
    3. Non-local-means denoising
    4. CLAHE contrast enhancement
    5. Gaussian adaptive thresholding
    6. 3x3 morphological closing
    7. Blend with a Canny edge map (weights 0.8 / 0.2)

    Args:
        image: 3-channel uint8 array in OpenCV's BGR channel order, i.e. what
            ``cv2.imread`` returns.

    Returns:
        2-D uint8 array. The thresholded image and the Canny edge map are both
        0/255, so the 0.8/0.2 blend holds at most four distinct gray levels
        (0, 51, 204, 255).

    NOTE(review): with so few gray levels the co-occurrence matrix describes a
    near-binary pattern rather than the leaf's gray-level texture. Confirm this
    is intended before relying on the "texture" features.
    """
    # OpenCV stores pixels as BGR, while PIL and rembg interpret an array as
    # RGB. Convert first so the segmentation model sees the true colours.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_image)

    # Remove background using rembg
    output_pil = remove(pil_image)
    output_np = np.array(output_pil)

    # Handle RGBA to RGB conversion (the alpha channel is dropped)
    if output_np.shape[2] == 4:
        output_np = cv2.cvtColor(output_np, cv2.COLOR_RGBA2RGB)

    # Convert to grayscale; the data is RGB at this point, so use the RGB code
    gray = cv2.cvtColor(output_np, cv2.COLOR_RGB2GRAY)

    # Non-local means denoising (h=10, template window 7, search window 21)
    denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Contrast-limited adaptive histogram equalization
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(denoised)

    # Gaussian adaptive threshold (11x11 neighbourhood, constant 2) -> 0/255 image
    thresh = cv2.adaptiveThreshold(
        enhanced, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )

    # Morphological closing with a 3x3 kernel to clean up small holes
    kernel = np.ones((3, 3), np.uint8)
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    # Overlay Canny edges of the cleaned image onto it (80% image, 20% edges)
    edges = cv2.Canny(cleaned, 50, 150)
    enhanced_texture = cv2.addWeighted(cleaned, 0.8, edges, 0.2, 0)

    return enhanced_texture

In [7]:
def extract_glcm_features(image):
    """
    Extract GLCM texture descriptors for every (distance, angle) combination.

    The offsets come from the module-level ``distances`` and ``angles`` lists.

    Args:
        image: 2-D intensity image, or a 3-channel BGR image (converted to
            grayscale first).

    Returns:
        Flat list of ``7 * len(distances) * len(angles)`` values. Distances
        vary slowest, then angles; for each pair the order is contrast, energy,
        homogeneity, correlation, dissimilarity, ASM, entropy.
    """
    # Ensure image is 2D
    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Stretch to the full 0..255 range BEFORE casting to uint8. Casting first
    # would truncate or wrap non-uint8 inputs (e.g. floats in [0, 1] collapse
    # to 0/1). For uint8 input the result is the same as before.
    if image.dtype != np.uint8:
        # float32 is a depth cv2.normalize accepts for any numeric source
        image = image.astype(np.float32)
    image = cv2.normalize(
        image, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )

    # One call builds the matrices for all offsets at once; the result has
    # shape (levels, levels, n_distances, n_angles), and each (distance, angle)
    # slice is normalised independently.
    glcm = feature.graycomatrix(
        image,
        distances=distances,
        angles=angles,
        symmetric=True,
        normed=True
    )

    # Each graycoprops result has shape (n_distances, n_angles)
    prop_names = (
        'contrast', 'energy', 'homogeneity',
        'correlation', 'dissimilarity', 'ASM'
    )
    prop_tables = [feature.graycoprops(glcm, name) for name in prop_names]

    # Shannon entropy per offset; eps keeps log2 finite for empty cells
    entropy = -np.sum(
        glcm * np.log2(glcm + np.finfo(float).eps), axis=(0, 1)
    )

    # Flatten in distance-major order so the layout matches the column names
    features_list = []
    for d_idx in range(len(distances)):
        for a_idx in range(len(angles)):
            features_list.extend(table[d_idx, a_idx] for table in prop_tables)
            features_list.append(entropy[d_idx, a_idx])

    return features_list

In [8]:
def process_images():
    """
    Walk ``input_dir``, preprocess every image and export its GLCM features.

    For each file whose extension is in ``allowed_extensions`` the image is
    read, passed through ``preprocess_for_glcm`` and ``extract_glcm_features``,
    and the preprocessed image is written under ``output_dir`` at the same
    relative path. The class label is the name of the image's parent folder.

    Unreadable images are skipped with a message. All feature rows are written
    to ``glcm_features.csv`` inside ``output_dir`` once the walk has finished.

    Returns:
        None. Results are files on disk plus progress messages on stdout.
    """
    features_list = []

    for root, dirs, files in os.walk(input_dir):
        # Sort in place so traversal (and the CSV row order) does not depend
        # on the order the filesystem happens to list entries in.
        dirs.sort()
        for filename in sorted(files):
            if os.path.splitext(filename)[1].lower() not in allowed_extensions:
                continue

            img_path = os.path.join(root, filename)

            # Read image (cv2.imread returns None instead of raising)
            image = cv2.imread(img_path)
            if image is None:
                print(f"❌ Skipping unreadable image: {img_path}")
                continue

            # Preprocess for GLCM
            preprocessed_img = preprocess_for_glcm(image)

            # The parent folder name is the class label
            class_name = os.path.basename(os.path.dirname(img_path))

            # Extract GLCM features
            glcm_features = extract_glcm_features(preprocessed_img)

            # Add class name and filename to features
            features_list.append([class_name, filename] + glcm_features)

            # Save the preprocessed image, mirroring the input folder layout
            relative_path = os.path.relpath(img_path, input_dir)
            save_path = os.path.join(output_dir, relative_path)
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            # cv2.imwrite reports failure through its return value; surface it
            # instead of silently losing the file.
            if not cv2.imwrite(save_path, preprocessed_img):
                print(f"⚠️ Could not save preprocessed image: {save_path}")

            print(f"✅ Processed: {img_path}")

    # Column names follow the value order produced by extract_glcm_features:
    # distance, then angle, then the seven statistics.
    stat_names = (
        'contrast', 'energy', 'homogeneity',
        'correlation', 'dissimilarity', 'asm', 'entropy'
    )
    columns = ['Class_Name', 'Image_Name'] + [
        f'{stat}_d{d}_a{angle}'
        for d in distances
        for angle in angles
        for stat in stat_names
    ]

    # Create DataFrame and save to CSV
    df = pd.DataFrame(features_list, columns=columns)
    output_file = os.path.join(output_dir, 'glcm_features.csv')
    df.to_csv(output_file, index=False)

    print("🔥 GLCM Feature Extraction Completed!")
    print(f"Features saved to {output_file}")


In [None]:
# Entry point: run the complete extraction (directory walk, preprocessing,
# feature export) when this code is executed as the main module.
if __name__ == "__main__":
    process_images()

✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\10.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\100.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\102.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\104.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\106.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\108.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\110.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\112.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\114.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\116.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\118.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloevera\12.jpg
✅ Processed: D:\MED_LEAF_ID-1\dataset\Medicinal Leaf dataset\Aloev

In [None]:
# Load dataset
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (\M, \d and \g are invalid escapes in a normal literal).
# NOTE(review): process_images() writes glcm_features.csv into output_dir,
# which is a different folder from this path — confirm the file is copied here.
df = pd.read_csv(r"D:\MED_LEAF_ID\data\glcm_features.csv")

# Drop the Image_Name column as it's not needed
df = df.drop(columns=["Image_Name"])

# Encode target variable (class names -> integer codes); the fitted encoder is
# kept so codes can be mapped back to names later.
label_encoder = LabelEncoder()
df["Class_Name"] = label_encoder.fit_transform(df["Class_Name"])

# Split features and target
X = df.drop(columns=["Class_Name"])
y = df["Class_Name"]

In [None]:
# Exploratory Data Analysis (EDA)
plt.figure(figsize=(12, 5))
sns.countplot(x=y)
plt.xticks(rotation=90)
plt.title("Class Distribution")
plt.xlabel("Plant Class")
plt.ylabel("Count")
plt.show()

In [None]:
# Pairwise correlation between all feature columns, drawn on a fixed
# [-1, 1] colour scale so the figure is comparable across runs.
plt.figure(figsize=(12, 10))
sns.heatmap(
    X.corr(),
    vmin=-1.0,
    vmax=1.0,
    cmap="coolwarm",
    square=True,
)
plt.title("Feature Correlation Heatmap")
plt.show()

In [None]:
# Feature Selection using Recursive Feature Elimination (RFE)
log_reg = LogisticRegression(max_iter=1000)
rfe = RFE(log_reg, n_features_to_select=30)
rfe.fit(X, y)
X_selected = X.loc[:, rfe.support_]

In [None]:
# Standardise every feature column, then project onto the principal components
# that together explain 95% of the variance.
# NOTE(review): X_scaled / X_pca are not referenced by the later cells visible
# here (the models are trained on X_selected) — confirm whether they are needed.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

pca = PCA(n_components=0.95, random_state=42)
X_pca = pca.fit_transform(X_scaled)

In [None]:
from imblearn.combine import SMOTETomek

# Split BEFORE resampling. Oversampling the whole dataset first leaks
# information: synthetic points interpolated from test rows end up in the
# training set, and the test set itself contains synthetic samples, so the
# reported accuracy is optimistic. The held-out set must be real data only.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the classes on the training portion only.
smote_tomek = SMOTETomek(random_state=42)
X_resampled, y_resampled = smote_tomek.fit_resample(X_train_raw, y_train_raw)

# Downstream cells train on X_train / y_train.
X_train, y_train = X_resampled, y_resampled

# Define parameter grid for Random Search

In [None]:
# Candidate hyper-parameter values sampled by the randomized search over the
# random forest.
param_dist = dict(
    n_estimators=[50, 100, 200, 300],
    max_depth=[10, 20, 30, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

In [None]:
# Base random forest handed to the randomized hyper-parameter search; the fixed
# seed keeps repeated runs comparable.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Randomized hyper-parameter search for the random forest: 50 sampled
# configurations, each scored with 5-fold cross-validation on the training
# data, using all available CPU cores.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

In [None]:
# Keep the random forest that the search selected as its best estimator.
best_rf = random_search.best_estimator_

In [None]:
# Gradient-boosted classifier for the ensemble. It is combined by soft voting,
# which averages per-class probabilities, so train with the probability
# objective ('multi:softprob') rather than the label objective
# ('multi:softmax'). Predicted labels are unchanged (argmax of the same scores).
xgb_clf = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=len(np.unique(y)),
    random_state=42,
)
xgb_clf.fit(X_train, y_train)

In [None]:
# Soft-voting ensemble of the tuned random forest and the XGBoost model,
# fitted on the training data.
ensemble = VotingClassifier(
    estimators=[
        ('rf', best_rf),
        ('xgb', xgb_clf),
    ],
    voting='soft',
)
ensemble.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the ensemble and score the predictions
# against the true labels.
y_pred = ensemble.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [None]:
# Report the hyper-parameters chosen by the random-forest search and the
# ensemble's accuracy on the held-out samples.
print("Best Parameters (RF):", random_search.best_params_)
print("Ensemble Model Accuracy:", accuracy)

In [None]:
import joblib
import os

# Define the path where the model will be saved
model_path = r"D:\MED_LEAF_ID\models\plant_classifier.pkl"
model_dir = os.path.dirname(model_path)

# Ensure the directory exists
os.makedirs(model_dir, exist_ok=True)

# Save the trained model (unchanged: the file holds the ensemble only)
joblib.dump(ensemble, model_path)

# The classifier alone outputs integer codes and expects exactly the
# RFE-selected columns, so inference cannot be reproduced from it in isolation.
# Persist the fitted label encoder (code -> class name) and the ordered list of
# selected feature columns next to the model.
label_encoder_path = os.path.join(model_dir, "label_encoder.pkl")
selected_features_path = os.path.join(model_dir, "selected_features.pkl")
joblib.dump(label_encoder, label_encoder_path)
joblib.dump(list(X_selected.columns), selected_features_path)

print(f"Model saved successfully at: {model_path}")
print(f"Label encoder saved at: {label_encoder_path}")
print(f"Selected feature names saved at: {selected_features_path}")