In [21]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [22]:
# Define the path to the _Output directory
input_directory = "/content/drive/MyDrive/PlantClass/segmented_output"

# Initialize the images array
images = []

# Iterate through each folder in the _Output directory
for folder_name in os.listdir(input_directory):
    folder_path = os.path.join(input_directory, folder_name)

    # Ensure it's a directory
    if os.path.isdir(folder_path):
        # List to hold images for this folder
        folder_images = []

        # Iterate through each file in the folder
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)

            # Check if the file is an image (we'll assume all files are images)
            if os.path.isfile(file_path):
                # Read the image in grayscale
                image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

                # Add the image to the list if it's successfully read
                if image is not None:
                    folder_images.append({
                        "image_name": file,
                        "image": image
                    })

        # If the folder contains images, append the folder object to the images array
        if folder_images:
            images.append({
                "folder_name": folder_name,
                "images": folder_images
            })


In [23]:
# Function to process each image and extract features
def extract_features(image_data, folder_name):
    """
    Extract features from a given image.
    Returns a dictionary containing the features.
    """
    # Get the image and its name
    image_name = image_data["image_name"]
    image = image_data["image"]

    # Find contours
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = max(contours, key=cv2.contourArea) if contours else None

    if contour is not None:
        # Region Features
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        circularity = (4 * np.pi * area) / (perimeter ** 2) if perimeter != 0 else 0
        convex_hull = cv2.convexHull(contour)
        convex_perimeter = cv2.arcLength(convex_hull, True)
        convexity = convex_perimeter / perimeter if perimeter != 0 else 0
        compactness = (perimeter ** 2) / area if area != 0 else 0

        # Moments
        moments = cv2.moments(image)
        hu_moments = cv2.HuMoments(moments).flatten()

        # Combine all features into a dictionary
        feature_dict = {
            "Class": folder_name,
            "Image_Name": image_name,
            "Area": area,
            "Perimeter": perimeter,
            "Circularity": circularity,
            "Convexity": convexity,
            "Compactness": compactness,
        }

        # Add Hu Moments to the dictionary
        for i, moment in enumerate(hu_moments):
            feature_dict[f'Hu_Moment_{i+1}'] = moment

        return feature_dict

    return None  # Return None if no valid contour is found

In [24]:
# Extract features for all images
features = []

for folder_data in images:
    folder_name = folder_data["folder_name"]
    folder_images = folder_data["images"]

    for image_data in folder_images:
        # Extract features for each image
        feature = extract_features(image_data, folder_name)
        if feature:
            features.append(feature)

# Convert to pandas DataFrame
features_df = pd.DataFrame(features)

# Save to CSV
output_csv = "/content/drive/MyDrive/PlantClass/features.csv"
features_df.to_csv(output_csv, index=False)
print(f"Feature extraction complete. Features saved to {output_csv}.")

# Display the first few rows of the DataFrame
print(features_df.head)

Feature extraction complete. Features saved to /content/drive/MyDrive/PlantClass/features.csv.
<bound method NDFrame.head of                         Class     Image_Name     Area    Perimeter  \
0     Alstonia_Scholaris_(P2)  0003_0024.JPG  12659.5  1697.010438   
1     Alstonia_Scholaris_(P2)  0003_0161.JPG  39112.5  2649.975070   
2     Alstonia_Scholaris_(P2)  0003_0118.JPG  61646.0  3862.260550   
3     Alstonia_Scholaris_(P2)  0003_0061.JPG  38550.5  3876.964086   
4     Alstonia_Scholaris_(P2)  0003_0062.JPG  43666.5  2886.994403   
...                       ...            ...      ...          ...   
1735               Jamun_(P5)  0005_0137.JPG  65495.5  2998.734518   
1736               Jamun_(P5)  0005_0125.JPG  68608.0  2966.526366   
1737               Jamun_(P5)  0005_0007.JPG  36318.5  3268.842161   
1738               Jamun_(P5)  0005_0051.JPG  35332.5  2363.156748   
1739               Jamun_(P5)  0005_0075.JPG  63958.5  4691.242428   

      Circularity  Convexity  Comp

In [37]:
def split_data(features_df, test_size=0.2, random_state=42):
    # Separate features and labels
    X = features_df.drop(columns=['Class', 'Image_Name'])  # Drop non-feature columns
    y = features_df["Class"]  # Target class labels

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test


In [38]:
# Call the split function
X_train, X_test, y_train, y_test = split_data(features_df)

# Verify the results
print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

Training set size: 1392
Testing set size: 348


In [39]:
# Initialize the classifier
classifier = RandomForestClassifier(random_state=42)

# Train the classifier
classifier.fit(X_train, y_train)

# Evaluate on the test set
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Test Accuracy: {accuracy:.2f}")
joblib.dump(classifier, "/content/drive/MyDrive/PlantClass/trained_model.pkl")


Test Accuracy: 0.62


['/content/drive/MyDrive/PlantClass/trained_model.pkl']

In [53]:
def classify_image(image_path, classifier, feature_columns):
    # Extract features from the image using your `extract_features` function
    image_data = {"image_name": "input_image", "image": cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)}
    folder_name = "Unknown"  # Placeholder for class

    feature_dict = extract_features(image_data, folder_name)
    if feature_dict is None:
        raise ValueError("Failed to extract features from the image.")

    # Convert the feature dictionary to a DataFrame with the same feature names as training
    feature_vector = pd.DataFrame([feature_dict], columns=feature_columns)

    # Predict the class
    predicted_class = classifier.predict(feature_vector)[0]
    return predicted_class


In [58]:
# Load the trained classifier
classifier = joblib.load("/content/drive/MyDrive/PlantClass/trained_model.pkl")

# Define the feature columns (as used during training)
feature_columns = ["Area", "Perimeter", "Circularity", "Convexity", "Compactness"] + \
                  [f'Hu_Moment_{i+1}' for i in range(7)]

# Provide the path to the input image
image_path = "/content/drive/MyDrive/PlantClass/segmented_output/Chinar_(P11)/0011_0033.JPG"

predicted_class = classify_image(image_path, classifier, feature_columns)
print(f"The predicted class for the input image is: {predicted_class}")


The predicted class for the input image is: Chinar_(P11)
