In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam


In [None]:
# Directory that will receive the extracted archive contents
output_dir = '/content/extracted_files'
# Location of the uploaded dataset archive (adjust to wherever your zip file lives)
zip_file = '/content/drive/MyDrive/archive.zip'



In [None]:
# Make sure the extraction target exists; exist_ok makes this a no-op on re-runs
os.makedirs(name=output_dir, exist_ok=True)

In [None]:
import os
import zipfile # Import the zipfile module



In [None]:

# Extract every member of the archive into the output directory
with zipfile.ZipFile(zip_file, mode='r') as archive:
    archive.extractall(path=output_dir)

print(f"Files extracted to {output_dir}")

Files extracted to /content/extracted_files


In [None]:
# Imports needed by this cell, hoisted out of the per-class loop where they
# were previously re-executed on every iteration.
import shutil

from sklearn.model_selection import train_test_split

# Path to the dataset folder containing one sub-folder of images per class.
# The previous version joined two absolute paths; os.path.join discards every
# component that precedes an absolute one, so its first argument was ignored.
images_dir = os.path.join(output_dir, 'Segmented Medicinal Leaf Images')

# Root directories for the train, validation, and test splits
train_dir = '/content/dataset/train/'
validation_dir = '/content/dataset/validation/'
test_dir = '/content/dataset/test/'

# Only sub-directories are classes: a stray file in images_dir would otherwise
# be treated as a class and make os.listdir(class_path) fail further down.
# Sorted so the class order does not depend on the file system.
class_names = sorted(
    entry for entry in os.listdir(images_dir)
    if os.path.isdir(os.path.join(images_dir, entry))
)

# Create one sub-directory per class inside each split directory
for class_name in class_names:
    for split_dir in (train_dir, validation_dir, test_dir):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

# Split each class into train/validation/test (80% / 10% / 10%) and copy the files
for class_name in class_names:
    class_path = os.path.join(images_dir, class_name)

    # os.listdir returns names in arbitrary order, so the fixed random_state
    # only yields a repeatable split when the input list is ordered first.
    all_images = sorted(os.listdir(class_path))

    # First carve off 20%, then halve that remainder into validation and test
    train_images, temp_images = train_test_split(all_images, test_size=0.2, random_state=42)
    validation_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)

    # Copy (not move) so the extracted originals stay available to later cells
    for split_dir, split_images in (
        (train_dir, train_images),
        (validation_dir, validation_images),
        (test_dir, test_images),
    ):
        for img in split_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(split_dir, class_name, img))

print("Dataset organized into train, validation, and test sets!")

Dataset organized into train, validation, and test sets!


In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# Set dataset directory and image dimensions
dataset_dir = '/content/extracted_files/Segmented Medicinal Leaf Images'  # Update this with your dataset path
img_height, img_width = 128, 128  # Resize images to speed up processing

# Initialize data and labels
data = []
labels = []

# Load images and extract features (flattened image pixels).
# Listings are sorted because os.listdir returns entries in arbitrary order;
# without a stable sample order the seeded train/test split below would not
# be reproducible between runs or machines.
for class_name in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_path):
        continue  # ignore stray files next to the class folders
    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        try:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable file by returning None instead
                # of raising; fail here with a message that names the real cause.
                raise ValueError("file is missing or is not a readable image")
            # NOTE: cv2.resize expects dsize as (width, height). Both values are
            # 128 here so the order is immaterial; it is kept as written so every
            # cell in this notebook resizes the same way.
            img = cv2.resize(img, (img_height, img_width))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
            data.append(img.flatten())  # Flatten the image into a 1D feature vector
            labels.append(class_name)  # The folder name is the class label
        except Exception as e:
            # Best effort: report the bad file and keep loading the rest
            print(f"Error loading image {img_path}: {e}")

# Convert to NumPy arrays
data = np.array(data)
labels = np.array(labels)

print(f"Loaded {len(data)} images.")

# Encode class labels into numeric values
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the data into training and testing sets (80% training, 20% testing).
# Stratify so each class keeps the same share in both splits; with many small
# classes an unstratified split can leave some classes with very few test samples.
# (Stratification requires at least two images per class.)
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels_encoded,
    test_size=0.2,
    random_state=42,
    stratify=labels_encoded,
)

# Standardize the data for better performance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on the training data only, then transform it
X_test = scaler.transform(X_test)  # Reuse the training statistics for the test data

print("Data preprocessing complete.")

# Define the KNN model
k = 5  # Number of neighbors
knn = KNeighborsClassifier(n_neighbors=k)

# Train the KNN model
knn.fit(X_train, y_train)

print("KNN model training complete.")

# Evaluate the KNN model
def evaluate_model(knn_model, X_test, y_test, label_encoder):
    """
    Evaluate a fitted classifier and print its classification report together
    with weighted precision, recall, and F1 score.

    Parameters:
    - knn_model: Fitted model exposing predict()
    - X_test: Feature matrix to predict on (preprocessed like the training data)
    - y_test: Encoded ground-truth labels for X_test
    - label_encoder: Encoder used to turn class names into the encoded labels

    Returns:
    - None; all results are printed.
    """
    # Decode predictions and ground truth back to class names so the report is readable
    predicted_names = label_encoder.inverse_transform(knn_model.predict(X_test))
    true_names = label_encoder.inverse_transform(y_test)

    # zero_division=0 keeps the reported numbers the same as the default but
    # avoids an UndefinedMetricWarning when a class receives no predictions.
    print("Classification Report:")
    print(classification_report(true_names, predicted_names, zero_division=0))

    # Weighted averages: each class contributes in proportion to its support
    metric_options = {"average": "weighted", "zero_division": 0}
    precision = precision_score(true_names, predicted_names, **metric_options)
    recall = recall_score(true_names, predicted_names, **metric_options)
    f1 = f1_score(true_names, predicted_names, **metric_options)

    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")

# Print the evaluation metrics for the fitted KNN model on the held-out test split
evaluate_model(knn_model=knn, X_test=X_test, y_test=y_test, label_encoder=label_encoder)


Loaded 1835 images.
Data preprocessing complete.
KNN model training complete.
Classification Report:
                         precision    recall  f1-score   support

            Arive-Dantu       0.50      0.79      0.61        19
                 Basale       0.74      0.64      0.68        22
                  Betel       0.88      0.50      0.64        14
          Crape_Jasmine       0.67      0.55      0.60        11
                  Curry       0.12      0.12      0.12         8
              Drumstick       0.71      0.71      0.71        17
              Fenugreek       0.89      0.80      0.84        10
                  Guava       0.74      1.00      0.85        14
               Hibiscus       0.91      0.77      0.83        13
           Indian_Beech       0.83      0.56      0.67         9
         Indian_Mustard       0.67      0.18      0.29        11
              Jackfruit       1.00      0.80      0.89        15
Jamaica_Cherry-Gasagase       0.50      0.86      0.6

In [None]:
# Paths to your images
dataset_dir = '/content/extracted_files/Segmented Medicinal Leaf Images'

# Initialize data and labels
data = []
labels = []

# Load images and extract features.
# Listings are sorted because os.listdir returns entries in arbitrary order,
# which would otherwise make the seeded train/test split irreproducible.
for class_name in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_path):
        continue  # ignore stray files next to the class folders

    # Label as "Medicinal" or "Non-Medicinal" based on the class folder name.
    # NOTE(review): this only yields "Medicinal" when the folder name itself
    # contains the word; modify this condition to match your folder naming.
    if 'medicinal' in class_name.lower():
        class_label = 'Medicinal'
    else:
        class_label = 'Non-Medicinal'

    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        try:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable file by returning None instead
                # of raising; fail here with a message that names the real cause.
                raise ValueError("file is missing or is not a readable image")
            # dsize order kept as written so it matches the other cells
            # (cv2.resize expects (width, height)).
            img = cv2.resize(img, (img_height, img_width))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            data.append(img.flatten())  # Flatten the image into a 1D array
            labels.append(class_label)
        except Exception as e:
            # Best effort: report the bad file and keep loading the rest
            print(f"Error loading image {img_path}: {e}")

# Convert to NumPy arrays
data = np.array(data)
labels = np.array(labels)

# Sanity check: a binary classifier needs both labels to be present. If the
# folder-name rule above matched nothing (or everything), every prediction
# made downstream will be the same single label.
distinct_labels = np.unique(labels)
if len(distinct_labels) < 2:
    print(
        f"Warning: only {len(distinct_labels)} label class(es) found "
        f"({', '.join(distinct_labels)}); a model trained on this data cannot "
        "distinguish Medicinal from Non-Medicinal leaves."
    )


In [None]:
# Map the "Medicinal" / "Non-Medicinal" strings to integer codes; the fitted
# encoder is kept so predictions can be decoded back to strings later
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(y=labels)


In [None]:
# Hold out 20% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the 5-nearest-neighbour classifier and fit it on the scaled training data
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

print("KNN model training complete.")


KNN model training complete.


In [None]:
def predict_leaf(image_path, knn_model, label_encoder, scaler, img_height=128, img_width=128):
    """
    Predict if the given leaf image is medicinal or non-medicinal.

    Parameters:
    - image_path: Path to the user's leaf image
    - knn_model: Trained KNN model
    - label_encoder: Label encoder for class labels
    - scaler: Scaler used for standardizing the data
    - img_height, img_width: Resize dimensions for the image; these must match
      the dimensions used when the model and scaler were fitted

    Returns:
    - Predicted class (Medicinal/Non-Medicinal), or None if the image could not
      be processed (the reason is printed).
    """
    try:
        # Load the input image
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning None
            # instead of raising; without this check the failure would surface
            # later as an opaque OpenCV assertion from cv2.resize.
            raise ValueError(f"could not read an image from '{image_path}'")

        # Same preprocessing as the training loop: resize, BGR -> RGB, flatten.
        # The dsize order is kept as (img_height, img_width) to match how the
        # training images were resized (cv2.resize expects (width, height)).
        img = cv2.resize(img, (img_height, img_width))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_flattened = img.flatten()

        # Standardize with the fitted scaler; it expects a 2D array, hence the
        # single-row list wrapper
        img_standardized = scaler.transform([img_flattened])

        # Predict the encoded label, then decode it back to its string form
        pred_label_encoded = knn_model.predict(img_standardized)
        pred_label = label_encoder.inverse_transform(pred_label_encoded)[0]

        return pred_label
    except Exception as e:
        # Best effort: report the problem and let the caller handle None
        print(f"Error processing image: {e}")
        return None


In [None]:
# Image to classify (replace with the actual image path)
user_image_path = '/content/sample_data/amarnthus.jpg'

# Run the trained KNN pipeline on that image
predicted_class = predict_leaf(user_image_path, knn, label_encoder, scaler)

# Report the outcome; predict_leaf yields None when the image could not be processed
if not predicted_class:
    print("Prediction failed. Please check the image or model.")
else:
    print(f"The given leaf is predicted to be: {predicted_class}")


The given leaf is predicted to be: Non-Medicinal
