In [1]:
import os
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog



In [2]:
def load_images_and_extract_features(dataset_dir):
    images = []
    labels = []

    for label_idx, label in enumerate(os.listdir(dataset_dir)):
        label_dir = os.path.join(dataset_dir, label)
        for image_file in os.listdir(label_dir):
            image_path = os.path.join(label_dir, image_file)
            try:
                image = cv2.imread(image_path)
                if image is not None:
                    # Resize image to a fixed size
                    image = resize(image, (100, 100))
                    # Extract features using Histogram of Oriented Gradients (HOG)
                    features = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), multichannel=True, channel_axis=-1)
                    images.append(features)
                    labels.append(label)
                else:
                    print("Failed to load image:", image_path)
            except Exception as e:
                print("Error loading image:", image_path)
                print(e)

    return np.array(images), np.array(labels)


In [3]:
# Path to Plant Village dataset
dataset_dir = "D://New Plant Diseases Dataset(Augmented)//New Plant Diseases Dataset(Augmented)//train"

In [4]:
# Load images and extract features
images, labels = load_images_and_extract_features(dataset_dir)


  features = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), multichannel=True, channel_axis=-1)


In [5]:
print("Number of images:", len(images))
print("Number of labels:", len(labels))


Number of images: 70295
Number of labels: 70295


In [6]:
# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


In [7]:
# Initialize Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)


In [None]:
# Train the classifier
clf.fit(X_train, y_train)

In [16]:
# Evaluate the classifier on the validation set
accuracy_val = clf.score(X_test, y_test)
print("Validation Accuracy:", accuracy_val)


Validation Accuracy: 0.5554449107333381


In [None]:
# Evaluate the classifier
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)

In [10]:
import joblib

# Save the trained model to a file
joblib.dump(clf, 'rf_model.pkl')


['rf_model.pkl']