In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score




In [2]:
# Function to load and preprocess images
def load_images(dataset_folder):
    images = []
    labels = []
    for folder in os.listdir(dataset_folder):
        folder_path = os.path.join(dataset_folder, folder)
        if os.path.isdir(folder_path):
            for file in os.listdir(folder_path):
                if file.endswith(".jpg") or file.endswith(".png"):
                    image_path = os.path.join(folder_path, file)
                    image = Image.open(image_path)
                    image = image.convert("RGB")  # Convert to RGB mode
                    image = image.resize((256, 256))  # Resize images to a common size
                    images.append(np.array(image))
                    labels.append(folder)
    return np.array(images), np.array(labels)


In [3]:
# Path to the dataset folder
dataset_folder = "D:/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train"


In [4]:
# Load and preprocess images
images, labels = load_images(dataset_folder)

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)


In [6]:
# Train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train.reshape(len(X_train), -1), y_train)  # Reshape images to 1D arrays for training


RandomForestClassifier(random_state=42)

In [7]:
# Evaluate the model
y_pred = model.predict(X_test.reshape(len(X_test), -1))
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.9448979591836735


In [8]:
import joblib

# Save the trained model to a file
model_file = "new_rf_model.pkl"
joblib.dump(model, model_file)
print("Model saved as", model_file)


Model saved as new_rf_model.pkl
