# Data Splitting

In [42]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths
data_dir = '5 Black and white'  # Source directory: one sub-directory per class
train_dir = 'data/train'  # Destination for the training split (80% of each class)
val_dir = 'data/val'  # Destination for the validation split (10% of each class)
test_dir = 'data/test'  # Destination for the test split (10% of each class)

# Create the split root directories if they don't exist
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# List class sub-directories only. Plain files in the data root (e.g. .DS_Store)
# are skipped so they cannot raise NotADirectoryError below, and the list is
# sorted because os.listdir returns entries in arbitrary order.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Split every class 80/10/10 into train/val/test and COPY the files
# (the originals in data_dir are left untouched).
for cls in classes:
    cls_dir = os.path.join(data_dir, cls)
    # Sort the file names so that the seeded split below is reproducible:
    # train_test_split shuffles by position, so an arbitrary listing order
    # would otherwise give a different split on a different machine/run.
    images = sorted(
        name for name in os.listdir(cls_dir)
        if os.path.isfile(os.path.join(cls_dir, name))
    )
    # First hold out 20%, then halve the hold-out into validation and test.
    train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)
    val_images, test_images = train_test_split(test_images, test_size=0.5, random_state=42)

    # Copy images to their respective split directories
    for split_dir, split_images in (
        (train_dir, train_images),
        (val_dir, val_images),
        (test_dir, test_images),
    ):
        dst_dir = os.path.join(split_dir, cls)
        os.makedirs(dst_dir, exist_ok=True)
        for img in split_images:
            shutil.copy(os.path.join(cls_dir, img), os.path.join(dst_dir, img))

# NOTE: files copied by an earlier run are not removed; delete data/ first if
# the source images or the split logic changed, to avoid overlap between splits.
print("Data split completed successfully.")


Data split completed successfully.


# Model Creation

In [54]:
import os
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
import joblib
# Function to load images and labels from a directory
def load_data(directory, image_size=(224, 224)):
    """Load grayscale images and integer labels from a class-per-folder tree.

    Each sub-directory of ``directory`` is treated as one class. Class names
    are sorted before being numbered, so the same set of class folders always
    yields the same name -> index mapping (os.listdir alone returns entries in
    arbitrary order, which could number the classes differently for the
    train, validation and test directories).

    Args:
        directory: Path of the split directory (one sub-directory per class).
        image_size: ``(width, height)`` passed to ``cv2.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        resized images scaled to the range [0, 1]; ``labels`` holds the class
        index of each image. Both are empty when no image could be loaded.
    """
    import warnings  # local import: only needed to report unreadable files

    # Keep directories only; a stray file in the root would otherwise make
    # os.listdir(class_dir) fail with NotADirectoryError.
    class_names = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    label_map = {class_name: i for i, class_name in enumerate(class_names)}

    images = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        # Sorted so the sample order (and hence seeded training) is reproducible.
        for filename in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, filename)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals failure by returning None instead of
                # raising; skip the file rather than crash inside cv2.resize.
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue
            image = cv2.resize(image, image_size)
            images.append(image / 255.0)  # scale 8-bit pixel values to [0, 1]
            labels.append(label_map[class_name])

    return np.array(images), np.array(labels)

# Read the training split from disk
train_dir = "data/train"
X_train, y_train = load_data(train_dir)

# Read the validation split from disk
val_dir = "data/val"
X_val, y_val = load_data(val_dir)

# Read the test split from disk
test_dir = "data/test"
X_test, y_test = load_data(test_dir)

# RandomForestClassifier expects 2-D input: flatten every image to one row
X_train_flat, X_val_flat, X_test_flat = (
    split.reshape(len(split), -1) for split in (X_train, X_val, X_test)
)

# Build the forest (100 trees, fixed seed) and fit it on the training rows
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=100)
rf_classifier.fit(X_train_flat, y_train)

# Score the model on the validation split
y_val_pred = rf_classifier.predict(X_val_flat)
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)

# Score the model on the held-out test split
y_test_pred = rf_classifier.predict(X_test_flat)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)

# Per-class precision / recall / F1 on the test split
print("Classification Report:")
print(classification_report(y_test, y_test_pred))

# Persist the fitted classifier so it can be reloaded without retraining
model_file = "random_forest_model.pkl"
joblib.dump(rf_classifier, model_file)


Validation Accuracy: 0.7411273486430062
Test Accuracy: 0.732776617954071
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.62      0.67        80
           1       0.72      0.88      0.79        80
           2       0.78      0.71      0.75        80
           3       0.68      0.81      0.74        80
           4       0.76      0.76      0.76        80
           5       0.75      0.61      0.67        79

    accuracy                           0.73       479
   macro avg       0.74      0.73      0.73       479
weighted avg       0.74      0.73      0.73       479



['random_forest_model.pkl']

In [58]:

import cv2
import numpy as np
import joblib
# Restore the classifier from random_forest_model.pkl, the file name the
# training cell writes with joblib.dump.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load model
# files that come from a trusted source.
model=joblib.load("random_forest_model.pkl")
# Load and preprocess the image
def preprocess_image(image_path, image_size=(224, 224)):
    """Read one image and turn it into a flat feature vector for the model.

    The image is read as grayscale, resized, scaled to [0, 1] and flattened
    to one dimension.

    Args:
        image_path: Path of the image file to read.
        image_size: ``(width, height)`` passed to ``cv2.resize``.
            Defaults to ``(224, 224)``.

    Returns:
        A 1-D NumPy array with ``width * height`` values in [0, 1].

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    import os  # local import: this cell does not import os itself

    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None instead of raising when decoding fails.
        raise ValueError(f"Could not decode image: {image_path}")

    image = cv2.resize(image, image_size)  # Resize to the model's input size
    # normalize 8-bit pixel values to [0, 1]
    image = image / 255.0
    return image.flatten()  # Flatten the image

# Example usage
image_path = "3.jpg"
preprocessed_image = preprocess_image(image_path)

# Human-readable names for the integer labels predicted by the model.
# NOTE(review): this assumes label indices follow the alphabetical order of the
# class folder names -- confirm against the label map used at training time.
class_labels = {0: 'Aphids', 1: "Army Worm", 2: "Bacterial Blight", 3: "Healthy", 4: "Powdery Mildew", 5: "Target Spot"}
class_lables = class_labels  # original (misspelled) name kept for compatibility

# Make prediction with the model loaded from disk in this cell. Using `model`
# instead of `rf_classifier` means the cell no longer depends on the training
# cell having been run in the same kernel session.
predicted_class = model.predict([preprocessed_image])  # Pass the preprocessed image as a list

print("Predicted class:", class_labels[int(predicted_class[0])])

Predicted class: Bacterial Blight
