In [28]:
import os
import numpy as np
from skimage import io
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Define the path to the Potato dataset
potato_dataset_path = 'PotatoDataset'

# List of disease types and corresponding labels
disease_types = os.listdir(potato_dataset_path)
labels = {disease: index for index, disease in enumerate(disease_types)}

class_mapping = {index: disease for disease, index in labels.items()}

# Load images and labels
X = []
y = []

for disease in disease_types:
    disease_path = os.path.join(potato_dataset_path, disease)
    
    # Ensure the path is a directory before attempting to access it
    if os.path.isdir(disease_path):
        image_filenames = os.listdir(disease_path)
        
        for filename in image_filenames:
            image_path = os.path.join(disease_path, filename)
            
            # Check if the file is an image before attempting to load it
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
                image = io.imread(image_path)
                resized_image = resize(image, (128, 128))  # Resize images to a common size
                X.append(resized_image)
                y.append(labels[disease])

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

if len(np.unique(y)) > 1:
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    # Flatten the images for Random Forest
    X_train_flatten = X_train.reshape(X_train.shape[0], -1)
    X_test_flatten = X_test.reshape(X_test.shape[0], -1)

    # Initialize and train the Random Forest model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_flatten, y_train)

    # Save the trained model
    model_filename = 'random_forest_model.joblib'
    joblib.dump(model, model_filename)
    print(f"Model saved as '{model_filename}'")

    # Make predictions and calculate confidence scores
    y_pred = model.predict(X_test_flatten)
    confidence_scores = model.predict_proba(X_test_flatten)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    # Print confidence scores for each class
    for i, confidence_score in enumerate(confidence_scores):
        predicted_class_index = y_pred[i]
        predicted_class = class_mapping.get(predicted_class_index, "Unknown")
        predicted_confidence = confidence_score[predicted_class_index]
        print(f"Predicted Class: {predicted_class}, Confidence: {predicted_confidence:.2f}")

Model saved as 'random_forest_model.joblib'
Accuracy: 0.84
Predicted Class: Potato___Late_blight, Confidence: 0.69
Predicted Class: Potato___Late_blight, Confidence: 1.00
Predicted Class: Potato___Early_blight, Confidence: 0.72
Predicted Class: Potato___Early_blight, Confidence: 0.93
Predicted Class: Potato___Late_blight, Confidence: 0.84
Predicted Class: Potato___Early_blight, Confidence: 0.92
Predicted Class: Potato___Late_blight, Confidence: 0.69
Predicted Class: Potato___Late_blight, Confidence: 0.94
Predicted Class: Potato___Late_blight, Confidence: 0.51
Predicted Class: Potato___Early_blight, Confidence: 0.83
Predicted Class: Potato___Early_blight, Confidence: 0.80
Predicted Class: Potato___Early_blight, Confidence: 0.77
Predicted Class: Potato___Early_blight, Confidence: 0.89
Predicted Class: Potato___Late_blight, Confidence: 0.73
Predicted Class: Potato___Early_blight, Confidence: 0.94
Predicted Class: Potato___Late_blight, Confidence: 0.99
Predicted Class: Potato___Late_blight

In [29]:
labels

{'Potato___Early_blight': 0, 'Potato___Late_blight': 1, 'Potato___healthy': 2}