In [1]:
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,f1_score,classification_report,confusion_matrix
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
image_count = {}
default_image_size = tuple((128, 128))

data = []
for root, _, files in os.walk('PlantVillage'):
    disease = os.path.basename(root)
    print(disease)
    image_count[disease] = 0  

    for file in files:
        if file.endswith('.jpg') or file.endswith('.png') or file.endswith('.JPG') or file.endswith('.PNG') or file.endswith('JPEG') or file.endswith('jpeg') :
            image_path = os.path.join(root, file)

            if image_count[disease] >= 1000:
                continue

            # Read the image
            image = cv2.imread(image_path)
            # image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, default_image_size)
            # Flatten the image to a 1D array and convert to NumPy array
            flattened_image = image.flatten().astype(int)
            # Store the flattened pixel values and disease name in data
            data.append([flattened_image, disease])

            # Increment the counter for the current disease
            image_count[disease] += 1

df = pd.DataFrame(data, columns=['image_pixels', 'disease'])

# Split the data into training and testing sets
X = np.vstack(df['image_pixels'].to_numpy())
y = df['disease']

# Encode disease labels using LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model






PlantVillage
Pepper__bell___Bacterial_spot
Pepper__bell___healthy
Potato___Early_blight
Potato___healthy
Potato___Late_blight
Tomato_Bacterial_spot
Tomato_Early_blight
Tomato_healthy
Tomato_Late_blight
Tomato_Leaf_Mold
Tomato_Septoria_leaf_spot
Tomato_Spider_mites_Two_spotted_spider_mite
Tomato__Target_Spot
Tomato__Tomato_mosaic_virus
Tomato__Tomato_YellowLeaf__Curl_Virus


In [2]:
# print(X_train)
# print(y_train)
# print(X_train.shape)


In [3]:
len(X_train[0])

49152

In [4]:
print(image_count)

{'PlantVillage': 0, 'Pepper__bell___Bacterial_spot': 997, 'Pepper__bell___healthy': 1000, 'Potato___Early_blight': 1000, 'Potato___healthy': 152, 'Potato___Late_blight': 1000, 'Tomato_Bacterial_spot': 1000, 'Tomato_Early_blight': 1000, 'Tomato_healthy': 1000, 'Tomato_Late_blight': 1000, 'Tomato_Leaf_Mold': 952, 'Tomato_Septoria_leaf_spot': 1000, 'Tomato_Spider_mites_Two_spotted_spider_mite': 1000, 'Tomato__Target_Spot': 1000, 'Tomato__Tomato_mosaic_virus': 373, 'Tomato__Tomato_YellowLeaf__Curl_Virus': 1000}


In [5]:
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.6515769944341373


In [6]:
f1 = f1_score(y_test, y_pred, average='weighted')
f1

0.6359617131348002

In [7]:
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

array([[133,  10,  22,  14,   0,   0,   6,   1,   1,  11,   5,   4,   5,
          0,   1],
       [ 13, 145,   6,   7,   0,   3,   1,   4,   0,   5,   0,   5,   1,
          0,   1],
       [  0,   3, 184,   6,   0,   0,   0,   0,   0,   6,   0,   1,   0,
          0,   1],
       [ 18,   3,  17, 132,   0,  11,   2,   1,   3,   4,   5,   9,   3,
          0,   3],
       [ 13,   9,   0,  11,   0,   0,   0,   0,   1,   6,   0,   2,   0,
          0,   0],
       [  0,   1,   1,   8,   0, 149,   2,   9,   0,   0,   0,   2,  12,
          0,   5],
       [  8,  13,   6,   8,   0,  34,  66,   2,   6,   4,  15,  25,  16,
          0,   2],
       [  4,  10,  14,  20,   0,   2,  13,  79,   5,  14,   3,   9,   2,
          1,   2],
       [  3,   1,   3,   5,   0,   6,   5,   1, 152,   8,  14,   5,   3,
          3,   4],
       [  6,  11,  18,   7,   0,   5,   1,   3,   9, 102,   7,   7,   8,
          2,   5],
       [  2,   1,   0,   2,   0,   7,   4,   0,   0,   3, 160,  16,   4,
       