In [28]:
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,f1_score,classification_report,confusion_matrix
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
image_count = {}
default_image_size = tuple((128, 128))

data = []
c=-1
for root, _, files in os.walk('/kaggle/input/plantdisease/PlantVillage'):
    disease = os.path.basename(root)
    print(disease)
    image_count[disease] = 0  
    c+=1
    if((c>=3 and c<=10) or c==13 or c==15):
        for file in files:
            if file.endswith('.jpg') or file.endswith('.png') or file.endswith('.JPG') or file.endswith('.PNG') or file.endswith('JPEG') or file.endswith('jpeg') :
                image_path = os.path.join(root, file)

                if image_count[disease] >= 1000:
                    continue

                # Read the image
                image = cv2.imread(image_path)
    #             image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                image = cv2.resize(image, default_image_size)
                # Flatten the image to a 1D array and convert to NumPy array
                flattened_image = image.flatten().astype(int)
                # Store the flattened pixel values and disease name in data
                data.append([flattened_image, disease])

                # Increment the counter for the current disease
                image_count[disease] += 1

df = pd.DataFrame(data, columns=['image_pixels', 'disease'])

# Split the data into training and testing sets
X = np.vstack(df['image_pixels'].to_numpy())
y = df['disease']

# Encode disease labels using LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model






PlantVillage
Pepper__bell___Bacterial_spot
Potato___healthy
Tomato_Leaf_Mold
Tomato__Tomato_YellowLeaf__Curl_Virus
Tomato_Bacterial_spot
Tomato_Septoria_leaf_spot
Tomato_healthy
Tomato_Spider_mites_Two_spotted_spider_mite
Tomato_Early_blight
Tomato__Target_Spot
Pepper__bell___healthy
Potato___Late_blight
Tomato_Late_blight
Potato___Early_blight
Tomato__Tomato_mosaic_virus


In [29]:
# print(X_train)
# print(y_train)
# print(X_train.shape)


In [30]:
len(X_train[0])

49152

In [31]:
print(image_count)

{'PlantVillage': 0, 'Pepper__bell___Bacterial_spot': 0, 'Potato___healthy': 0, 'Tomato_Leaf_Mold': 952, 'Tomato__Tomato_YellowLeaf__Curl_Virus': 1000, 'Tomato_Bacterial_spot': 1000, 'Tomato_Septoria_leaf_spot': 1000, 'Tomato_healthy': 1000, 'Tomato_Spider_mites_Two_spotted_spider_mite': 1000, 'Tomato_Early_blight': 1000, 'Tomato__Target_Spot': 1000, 'Pepper__bell___healthy': 0, 'Potato___Late_blight': 0, 'Tomato_Late_blight': 1000, 'Potato___Early_blight': 0, 'Tomato__Tomato_mosaic_virus': 373}


In [32]:
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.6863270777479893


In [33]:
f1 = f1_score(y_test, y_pred, average='weighted')
f1

0.6795353027985946

In [34]:
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

array([[155,   7,   8,   0,   1,   0,   2,  15,   0,   8],
       [ 36,  84,  12,   2,   9,  15,  25,   8,   0,   6],
       [  5,  22, 106,   6,  21,   3,   5,   7,   1,   7],
       [  5,   4,   7, 139,  14,  10,   2,   7,   1,   1],
       [  4,   5,   5,  13, 137,   7,   9,  12,   1,   5],
       [  4,   7,   1,   0,   0, 155,  27,  10,   1,   2],
       [ 10,  10,   2,   1,   5,  30, 140,   3,   0,   8],
       [ 12,   9,   2,   0,   6,   8,   1, 161,   0,   2],
       [  4,   2,   5,  11,   4,  19,   3,   3,  23,   3],
       [  3,   3,   1,   1,   0,   2,  17,   0,   0, 180]])