In [1]:
import random
random.seed(0)

import numpy as np
np.random.seed(0)


import tensorflow as tf
tf.random.set_seed(0)

In [2]:
import os
import json
from zipfile import ZipFile
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

In [3]:
import kagglehub
path = kagglehub.dataset_download("abdallahalidev/plantvillage-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/abdallahalidev/plantvillage-dataset?dataset_version_number=3...


100%|██████████| 2.04G/2.04G [01:34<00:00, 23.2MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/versions/3


In [4]:
dataset_root=os.path.join(path,'plantvillage dataset')
if os.path.exists(dataset_root):
  print("Dataset extracted at:",dataset_root)
  print("Contents:",os.listdir(dataset_root))
  subfolders=['segmented','color','grayscale']
  for subfolder in subfolders:
    folder_path=os.path.join(dataset_root,subfolder)
    if os.path.exists(folder_path):
      print(f"{subfolder.capitalize()} images count:",len(os.listdir(folder_path)))
      print(f"Sample {subfolder} images:",os.listdir(folder_path)[:5])
    else:
      print(f"{subfolder} folder not found")
else:
  print("dataset folder not found")

Dataset extracted at: /root/.cache/kagglehub/datasets/abdallahalidev/plantvillage-dataset/versions/3/plantvillage dataset
Contents: ['color', 'grayscale', 'segmented']
Segmented images count: 38
Sample segmented images: ['Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Potato___Late_blight', 'Strawberry___Leaf_scorch']
Color images count: 38
Sample color images: ['Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Potato___Late_blight', 'Strawberry___Leaf_scorch']
Grayscale images count: 38
Sample grayscale images: ['Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Potato___Late_blight', 'Strawberry___Leaf_scorch']


**Data Preprocessing**

In [5]:
base_dir = os.path.join(dataset_root, "color")
img_size=224
batch_size=32
train_datagen=ImageDataGenerator(rescale=1./255,validation_split=0.2)
train_generator = train_datagen.flow_from_directory(base_dir,
                                                    target_size=(img_size,img_size),
                                                    batch_size=batch_size,
                                                    subset='training',
                                                    class_mode='categorical')
val_generator = train_datagen.flow_from_directory(base_dir,
                                                    target_size=(img_size,img_size),
                                                    batch_size=batch_size,
                                                    subset='validation',
                                                    class_mode='categorical')

Found 43456 images belonging to 38 classes.
Found 10849 images belonging to 38 classes.


**CNN Model**

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,Dense,Dropout,MaxPooling2D,Flatten
model=Sequential([
    Conv2D(32,(3,3),activation='relu',input_shape=(224,224,3)),
    MaxPooling2D(2,2),

    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(128,(3,3),activation='relu'),
    MaxPooling2D(2,2),

    Flatten(),

    Dense(512,activation='relu'),

    Dropout(0.5),

    Dense(len(train_generator.class_indices),activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [8]:
history=model.fit(train_generator,
                  steps_per_epoch=train_generator.samples//batch_size,
                  epochs=5,
                  validation_data=val_generator,validation_steps=val_generator.samples//batch_size)

Epoch 1/5


  self._warn_if_super_not_called()


[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 77ms/step - accuracy: 0.4908 - loss: 1.9159 - val_accuracy: 0.8212 - val_loss: 0.5616
Epoch 2/5
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 74ms/step - accuracy: 0.8201 - loss: 0.5733 - val_accuracy: 0.8968 - val_loss: 0.3430
Epoch 3/5
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 74ms/step - accuracy: 0.8866 - loss: 0.3528 - val_accuracy: 0.9181 - val_loss: 0.2482
Epoch 4/5
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 78ms/step - accuracy: 0.9175 - loss: 0.2545 - val_accuracy: 0.9291 - val_loss: 0.2299
Epoch 5/5
[1m1358/1358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 74ms/step - accuracy: 0.9379 - loss: 0.1869 - val_accuracy: 0.9273 - val_loss: 0.2283


In [10]:
val_loss,val_acc=model.evaluate(val_generator,steps=val_generator.samples//batch_size)
print(f"validation accuracy is {val_acc*100}")

[1m339/339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 50ms/step - accuracy: 0.9254 - loss: 0.2328
validation accuracy is 92.72676706314087


**Prediction System**

In [11]:
def load_preprocess(image_path,target_size=(224,224)):
  img=Image.open(image_path)
  img=img.resize(target_size)
  img_arr=np.array(img)
  img_arr=np.expand_dims(img_arr,axis=0)
  img_arr=img_arr.astype('float32')/255.
  return img_arr
def predict_class(model,img_path,class_idx):
  processed_img=load_preprocess(img_path)
  predictions=model.predict(processed_img)
  predict_class_idx=np.argmax(predictions,axis=1)[0]
  predict_class_name=class_idx[predict_class_idx]
  return predict_class_name

In [13]:
class_idx = {v: k for k, v in train_generator.class_indices.items()}

In [14]:
model.save('leaf_disease_prediction2.h5')

