In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import numpy as np
from tensorflow.keras.preprocessing import image_dataset_from_directory
import shutil

In [2]:
 import kagglehub
 # Download latest version
 path = kagglehub.dataset_download("emmarex/plantdisease")

Downloading from https://www.kaggle.com/api/v1/datasets/download/emmarex/plantdisease?dataset_version_number=1...


100%|██████████| 658M/658M [00:05<00:00, 116MB/s]

Extracting files...





In [3]:
data_dir=os.path.join(path, "PlantVillage" )

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image preprocessing parameters
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Data augmentation (optional but recommended)
datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2 # 20% for validation
)

# Create training data generator
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical', # Assuming multi-class classification
    subset='training',
    shuffle=True,
    seed=42  # for reproducibility
)

# Create validation data generator
validation_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)

Found 16516 images belonging to 15 classes.
Found 4122 images belonging to 15 classes.


In [6]:
disease_types=train_generator.class_indices.keys()

In [8]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2  # Example transfer learning model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the pre-trained model (excluding the top classification layer)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Add custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Adjust units as needed
predictions = Dense(len(disease_types), activation='softmax')(x)  # Output layer

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers (optional, but often improves initial training)
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['f1_score'])

# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)

Epoch 1/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 504ms/step - f1_score: 0.5473 - loss: 1.2567 - val_f1_score: 0.8169 - val_loss: 0.5189
Epoch 2/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 475ms/step - f1_score: 0.8287 - loss: 0.4682 - val_f1_score: 0.8424 - val_loss: 0.4304
Epoch 3/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 476ms/step - f1_score: 0.8617 - loss: 0.3750 - val_f1_score: 0.8662 - val_loss: 0.3617
Epoch 4/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 478ms/step - f1_score: 0.8736 - loss: 0.3388 - val_f1_score: 0.8631 - val_loss: 0.3484
Epoch 5/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 471ms/step - f1_score: 0.8908 - loss: 0.3008 - val_f1_score: 0.8788 - val_loss: 0.3202
Epoch 6/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 538ms/step - f1_score: 0.8924 - loss: 0.2874 - val_f1_score: 0.8777 - val_loss: 0.3287
Epoc

In [14]:
# prompt: test the data with model.eval and give me the f1 score and the loss of testing

from sklearn.metrics import f1_score

# Evaluate the model
model.evaluate(validation_generator)

# Get predictions
predictions = model.predict(validation_generator)
predicted_classes = np.argmax(predictions, axis=1)

# Get true classes
true_classes = validation_generator.classes

# Calculate F1 score
f1 = f1_score(true_classes, predicted_classes, average='weighted')  # Use 'weighted' for multi-class

print(f"F1 Score: {f1}")


[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 380ms/step - f1_score: 0.5271 - loss: 0.2449
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 423ms/step
F1 Score: 0.8962710340535448


In [15]:
model.save('disease_detection.h5')

