In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import zipfile
from PIL import Image
import sklearn
import sklearn.metrics
import os
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [4]:
# Fetch the tomato leaf disease archive with the Kaggle CLI. Per the recorded
# output it is saved in the working directory as tomato-leaf-disease.zip (~370 MB).
# NOTE(review): presumably needs the kaggle CLI installed and API credentials configured — confirm.
! kaggle datasets download -d shylesh101/tomato-leaf-disease

Dataset URL: https://www.kaggle.com/datasets/shylesh101/tomato-leaf-disease
License(s): unknown
Downloading tomato-leaf-disease.zip to c:\Users\aszay\Developer\simple-mobilenetv2




  0%|          | 0.00/370M [00:00<?, ?B/s]
  0%|          | 1.00M/370M [00:01<07:27, 866kB/s]
  1%|          | 2.00M/370M [00:02<06:28, 993kB/s]
  1%|          | 3.00M/370M [00:03<06:01, 1.07MB/s]
  1%|          | 4.00M/370M [00:03<05:41, 1.13MB/s]
  1%|▏         | 5.00M/370M [00:04<05:32, 1.15MB/s]
  2%|▏         | 6.00M/370M [00:05<05:26, 1.17MB/s]
  2%|▏         | 7.00M/370M [00:06<05:24, 1.17MB/s]
  2%|▏         | 8.00M/370M [00:07<05:17, 1.20MB/s]
  2%|▏         | 9.00M/370M [00:08<05:11, 1.22MB/s]
  3%|▎         | 10.0M/370M [00:09<05:09, 1.22MB/s]
  3%|▎         | 11.0M/370M [00:09<05:16, 1.19MB/s]
  3%|▎         | 12.0M/370M [00:10<05:11, 1.21MB/s]
  4%|▎         | 13.0M/370M [00:11<05:13, 1.20MB/s]
  4%|▍         | 14.0M/370M [00:12<05:07, 1.21MB/s]
  4%|▍         | 15.0M/370M [00:13<05:06, 1.21MB/s]
  4%|▍         | 16.0M/370M [00:14<05:04, 1.22MB/s]
  5%|▍         | 17.0M/370M [00:15<05:04, 1.22MB/s]
  5%|▍         | 18.0M/370M [00:16<05:04, 1.21MB/s]
  5%|▌         | 19.0M

In [5]:
# Unpack the downloaded archive into the current working directory.
zip_path = 'tomato-leaf-disease.zip'

with zipfile.ZipFile(file=zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path='./')

['$Recycle.Bin', '$SysReset', '$WINRE_BACKUP_PARTITION.MARKER', '.GamingRoot', 'Documents and Settings', 'DumpStack.log', 'DumpStack.log.tmp', 'Games', 'hiberfil.sys', 'LDPlayer', 'OneDriveTemp', 'pagefile.sys', 'PerfLogs', 'Program Files', 'Program Files (x86)', 'ProgramData', 'Recovery', 'Riot Games', 'swapfile.sys', 'System Volume Information', 'Users', 'Windows', 'Wuthering Waves', 'XboxGames']


In [2]:
# Dataset root and its per-split subdirectories. The root keeps its trailing
# slash so any code that concatenates onto `path_dataset` keeps working; the
# split paths are built with os.path.join so no doubled separator
# ("./tomato_dataset//train") is produced.
path_dataset = "./tomato_dataset/"
train_dir = os.path.join(path_dataset, "train")
val_dir = os.path.join(path_dataset, "valid")
test_dir = os.path.join(path_dataset, "test")

In [12]:
# Image geometry and batch size shared by all three splits.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Random augmentation is applied to training images only.
# `validation_split` was dropped: it has no effect unless `subset=` is passed
# to flow_from_directory, and validation data comes from its own directory.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation and test images are only rescaled, so the reported metrics are
# measured on unmodified images rather than on randomly augmented ones.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training data generator (shuffled, augmented)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Validation data generator (no augmentation). shuffle=False keeps batches in
# directory order so predictions line up with `validation_generator.classes`.
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
)

# Test data generator (no augmentation, fixed order for evaluation)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
)

Found 18345 images belonging to 10 classes.
Found 4585 images belonging to 10 classes.
Found 50 images belonging to 10 classes.


In [13]:
def generator_to_dataset(generator):
    """Wrap a batch-yielding Keras iterator in a ``tf.data.Dataset``.

    Args:
        generator: Iterable yielding ``(images, labels)`` batches. It must
            expose a ``num_classes`` attribute, which sizes the label spec.

    Returns:
        A ``tf.data.Dataset`` whose elements are pairs of float32 tensors with
        shapes ``[None, 224, 224, 3]`` (images) and ``[None, num_classes]``
        (labels).
    """
    # `output_signature` replaces the deprecated output_types/output_shapes
    # pair and states dtype and shape together for each component.
    batch_signature = (
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, generator.num_classes), dtype=tf.float32),
    )
    return tf.data.Dataset.from_generator(
        lambda: generator,
        output_signature=batch_signature,
    )

# Wrap each split's Keras iterator in a tf.data pipeline (train, validation, test).
train_dataset, validation_dataset, test_dataset = (
    generator_to_dataset(split_iterator)
    for split_iterator in (train_generator, validation_generator, test_generator)
)

In [14]:
# Let tf.data choose the prefetch buffer size at runtime. `tf.data.AUTOTUNE`
# is the stable name for the older `tf.data.experimental.AUTOTUNE` alias.
AUTOTUNE = tf.data.AUTOTUNE

# Prefetching lets the next batch be prepared while the current one is consumed.
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)


In [15]:
# Load the MobileNetV2 model with pre-trained ImageNet weights, excluding the top layers
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add global average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# Add a logistic layer for classification (assuming 10 classes)
predictions = Dense(10, activation='softmax')(x)

# Construct the final model
model = Model(inputs=base_model.input, outputs=predictions)


In [16]:
# Adam optimizer with categorical cross-entropy (the generators use
# class_mode='categorical'); accuracy is tracked during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [17]:
# Reduce learning rate when a metric has stopped improving
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', 
                                            patience=2, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

In [18]:
# Step counts are passed explicitly: len() of each Keras iterator is its
# number of batches, so one epoch is one pass over that split.
train_steps = len(train_generator)
val_steps = len(validation_generator)

history = model.fit(
    x=train_dataset,
    epochs=25,
    steps_per_epoch=train_steps,
    validation_data=validation_dataset,
    validation_steps=val_steps,
    callbacks=[learning_rate_reduction],
    verbose=1,
)

Epoch 1/25
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7982 - loss: 0.6291