In [3]:
import os
import numpy as np
import tensorflow as tf

# 1. Define the base directory first [cite: 641]
base_dir = '/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage'

# 2. Print the files to verify the path is working
for dirname, _, filenames in os.walk(base_dir):
    for filename in filenames[:1]: 
        print(os.path.join(dirname, filename))

/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Late_blight/781e93a9-2059-42de-8075-658033a6abf7___RS_Late.B 6075.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___healthy/4a1e2b71-992a-4a64-a599-b49b8fa75378___RS_HL 0627.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Early_blight/cf8c6e28-201c-4c8e-994f-8dcf98362e64___RS_Erly.B 7651.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Septoria_leaf_spot/8854cb53-e283-46b4-b150-3d0b414b77fd___Matt.S_CG 0951.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Tomato_Yellow_Leaf_Curl_Virus/d08a1c48-3360-40e3-9c3d-e47c2812bed2___UF.GRC_YLCV_Lab 01920.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Bacterial_spot/a129e8eb-e2b4-4a8a-a509-f6625da6b11c___GCREC_Bact.Sp 3002.JPG
/kaggle/input/plantvillage-tomato-leaf-dataset/plantvill

In [4]:
print(os.listdir(base_dir))

['Tomato___Late_blight', 'Tomato___healthy', 'Tomato___Early_blight', 'Tomato___Septoria_leaf_spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___Bacterial_spot', 'Tomato___Target_Spot', 'Tomato___Tomato_mosaic_virus', 'Tomato___Leaf_Mold', 'Tomato___Spider_mites Two-spotted_spider_mite']


In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# This will search for any folder containing "Tomato"
for root, dirs, files in os.walk('/kaggle/input'):
    for name in dirs:
        if "Tomato" in name:
            print(f"FOUND PATH: {os.path.join(root, name)}")
            # Stop after finding the first one to keep it simple
            break

FOUND PATH: /kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/Tomato___Late_blight
FOUND PATH: /kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage/Tomato___Late_blight


In [6]:


# 1. Define the path to your dataset (Kaggle specific path)
base_dir = '/kaggle/input/plantvillage-tomato-leaf-dataset/plantvillage/plantvillage'

print(os.listdir(base_dir))
# 2. Setup Data Augmentation as specified in your Chapter 3 (Data Preprocessing)
# This handles resizing to 224x224 and normalization (1./255)
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,      # cite: 1373
    width_shift_range=0.2,   # cite: 1373
    height_shift_range=0.2,  # cite: 1373
    zoom_range=0.2,          # cite: 1373
    horizontal_flip=True,    # cite: 1373
    validation_split=0.2     # Your 80:20 split
)

# 3. Load the Training Data
train_data = datagen.flow_from_directory(
    base_dir,
    target_size=(224, 224), #cite: 655
    batch_size=32,           #cite: 679
    class_mode='sparse',   #Change 'categorical'to 'sparse'
    subset='training'   #cite: 658
)

# 4. Load the Validation Data
val_data = datagen.flow_from_directory(
    base_dir,
    target_size=(224, 224), #cite: 655
    batch_size=32,          #cite: 679
    class_mode='sparse',     #Change 'categorical' to 'sparse'
    subset='validation'    #cite: 658
)

print("Class indices:", train_data.class_indices)

['Tomato___Late_blight', 'Tomato___healthy', 'Tomato___Early_blight', 'Tomato___Septoria_leaf_spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___Bacterial_spot', 'Tomato___Target_Spot', 'Tomato___Tomato_mosaic_virus', 'Tomato___Leaf_Mold', 'Tomato___Spider_mites Two-spotted_spider_mite']
Found 11627 images belonging to 10 classes.
Found 2902 images belonging to 10 classes.
Class indices: {'Tomato___Bacterial_spot': 0, 'Tomato___Early_blight': 1, 'Tomato___Late_blight': 2, 'Tomato___Leaf_Mold': 3, 'Tomato___Septoria_leaf_spot': 4, 'Tomato___Spider_mites Two-spotted_spider_mite': 5, 'Tomato___Target_Spot': 6, 'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 7, 'Tomato___Tomato_mosaic_virus': 8, 'Tomato___healthy': 9}


In [7]:
from tensorflow.keras import layers, models

# Based on your Chapter 3.4 [cite: 663-673]
model = models.Sequential([
    # Input Layer: 224x224 RGB [cite: 665]
    layers.Input(shape=(224, 224, 3)),
    
    # Block 1: 32 filters [cite: 667]
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)), # [cite: 668]
    
    # Block 2: 64 filters [cite: 667]
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    
    # Block 3: 128 filters [cite: 667]
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    
    # Block 4: 256 filters [cite: 667]
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    
    # Block 5: 512 filters [cite: 667]
    layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
    layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    
    # Classification Layers [cite: 669-671]
    layers.Flatten(),
    layers.Dense(1500, activation='relu'), # Your 1500 neurons [cite: 669]
    layers.Dropout(0.5), # Regularization to prevent overfitting [cite: 670]
    layers.Dense(10, activation='softmax') # Softmax for multi-class 
])

# Compile with the settings from your Chapter 3 [cite: 676-678]
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy', # Matches your data format
    metrics=['accuracy']
)

model.summary() # This will confirm the ~7.8 million parameters [cite: 673]

In [8]:
# Based on Chapter 3.5:Model Training  [cite: 675-679]
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=50, # cite: 677
    steps_per_epoch=train_data.samples // 32, # cite: 679
    validation_steps=val_data.samples // 32
)

# Save the model so you can download it for VS Code
model.save('tomato_model.h5')
print("Model training complete. Download 'tomato_model.h5' from the Output tab.")

  self._warn_if_super_not_called()


Epoch 1/50


I0000 00:00:1772064631.442057     122 service.cc:152] XLA service 0x7d28680080b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1772064631.442095     122 service.cc:160]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1772064632.202992     122 cuda_dnn.cc:529] Loaded cuDNN version 91002


[1m  2/363[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33s[0m 93ms/step - accuracy: 0.1250 - loss: 2.2829   

I0000 00:00:1772064644.058591     122 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 617ms/step - accuracy: 0.2948 - loss: 2.0351 - val_accuracy: 0.4941 - val_loss: 1.4986
Epoch 2/50
[1m  1/363[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m31s[0m 88ms/step - accuracy: 0.5625 - loss: 1.4470



[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 110ms/step - accuracy: 0.5625 - loss: 1.4470 - val_accuracy: 0.4878 - val_loss: 1.4956
Epoch 3/50
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 514ms/step - accuracy: 0.5107 - loss: 1.4062 - val_accuracy: 0.6264 - val_loss: 1.0898
Epoch 4/50
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 117ms/step - accuracy: 0.6250 - loss: 1.1486 - val_accuracy: 0.6382 - val_loss: 1.0663
Epoch 5/50
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 533ms/step - accuracy: 0.6057 - loss: 1.1335 - val_accuracy: 0.6653 - val_loss: 0.9190
Epoch 6/50
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 86ms/step - accuracy: 0.6250 - loss: 1.0847 - val_accuracy: 0.6958 - val_loss: 0.8651
Epoch 7/50
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 473ms/step - accuracy: 0.6413 - loss: 0.9881 - val_accuracy: 0.6931 - val_loss: 0.8302
Epoch 8/50
[1m363/



Model training complete. Download 'tomato_model.h5' from the Output tab.


In [9]:
model.save("tomato_model.keras")
print("Saved tomato_model.keras")

Saved tomato_model.keras
