In [1]:
# Set seeds for reproducibility
import random
random.seed(0)

import numpy as np
np.random.seed(0)

import tensorflow as tf
tf.random.set_seed(0)

In [2]:
import os
import shutil
import numpy as np
import time
import json
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt
import wandb


In [3]:
!pip install kaggle



In [4]:
kaggle_credentails = json.load(open("kaggle.json"))

In [5]:
# setup Kaggle API key as environment variables
os.environ['KAGGLE_USERNAME'] = kaggle_credentails["username"]
os.environ['KAGGLE_KEY'] = kaggle_credentails["key"]

In [6]:
!kaggle datasets download -d vipoooool/new-plant-diseases-dataset

Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
License(s): copyright-authors
Downloading new-plant-diseases-dataset.zip to /content
 97% 2.60G/2.70G [00:19<00:02, 40.2MB/s]
100% 2.70G/2.70G [00:20<00:00, 144MB/s] 


In [7]:
from zipfile import ZipFile

# Unzip the downloaded dataset
with ZipFile("new-plant-diseases-dataset.zip", 'r') as zip_ref:
    zip_ref.extractall()

In [8]:
# Create base folders
!mkdir -p /content/newData/train
!mkdir -p /content/newData/valid

# Dataset original path
DATASET="/content/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)"

# Copy the 4 grape classes for training
!cp -r "$DATASET/train/Grape___Black_rot" /content/newData/train
!cp -r "$DATASET/train/Grape___Esca_(Black_Measles)" /content/newData/train
!cp -r "$DATASET/train/Grape___Leaf_blight_(Isariopsis_Leaf_Spot)" /content/newData/train
!cp -r "$DATASET/train/Grape___healthy" /content/newData/train

# Copy the 4 grape classes for validation
!cp -r "$DATASET/valid/Grape___Black_rot" /content/newData/valid
!cp -r "$DATASET/valid/Grape___Esca_(Black_Measles)" /content/newData/valid
!cp -r "$DATASET/valid/Grape___Leaf_blight_(Isariopsis_Leaf_Spot)" /content/newData/valid
!cp -r "$DATASET/valid/Grape___healthy" /content/newData/valid

# Create split folders
!mkdir -p /content/newData/train_split
!mkdir -p /content/newData/valid_split
!mkdir -p /content/newData/test_split

print("Dataset prepared.")

Dataset prepared.


In [9]:
import os
import shutil
import numpy as np

# Define source and destination directories (COLAB VERSION)
source_train_dir = '/content/newData/train'
source_valid_dir = '/content/newData/valid'
target_train_dir = '/content/newData/train_split'
target_valid_dir = '/content/newData/valid_split'
target_test_dir = '/content/newData/test_split'

# Create target directories for each class
for class_name in os.listdir(source_train_dir):
    os.makedirs(os.path.join(target_train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(target_valid_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(target_test_dir, class_name), exist_ok=True)

    # Get file paths
    class_source_path = os.path.join(source_train_dir, class_name)
    files = os.listdir(class_source_path)

    # Shuffle and split files
    np.random.shuffle(files)
    num_files = len(files)

    train_end = int(0.8 * num_files)
    valid_end = train_end + int(0.1 * num_files)

    train_files = files[:train_end]
    valid_files = files[train_end:valid_end]
    test_files = files[valid_end:]

    # Copy files to respective directories
    for file in train_files:
        shutil.copy(os.path.join(class_source_path, file), os.path.join(target_train_dir, class_name, file))
    for file in valid_files:
        shutil.copy(os.path.join(class_source_path, file), os.path.join(target_valid_dir, class_name, file))
    for file in test_files:
        shutil.copy(os.path.join(class_source_path, file), os.path.join(target_test_dir, class_name, file))


In [10]:
# Define the directories (COLAB VERSION)
train_dir = '/content/newData/train_split'
valid_dir = '/content/newData/valid_split'
test_dir = '/content/newData/test_split'

# Function to count files in each class folder
def count_files_in_directory(directory):
    class_counts = {}
    for class_name in os.listdir(directory):
        class_path = os.path.join(directory, class_name)
        if os.path.isdir(class_path):  # Ensure it's a directory
            class_counts[class_name] = len(os.listdir(class_path))
    return class_counts

# Get counts for train, valid, and test directories
train_counts = count_files_in_directory(train_dir)
valid_counts = count_files_in_directory(valid_dir)
test_counts = count_files_in_directory(test_dir)

# Print the counts
print("Training counts:", train_counts)
print("Validation counts:", valid_counts)
print("Test counts:", test_counts)


Training counts: {'Grape___Black_rot': 1510, 'Grape___Esca_(Black_Measles)': 1536, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 1377, 'Grape___healthy': 1353}
Validation counts: {'Grape___Black_rot': 188, 'Grape___Esca_(Black_Measles)': 192, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 172, 'Grape___healthy': 169}
Test counts: {'Grape___Black_rot': 190, 'Grape___Esca_(Black_Measles)': 192, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 173, 'Grape___healthy': 170}


In [12]:
use_wandb = True
import wandb
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint

if use_wandb:
    import wandb
    wandb.init(project="plantdoctor", name="cnn_again_part7")

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33masma-daaab[0m ([33mtesnime328-tek-up[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
# Data Preparation with ImageDataGenerator
batch_size = 32
img_size = (224, 224)

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

valid_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# === FIXED PATHS FOR COLAB ===
train_dir = "/content/newData/train_split"
valid_dir = "/content/newData/valid_split"
test_dir = "/content/newData/test_split"

# Train sets
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

validation_generator = valid_datagen.flow_from_directory(
    valid_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

# Test set
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)



Found 5776 images belonging to 4 classes.
Found 721 images belonging to 4 classes.
Found 725 images belonging to 4 classes.


In [14]:
# Build the optimized CNN model
def create_optimized_cnn(num_classes):
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=img_size + (3,)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Create the model
model = create_optimized_cnn(train_generator.num_classes)

# Compile the model with learning rate scheduling
lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=0.9
)
optimizer = optimizers.Adam(learning_rate=lr_schedule)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
)

# Callbacks
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

checkpoint = ModelCheckpoint(
    'cnn_model.keras',
    monitor='val_loss',
    save_best_only=True
)

# === ADD W&B CALLBACKS WITHOUT CHANGING YOUR CODE ===
callbacks_list = [early_stop, checkpoint]
if use_wandb:
    callbacks_list.append(WandbMetricsLogger())
    callbacks_list.append(WandbModelCheckpoint("model-wandb.keras"))

# Train the model
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=11,
    callbacks=callbacks_list
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/11
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 453ms/step - accuracy: 0.3868 - loss: 1.2130 - precision: 0.6066 - recall: 0.1139 - val_accuracy: 0.6186 - val_loss: 0.8512 - val_precision: 0.6866 - val_recall: 0.5409
Epoch 2/11
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 408ms/step - accuracy: 0.7462 - loss: 0.6138 - precision: 0.7963 - recall: 0.6597 - val_accuracy: 0.6283 - val_loss: 0.9939 - val_precision: 0.6479 - val_recall: 0.6075
Epoch 3/11
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 406ms/step - accuracy: 0.7990 - loss: 0.5052 - precision: 0.8244 - recall: 0.7681 - val_accuracy: 0.6907 - val_loss: 0.8894 - val_precision: 0.7118 - val_recall: 0.6782
Epoch 4/11
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 408ms/step - accuracy: 0.8292 - loss: 0.4433 - precision: 0.8486 - recall: 0.8061 - val_accuracy: 0.7767 - val_loss: 0.5661 - val_precision: 0.8027 - val_recall: 0.7503
Epoch 5/11
