In [1]:
!pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


### Install Libraries


In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import random
import cv2
import os
import PIL
import pathlib
import splitfolders

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [4]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

In [5]:
import wandb
from wandb.integration.keras import WandbCallback
from kaggle_secrets import UserSecretsClient


### Add WandB api_key

In [7]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api_key")

### Load Dataset

In [8]:
# Dataset paths
base_ds = '/kaggle/input/pomegranate-fruit-diseases/Pomegranate Fruit Diseases Dataset for Deep Learning Models/Pomegranate Diseases Dataset/Pomegranate Diseases Dataset'
base_ds = pathlib.Path(base_ds)

In [9]:
# Prepare the data for Stratified K-Fold
data = []
labels = []

In [10]:
# Gathering file paths and their respective labels
for class_dir in base_ds.glob('*'):
    class_name = class_dir.name
    for img_path in class_dir.glob('*'):
        data.append(str(img_path))
        labels.append(class_name)

In [11]:
# Convert labels to integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Setting up Stratified K-Fold
n_splits = 5  # Number of folds
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=123)

### Data Augmentation

In [12]:
# Image data generators
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   zoom_range=0.15,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.15,
                                   horizontal_flip=True,
                                   fill_mode="nearest")

test_datagen = ImageDataGenerator(rescale=1./255)

In [13]:
!pip install wandb --upgrade

Collecting wandb
  Downloading wandb-0.19.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading wandb-0.19.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.3/20.3 MB[0m [31m90.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.19.1
    Uninstalling wandb-0.19.1:
      Successfully uninstalled wandb-0.19.1
Successfully installed wandb-0.19.4


# K-Fold Cross Validadtion

In [17]:
import wandb

# Store results from each fold
fold_results = []

# K-Fold Cross-Validation
for fold, (train_idx, val_idx) in enumerate(skf.split(data, labels)):
    print(f"Training fold {fold+1}/{n_splits}...")
    
    # Split data into training and validation sets
    train_files = np.array(data)[train_idx]
    train_labels = np.array(labels)[train_idx]
    val_files = np.array(data)[val_idx]
    val_labels = np.array(labels)[val_idx]
    
    # Convert integer labels back to their string equivalents
    train_labels_str = label_encoder.inverse_transform(train_labels)
    val_labels_str = label_encoder.inverse_transform(val_labels)
    
    # Create training and validation datasets
    train_generator = train_datagen.flow_from_dataframe(
        pd.DataFrame({'filename': train_files, 'class': train_labels_str}),
        x_col='filename',
        y_col='class',
        target_size=(128, 128),
        class_mode='categorical',
        batch_size=16,
        shuffle=True
    )
    
    val_generator = train_datagen.flow_from_dataframe(
        pd.DataFrame({'filename': val_files, 'class': val_labels_str}),
        x_col='filename',
        y_col='class',
        target_size=(128, 128),
        class_mode='categorical',
        batch_size=16,
        shuffle=False
    )
    
    # Model configuration
    CONFIG = dict(
        batch_size=16,
        img_height=128,
        img_width=128,
    )
    
    # Rename the model variable to avoid conflicts
    densenet_model = tf.keras.applications.DenseNet169(weights="imagenet", include_top=False, input_shape=(128, 128, 3))
    densenet_model.trainable = True
    inputs = tf.keras.Input((128, 128, 3))
    x = densenet_model(inputs, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')(x)
    densenet_model = tf.keras.Model(inputs, x)
    
    densenet_model.compile(optimizer=keras.optimizers.Adam(1e-5),
                           loss='categorical_crossentropy', metrics=['accuracy'])
    
    # W&B run
    # run = wandb.init(settings=wandb.Settings(start_method="fork"),
    #                  reinit=True,
    #                  project='Pomegranate_Kfold',
    #                  entity="avinashreddykasireddy954",
    #                  config=CONFIG,
    #                  group='DenseNet169',
    #                  job_type='train')
    
    # wandb.config.type = f'fold_{fold+1}'

    # DataFrame to store metrics for this fold
    metrics_df = pd.DataFrame(columns=["epoch", "train_loss", "train_accuracy", "val_loss", "val_accuracy"])
    
    # Training with manual logging
    for epoch in range(20):
        history = densenet_model.fit(
            train_generator,
            epochs=1,
            validation_data=val_generator
        )

         # Log metrics manually to DataFrame
        new_row = {
            "epoch": epoch + 1,
            "train_loss": history.history['loss'][0],
            "train_accuracy": history.history['accuracy'][0],
            "val_loss": history.history['val_loss'][0],
            "val_accuracy": history.history['val_accuracy'][0]
        }

        metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)
        
        # Log metrics manually to W&B
        # wandb.log({
        #     "epoch": epoch + 1,
        #     "train_loss": history.history['loss'][0],
        #     "train_accuracy": history.history['accuracy'][0],
        #     "val_loss": history.history['val_loss'][0],
        #     "val_accuracy": history.history['val_accuracy'][0]
        # })
    
    # Evaluate model on validation set and log final metrics
    val_loss, val_accuracy = densenet_model.evaluate(val_generator)
    print(f"Fold {fold+1} - Loss: {val_loss}, Accuracy: {val_accuracy * 100:.2f}%")
    fold_results.append((val_loss, val_accuracy))
    # wandb.log({"final_val_loss": val_loss, "final_val_accuracy": val_accuracy})

     # Save metrics to Excel file
    metrics_df.to_excel(f'/kaggle/working/fold_{fold+1}_metrics.xlsx', index=False)

    # Save the model in TensorFlow SavedModel format
    # densenet_model.save(f'DenseNet201_fold_{fold+1}', save_format='tf')
    
    # Finish W&B run
    # run.finish()

# After all folds
for i, (val_loss, val_accuracy) in enumerate(fold_results):
    print(f"Fold {i+1} - Loss: {val_loss}, Accuracy: {val_accuracy * 100:.2f}%")


Training fold 1/5...
Found 4079 validated image filenames belonging to 5 classes.
Found 1020 validated image filenames belonging to 5 classes.


  self._warn_if_super_not_called()


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 1s/step - accuracy: 0.6206 - loss: 1.0655 - val_accuracy: 0.9461 - val_loss: 0.2351


  metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 973ms/step - accuracy: 0.9323 - loss: 0.2390 - val_accuracy: 0.9676 - val_loss: 0.1054
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 931ms/step - accuracy: 0.9557 - loss: 0.1390 - val_accuracy: 0.9765 - val_loss: 0.0710
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 970ms/step - accuracy: 0.9651 - loss: 0.1189 - val_accuracy: 0.9814 - val_loss: 0.0733
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 921ms/step - accuracy: 0.9777 - loss: 0.0770 - val_accuracy: 0.9843 - val_loss: 0.0545
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 938ms/step - accuracy: 0.9788 - loss: 0.0686 - val_accuracy: 0.9843 - val_loss: 0.0513
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 952ms/step - accuracy: 0.9807 - loss: 0.0571 - val_accuracy: 0.9814 - val_loss: 0.0520
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 92

  self._warn_if_super_not_called()


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 1s/step - accuracy: 0.6158 - loss: 1.1127 - val_accuracy: 0.9529 - val_loss: 0.2101


  metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 935ms/step - accuracy: 0.9390 - loss: 0.2264 - val_accuracy: 0.9745 - val_loss: 0.0963
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 947ms/step - accuracy: 0.9583 - loss: 0.1456 - val_accuracy: 0.9784 - val_loss: 0.0766
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 924ms/step - accuracy: 0.9624 - loss: 0.1271 - val_accuracy: 0.9794 - val_loss: 0.0613
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 921ms/step - accuracy: 0.9714 - loss: 0.1012 - val_accuracy: 0.9853 - val_loss: 0.0483
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 981ms/step - accuracy: 0.9787 - loss: 0.0716 - val_accuracy: 0.9824 - val_loss: 0.0491
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 1s/step - accuracy: 0.9808 - loss: 0.0638 - val_accuracy: 0.9892 - val_loss: 0.0385
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 942ms

  self._warn_if_super_not_called()


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m500s[0m 1s/step - accuracy: 0.6513 - loss: 0.9544 - val_accuracy: 0.9324 - val_loss: 0.2425


  metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 992ms/step - accuracy: 0.9441 - loss: 0.2100 - val_accuracy: 0.9608 - val_loss: 0.1216
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 967ms/step - accuracy: 0.9581 - loss: 0.1522 - val_accuracy: 0.9706 - val_loss: 0.0919
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 960ms/step - accuracy: 0.9678 - loss: 0.1015 - val_accuracy: 0.9716 - val_loss: 0.0878
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 958ms/step - accuracy: 0.9665 - loss: 0.1013 - val_accuracy: 0.9794 - val_loss: 0.0698
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 979ms/step - accuracy: 0.9773 - loss: 0.0714 - val_accuracy: 0.9784 - val_loss: 0.0627
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 1s/step - accuracy: 0.9788 - loss: 0.0574 - val_accuracy: 0.9794 - val_loss: 0.0635
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 991ms

  self._warn_if_super_not_called()


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m500s[0m 1s/step - accuracy: 0.6325 - loss: 0.9979 - val_accuracy: 0.9539 - val_loss: 0.2166


  metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 961ms/step - accuracy: 0.9386 - loss: 0.2315 - val_accuracy: 0.9784 - val_loss: 0.0730
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 937ms/step - accuracy: 0.9568 - loss: 0.1602 - val_accuracy: 0.9804 - val_loss: 0.0633
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 938ms/step - accuracy: 0.9616 - loss: 0.1256 - val_accuracy: 0.9843 - val_loss: 0.0465
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 966ms/step - accuracy: 0.9707 - loss: 0.0876 - val_accuracy: 0.9853 - val_loss: 0.0486
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 963ms/step - accuracy: 0.9784 - loss: 0.0772 - val_accuracy: 0.9863 - val_loss: 0.0452
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 962ms/step - accuracy: 0.9735 - loss: 0.0766 - val_accuracy: 0.9863 - val_loss: 0.0331
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 96

  self._warn_if_super_not_called()


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 1s/step - accuracy: 0.6154 - loss: 1.0590 - val_accuracy: 0.9323 - val_loss: 0.2320


  metrics_df = pd.concat([metrics_df, pd.DataFrame([new_row])], ignore_index=True)


[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 936ms/step - accuracy: 0.9496 - loss: 0.2094 - val_accuracy: 0.9637 - val_loss: 0.1154
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 1s/step - accuracy: 0.9647 - loss: 0.1208 - val_accuracy: 0.9784 - val_loss: 0.0771
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 967ms/step - accuracy: 0.9694 - loss: 0.1021 - val_accuracy: 0.9784 - val_loss: 0.0617
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 957ms/step - accuracy: 0.9691 - loss: 0.0989 - val_accuracy: 0.9804 - val_loss: 0.0561
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 979ms/step - accuracy: 0.9780 - loss: 0.0732 - val_accuracy: 0.9804 - val_loss: 0.0620
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 938ms/step - accuracy: 0.9781 - loss: 0.0602 - val_accuracy: 0.9843 - val_loss: 0.0527
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 957ms

In [18]:
# After all folds
for i, (val_loss, val_accuracy) in enumerate(fold_results):
    print(f"Fold {i+1} - Loss: {val_loss}, Accuracy: {val_accuracy * 100:.2f}%")

Fold 1 - Loss: 0.03920096158981323, Accuracy: 99.12%
Fold 2 - Loss: 0.031773891299963, Accuracy: 98.92%
Fold 3 - Loss: 0.0362534299492836, Accuracy: 99.02%
Fold 4 - Loss: 0.02787677012383938, Accuracy: 99.02%
Fold 5 - Loss: 0.04132742062211037, Accuracy: 98.63%


### Evaluation Metrics


In [19]:
# After all folds: Compute average performance
average_val_loss = np.mean([result[0] for result in fold_results])
average_val_accuracy = np.mean([result[1] for result in fold_results])
print(f"Average Validation Loss: {average_val_loss}, Average Validation Accuracy: {average_val_accuracy * 100:.2f}%")

Average Validation Loss: 0.03528649471700192, Average Validation Accuracy: 98.94%
