In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import cv2
import os

# Define dataset paths
dataset_path = '/kaggle/input/coffee-leaf-disease-akash/CoLeaf DATASET'
categories = os.listdir(dataset_path)
num_classes = len(categories)

# Load images and labels
def load_data():
    images = []
    labels = []
    for label, category in enumerate(categories):
        category_path = os.path.join(dataset_path, category)
        for root, _, files in os.walk(category_path):
            for img_name in files:
                img_path = os.path.join(root, img_name)
                img = cv2.imread(img_path)
                if img is not None:
                    img = cv2.resize(img, (224, 224))  # Resize images to 224x224
                    images.append(img)
                    labels.append(label)
                else:
                    print(f"Failed to load image: {img_path}")
    return np.array(images), np.array(labels)

images, labels = load_data()

# Ensure that we have loaded images
if len(images) == 0:
    raise Exception("No images loaded. Check dataset path and image files.")

# Normalize the pixel values
images = images / 255.0

# Convert labels to one-hot encoding
labels = to_categorical(labels, num_classes)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)

# Define the MobileNetV2 model with regularization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

def create_mobilenetv2_model(input_shape, num_classes):
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)  # Regularization
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Implementing K-fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)
fold_no = 1

for train_index, val_index in kf.split(images):
    print(f'Training on fold {fold_no}...')
    X_train, X_val = images[train_index], images[val_index]
    y_train, y_val = labels[train_index], labels[val_index]
    
    mobilenetv2_model = create_mobilenetv2_model(input_shape=(224, 224, 3), num_classes=num_classes)
    
    # Define callbacks
    callbacks = [
        ModelCheckpoint(f'mobilenetv2_best_model_fold_{fold_no}.keras', save_best_only=True, monitor='val_accuracy', mode='max'),
        EarlyStopping(monitor='val_accuracy', mode='max', patience=5, verbose=1)
    ]
    
    # Train the model
    history = mobilenetv2_model.fit(
        datagen.flow(X_train, y_train, batch_size=32),
        validation_data=(X_val, y_val),
        epochs=10,
        callbacks=callbacks
    )
    
    # Evaluate the model
    scores = mobilenetv2_model.evaluate(X_val, y_val, verbose=0)
    print(f'Score for fold {fold_no}: {mobilenetv2_model.metrics_names[0]} of {scores[0]}; {mobilenetv2_model.metrics_names[1]} of {scores[1]}')
    
    fold_no += 1


Training on fold 1...
Epoch 1/10


  self._warn_if_super_not_called()


[1m 1/21[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:33[0m 20s/step - accuracy: 0.0312 - loss: 3.0008

I0000 00:00:1716129790.595935     103 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1716129790.633159     103 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m13/21[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m8s[0m 1s/step - accuracy: 0.2313 - loss: 2.7549 

W0000 00:00:1716129802.917361     105 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m20/21[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 678ms/step - accuracy: 0.2722 - loss: 2.5526

W0000 00:00:1716129808.024292     104 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1716129813.717435     105 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 1s/step - accuracy: 0.2801 - loss: 2.5138 - val_accuracy: 0.4970 - val_loss: 1.3802
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 281ms/step - accuracy: 0.5234 - loss: 1.4263 - val_accuracy: 0.5238 - val_loss: 1.3297
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 277ms/step - accuracy: 0.5749 - loss: 1.1950 - val_accuracy: 0.5655 - val_loss: 1.3152
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 259ms/step - accuracy: 0.6066 - loss: 1.1062 - val_accuracy: 0.5595 - val_loss: 1.1978
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 258ms/step - accuracy: 0.6243 - loss: 1.0864 - val_accuracy: 0.5536 - val_loss: 1.1883
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 275ms/step - accuracy: 0.6526 - loss: 1.0195 - val_accuracy: 0.5952 - val_loss: 1.1424
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━━━━

W0000 00:00:1716129902.275919     104 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m12/21[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m9s[0m 1s/step - accuracy: 0.2141 - loss: 2.8943 

W0000 00:00:1716129914.337350     103 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m20/21[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 685ms/step - accuracy: 0.2588 - loss: 2.6511

W0000 00:00:1716129919.296111     104 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 1s/step - accuracy: 0.2667 - loss: 2.6069 - val_accuracy: 0.5254 - val_loss: 1.5823
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 281ms/step - accuracy: 0.4615 - loss: 1.4484 - val_accuracy: 0.5731 - val_loss: 1.4026
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 274ms/step - accuracy: 0.5621 - loss: 1.3663 - val_accuracy: 0.5791 - val_loss: 1.2143
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 261ms/step - accuracy: 0.6329 - loss: 1.1336 - val_accuracy: 0.5731 - val_loss: 1.1931
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 266ms/step - accuracy: 0.6079 - loss: 1.0998 - val_accuracy: 0.5761 - val_loss: 1.2586
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 255ms/step - accuracy: 0.6595 - loss: 0.9768 - val_accuracy: 0.5731 - val_loss: 1.1599
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━━━━

W0000 00:00:1716130013.863925     106 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m 7/21[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m11s[0m 856ms/step - accuracy: 0.1602 - loss: 2.7561

W0000 00:00:1716130018.922346     104 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m20/21[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 407ms/step - accuracy: 0.2647 - loss: 2.4898

W0000 00:00:1716130025.635820     103 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 719ms/step - accuracy: 0.2749 - loss: 2.4496 - val_accuracy: 0.4896 - val_loss: 1.3698
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 274ms/step - accuracy: 0.5510 - loss: 1.3672 - val_accuracy: 0.5433 - val_loss: 1.2608
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 283ms/step - accuracy: 0.5885 - loss: 1.1386 - val_accuracy: 0.5642 - val_loss: 1.2738
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 281ms/step - accuracy: 0.5849 - loss: 1.2146 - val_accuracy: 0.5940 - val_loss: 1.1521
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 280ms/step - accuracy: 0.6274 - loss: 1.0290 - val_accuracy: 0.6209 - val_loss: 1.1183
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 254ms/step - accuracy: 0.6059 - loss: 1.0817 - val_accuracy: 0.5910 - val_loss: 1.1638
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━

In [1]:

from io import BytesIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import cv2
import os
dataset_path = '/kaggle/input/coffee-leaf-disease-akash/CoLeaf DATASET'

# Define dataset paths
dir_list = ['boron-B',
            'calcium-Ca',
            'iron-Fe',
            'magnesium-Mg',
            'manganese-Mn',
            'more-deficiencies',
            'nitrogen-N',
            'phosphorus-P',
            'potasium-K']

# Image data generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# Function to generate images and load them into memory
def generate_images_in_memory(kaggle_input_dir, num_images=500):
    generated_images = datagen.flow_from_directory(
        kaggle_input_dir,
        batch_size=1,
        class_mode=None,
        shuffle=True
    )
    
    images = []
    for _ in range(num_images):
        img_batch = next(generated_images)
        img = img_batch[0].astype('uint8')  # Convert to uint8
        _, buffer = cv2.imencode('.jpg', img)
        img_bytes = BytesIO(buffer.tobytes())
        img_array = cv2.imdecode(np.frombuffer(img_bytes.getvalue(), np.uint8), cv2.IMREAD_COLOR)
        images.append(img_array)
    
    return images

# Load and generate images for each class
all_images = []
all_labels = []

for label, classes in enumerate(dir_list):
    kaggle_input_dir = f'/kaggle/input/coffee-leaf-disease-akash/CoLeaf DATASET/{classes}'
    images = generate_images_in_memory(kaggle_input_dir)
    labels = [label] * len(images)
    
    all_images.extend(images)
    all_labels.extend(labels)

# Convert to numpy arrays
all_images = np.array(all_images)
all_labels = np.array(all_labels)

# Print the shapes to verify
print('Images shape:', all_images.shape)
print('Labels shape:', all_labels.shape)

# Ensure that we have loaded images
if len(all_images) == 0:
    raise Exception("No images loaded. Check dataset path and image files.")


2024-05-19 15:47:13.081391: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-19 15:47:13.081514: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-19 15:47:13.257856: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 101 images belonging to 1 classes.
Found 162 images belonging to 1 classes.
Found 65 images belonging to 1 classes.
Found 79 images belonging to 1 classes.
Found 83 images belonging to 1 classes.
Found 104 images belonging to 1 classes.
Found 64 images belonging to 1 classes.
Found 246 images belonging to 1 classes.
Found 96 images belonging to 1 classes.
Images shape: (4500, 256, 256, 3)
Labels shape: (4500,)


In [2]:
categories = [d for d in os.listdir(dataset_path) if d in dir_list and os.path.isdir(os.path.join(dataset_path, d))]
num_classes = len(categories)

In [3]:
num_classes

9

In [4]:
images = all_images / 255.0

# Convert labels to one-hot encoding
labels = to_categorical(all_labels, num_classes)


In [39]:
labels.shape

(4500, 9)

In [5]:
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

def create_mobilenetv2_model(input_shape, num_classes):
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)  # Regularization
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Implementing K-fold Cross-Validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)
fold_no = 1

for train_index, val_index in kf.split(images):
    print(f'Training on fold {fold_no}...')
    X_train, X_val = images[train_index], images[val_index]
    y_train, y_val = labels[train_index], labels[val_index]
    
    mobilenetv2_model = create_mobilenetv2_model(input_shape=(256, 256, 3), num_classes=num_classes)
    
    # Define callbacks
    callbacks = [
        ModelCheckpoint(f'mobilenetv2_best_model_fold_{fold_no}.keras', save_best_only=True, monitor='val_accuracy', mode='max'),
        EarlyStopping(monitor='val_accuracy', mode='max', patience=5, verbose=1)
    ]
    
    # Train the model
    history = mobilenetv2_model.fit(
        datagen.flow(X_train, y_train, batch_size=32),
        validation_data=(X_val, y_val),
        epochs=30,
        callbacks=callbacks
    )
    
    # Evaluate the model
    scores = mobilenetv2_model.evaluate(X_val, y_val, verbose=0)
    print(f'Score for fold {fold_no}: {mobilenetv2_model.metrics_names[0]} of {scores[0]}; {mobilenetv2_model.metrics_names[1]} of {scores[1]}')
    
    fold_no += 1


Training on fold 1...


  base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)


Epoch 1/30


  self._warn_if_super_not_called()


[1m 2/94[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 53ms/step - accuracy: 0.1094 - loss: 2.6959  

I0000 00:00:1716134246.658325     871 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1716134246.693998     871 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m38/94[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m37s[0m 665ms/step - accuracy: 0.2425 - loss: 2.4289

W0000 00:00:1716134271.210888     870 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m93/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 510ms/step - accuracy: 0.3301 - loss: 2.0690

W0000 00:00:1716134300.939980     868 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1716134310.036039     868 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 687ms/step - accuracy: 0.3322 - loss: 2.0605 - val_accuracy: 0.5867 - val_loss: 1.1618
Epoch 2/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 444ms/step - accuracy: 0.5553 - loss: 1.2658 - val_accuracy: 0.6113 - val_loss: 1.0648
Epoch 3/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 445ms/step - accuracy: 0.6144 - loss: 1.1035 - val_accuracy: 0.6600 - val_loss: 0.9587
Epoch 4/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 445ms/step - accuracy: 0.6149 - loss: 1.0666 - val_accuracy: 0.6653 - val_loss: 0.9263
Epoch 5/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 438ms/step - accuracy: 0.6351 - loss: 1.0253 - val_accuracy: 0.6767 - val_loss: 0.9015
Epoch 6/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 435ms/step - accuracy: 0.6388 - loss: 0.9819 - val_accuracy: 0.6647 - val_loss: 0.9265
Epoch 7/30
[1m94/94[0m [32m━━━