In [None]:
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, classification_report

from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3, ResNet152, VGG19
from tensorflow.keras.utils import to_categorical, Sequence

In [None]:
# Ask TensorFlow which GPU devices are visible to this runtime.
physical_devices = tf.config.list_physical_devices(device_type='GPU')
print("Num GPUs Available: ", len(physical_devices))

# Echo every detected GPU so the hardware in use is recorded in the output.
for gpu in physical_devices:
    print("GPU details:", gpu)

In [None]:
# Turn on device placement logging so TensorFlow reports which device runs each op.
# NOTE(review): this is verbose during training; switch to enabled=False once GPU use is confirmed.
tf.debugging.set_log_device_placement(enabled=True)

In [None]:
# Root of the Kaggle dataset, plus the two sub-folders derived from it.
base_dir = '/kaggle/input/multimodal-plant-disease-dataset-by-subham-divakar'
crop_root, split_root = (os.path.join(base_dir, sub_dir) for sub_dir in ('color', 'split'))

In [None]:
def remove_ds_store(directory):
    """Delete macOS `.DS_Store` artefacts found anywhere below `directory`.

    Any file whose name contains `.DS_Store` is targeted. Removal is
    best-effort: if a file cannot be deleted (for example because the tree is
    mounted read-only) the failure is reported and the walk continues instead
    of aborting the cell.

    Args:
        directory: Root folder to walk recursively.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            # The substring test already covers an exact '.DS_Store' match.
            if '.DS_Store' not in file:
                continue
            file_path = os.path.join(root, file)
            print(f"Removing {file_path}")
            try:
                os.remove(file_path)
            except OSError as exc:
                print(f"Could not remove {file_path}: {exc}")

In [None]:
# Strip .DS_Store artefacts from the dataset tree before listing class folders.
remove_ds_store(directory=base_dir)

In [None]:
def is_image_file(filename):
    """Return True when `filename` ends with a known image extension (case-insensitive)."""
    image_suffixes = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')
    lowered = filename.lower()
    return any(lowered.endswith(suffix) for suffix in image_suffixes)

In [None]:
def split_data(base_dir, val_split=0.4, test_split=0.1, seed=None):
    """Split every class folder under `base_dir` into train/val/test file lists.

    Each immediate sub-directory of `base_dir` is treated as one class. The
    split is performed per class, so each class keeps roughly the requested
    proportions.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        val_split: Fraction of each class (relative to the whole class) used
            for validation.
        test_split: Fraction of each class used for testing.
        seed: Optional `random_state` passed to `train_test_split`. With the
            default `None`, the split follows NumPy's global random state.

    Returns:
        `(train_files, val_files, test_files, classes)`. The three file lists
        hold `(image_path, class_name)` tuples. `classes` is the sorted list of
        class names that actually contributed files.

    Raises:
        ValueError: If the fractions are not in (0, 1) or leave no room for a
            training set.
    """
    # Fail fast: with invalid fractions every class would otherwise be skipped
    # one by one through the per-class ValueError handler below.
    if not 0 < test_split < 1 or not 0 < val_split < 1 - test_split:
        raise ValueError(
            f"val_split ({val_split}) and test_split ({test_split}) must be "
            "positive fractions whose sum is below 1"
        )

    train_files = []
    val_files = []
    test_files = []
    classes = []

    # Sorted so class order (and hence label indices built from it) does not
    # depend on the filesystem's listing order.
    class_names = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )
    for cls in class_names:
        print(f'Processing class: {cls}')
        class_dir = os.path.join(base_dir, cls)

        # Sorted for the same reason; train_test_split does the shuffling.
        images = sorted(f for f in os.listdir(class_dir) if is_image_file(f))

        if len(images) == 0:
            print(f"No images found for class {cls}. Skipping...")
            continue

        try:
            train, test = train_test_split(images, test_size=test_split, random_state=seed)
            # The validation share is rescaled because `train` no longer
            # contains the test portion.
            train, val = train_test_split(
                train, test_size=val_split / (1 - test_split), random_state=seed
            )
        except ValueError as e:
            print(f"Not enough images to split for class {cls}: {e}")
            continue

        # Only classes that produced samples are reported, so an empty class
        # does not inflate the class count.
        classes.append(cls)
        train_files.extend([(os.path.join(class_dir, img), cls) for img in train])
        val_files.extend([(os.path.join(class_dir, img), cls) for img in val])
        test_files.extend([(os.path.join(class_dir, img), cls) for img in test])

    return train_files, val_files, test_files, classes

In [None]:
# Seed Python's and NumPy's global RNGs so the random split (and later NumPy-based
# shuffling) is repeatable after Restart Kernel -> Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

train_files, val_files, test_files, classes = split_data(crop_root)

In [None]:
# Report how many samples ended up in each split.
print(
    f"Train files: {len(train_files)}",
    f"Validation files: {len(val_files)}",
    f"Test files: {len(test_files)}",
    sep="\n",
)

In [None]:
# Square input edge (pixels): the InceptionV3 backbone is built at 299, the others at 224.
inception_size, other_size = 299, 224

In [None]:
# Keyword arguments shared by every ImageDataGenerator built from this cell's settings.
data_gen_args = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}

In [None]:
# One augmenting generator per input-size family, all configured from data_gen_args.
data_transforms = {
    family: ImageDataGenerator(**data_gen_args)
    for family in ('InceptionV3', 'Others')
}

In [None]:
class CustomDataset(Sequence):
    """Keras `Sequence` that loads `(image_path, class_name)` pairs from disk in batches.

    Every image is converted to RGB, resized to `image_size`, and scaled to
    [0, 1]. Labels are one-hot encoded using `class_to_idx`.

    Args:
        file_paths: Iterable of `(image_path, class_name)` tuples.
        class_to_idx: Mapping from class name to integer label.
        batch_size: Number of samples per batch.
        image_size: Size passed to `PIL.Image.resize`.
        shuffle: Whether to reshuffle the samples at construction time and in
            `on_epoch_end`.
        **kwargs: Forwarded to the `Sequence` base-class constructor.
    """

    def __init__(self, file_paths, class_to_idx, batch_size=32, image_size=(224, 224), shuffle=True, **kwargs):
        super().__init__(**kwargs)
        # Private copy: shuffling must not reorder the caller's list, which may
        # be shared with other dataset instances.
        self.file_paths = list(file_paths)
        self.class_to_idx = class_to_idx
        self.num_classes = len(class_to_idx)
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return `(images, one_hot_labels)` for batch number `index`.

        Raises:
            IndexError: If `index` is outside `range(len(self))`.
        """
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range for {len(self)} batches")

        batch_paths = self.file_paths[index * self.batch_size:(index + 1) * self.batch_size]
        images = []
        labels = []
        for img_path, cls in batch_paths:
            # The context manager closes the file handle. convert('RGB') gives
            # every image three channels so the batch can be stacked.
            with Image.open(img_path) as img:
                resized = img.convert('RGB').resize(self.image_size)
                images.append(np.asarray(resized, dtype=np.float32) / 255.0)
            labels.append(self.class_to_idx[cls])
        return np.stack(images), to_categorical(np.array(labels), num_classes=self.num_classes)

    def __iter__(self):
        """Yield each batch exactly once, in index order."""
        for index in range(len(self)):
            yield self[index]

    def on_epoch_end(self):
        """Reshuffle the sample order when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.file_paths)

In [None]:
# Map each class name to its position in `classes`; these are the integer labels.
class_to_idx = dict(zip(classes, range(len(classes))))

In [None]:
# Train/val/test datasets resized for the InceptionV3 input resolution.
train_dataset_inception, val_dataset_inception, test_dataset_inception = (
    CustomDataset(split_files, class_to_idx, image_size=(inception_size, inception_size))
    for split_files in (train_files, val_files, test_files)
)

In [None]:
# Train/val/test datasets resized for the backbones that use the smaller input resolution.
train_dataset_others, val_dataset_others, test_dataset_others = (
    CustomDataset(split_files, class_to_idx, image_size=(other_size, other_size))
    for split_files in (train_files, val_files, test_files)
)

In [None]:
def create_model(base_model, num_classes):
    """Attach a classification head to `base_model`.

    The head is global average pooling, a 1024-unit ReLU dense layer, and a
    softmax layer with `num_classes` outputs.

    Args:
        base_model: Keras model whose `output` feeds the new head.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras `Model` mapping `base_model.input` to the class probabilities.
    """
    pooled = layers.GlobalAveragePooling2D()(base_model.output)
    hidden = layers.Dense(1024, activation='relu')(pooled)
    class_probabilities = layers.Dense(num_classes, activation='softmax')(hidden)
    return models.Model(inputs=base_model.input, outputs=class_probabilities)

In [None]:
def train_model(model, train_dataset, val_dataset, epochs=40, initial_lr=0.001):
    """Compile `model` and fit it with early stopping on validation loss.

    Args:
        model: Keras model to train; it is compiled in place.
        train_dataset: Training data passed to `model.fit`.
        val_dataset: Validation data passed to `model.fit`.
        epochs: Maximum number of epochs.
        initial_lr: Learning rate for the Adam optimizer.

    Returns:
        The `History` object returned by `model.fit`.
    """
    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=optimizers.Adam(learning_rate=initial_lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Stop after 5 epochs without val_loss improvement and roll back to the best weights.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    return model.fit(train_dataset, validation_data=val_dataset, epochs=epochs, callbacks=[early_stopping])


In [None]:
def evaluate_model(model, test_dataset):
    """Evaluate `model` on `test_dataset`.

    Returns:
        `(test_loss, test_accuracy)`, where the accuracy is the second value
        returned by `model.evaluate` multiplied by 100.
    """
    metrics = model.evaluate(test_dataset)
    loss_value = metrics[0]
    accuracy_percent = metrics[1] * 100
    return loss_value, accuracy_percent

In [None]:
# Train and evaluate every backbone once per crop label; `results` is keyed by crop, then model name.
crops = ['Cherry']
results = {}

for crop in crops:
    # Fresh datasets at each input resolution (inception-sized first, then the rest).
    train_dataset_inception, val_dataset_inception, test_dataset_inception = (
        CustomDataset(split_files, class_to_idx, image_size=(inception_size, inception_size))
        for split_files in (train_files, val_files, test_files)
    )
    train_dataset_others, val_dataset_others, test_dataset_others = (
        CustomDataset(split_files, class_to_idx, image_size=(other_size, other_size))
        for split_files in (train_files, val_files, test_files)
    )

    num_classes = len(class_to_idx)

    # ImageNet-pretrained backbones without their original classification tops.
    pretrained_models = dict(
        InceptionV3=InceptionV3(weights='imagenet', include_top=False, input_shape=(inception_size, inception_size, 3)),
        ResNet152=ResNet152(weights='imagenet', include_top=False, input_shape=(other_size, other_size, 3)),
        VGG19=VGG19(weights='imagenet', include_top=False, input_shape=(other_size, other_size, 3)),
    )

    crop_results = {}

    for model_name, base_model in pretrained_models.items():
        model = create_model(base_model, num_classes)

        # Pick the dataset trio whose image size matches this backbone's input.
        train_dataset, val_dataset, test_dataset = (
            (train_dataset_inception, val_dataset_inception, test_dataset_inception)
            if model_name == 'InceptionV3'
            else (train_dataset_others, val_dataset_others, test_dataset_others)
        )

        print(f'--------------- Training model: {model_name}')
        train_model(model, train_dataset, val_dataset)

        test_loss, test_accuracy = evaluate_model(model, test_dataset)

        crop_results[model_name] = dict(
            model=model,
            test_loss=test_loss,
            test_accuracy=test_accuracy,
        )
        print(f'{crop} - {model_name} Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
        print(f'\n')

    results[crop] = crop_results

In [None]:
def display_model_metrics_table(results, test_dataset):
    """Display a table of macro precision/recall/F1 for every model in `results`.

    Args:
        results: Mapping `{crop: {model_name: {'model': keras_model, ...}}}`.
        test_dataset: Indexable batch source with `len()`. Each item is
            `(images, one_hot_labels)`. The same dataset is fed to every model,
            so its image size must match each model's input.
    """
    metrics_data = []

    for crop, crop_results in results.items():
        for model_name, model_info in crop_results.items():
            model = model_info['model']
            all_labels = []
            all_predicted = []

            # Index explicitly so each batch is visited exactly once, without
            # depending on the dataset base class providing `__iter__`.
            for batch_index in range(len(test_dataset)):
                images, labels = test_dataset[batch_index]
                # verbose=0 avoids one progress bar per batch.
                outputs = model.predict(images, verbose=0)
                all_predicted.extend(np.argmax(outputs, axis=1))
                all_labels.extend(np.argmax(labels, axis=1))

            # zero_division=0 scores never-predicted classes as 0 without warnings.
            precision, recall, f1, _ = precision_recall_fscore_support(
                all_labels, all_predicted, average='macro', zero_division=0
            )

            metrics_data.append({
                'Crop': crop,
                'Model': model_name,
                'Precision': precision,
                'Recall': recall,
                'F1-score': f1
            })

    # Explicit columns keep the header visible even when `results` is empty.
    metrics_df = pd.DataFrame(metrics_data, columns=['Crop', 'Model', 'Precision', 'Recall', 'F1-score'])
    display(metrics_df)

In [None]:
def display_classification_report(model, test_dataset):
    """Print scikit-learn's per-class classification report for `model`.

    Args:
        model: Keras model whose `predict` returns per-class scores.
        test_dataset: Indexable batch source with `len()`. Each item is
            `(images, one_hot_labels)`. The class count is read from
            `test_dataset.num_classes` when present, otherwise from
            `len(test_dataset.class_to_idx)`.
    """
    all_labels = []
    all_predicted = []

    # Index explicitly so each batch is visited exactly once, without depending
    # on the dataset base class providing `__iter__`.
    for batch_index in range(len(test_dataset)):
        images, labels = test_dataset[batch_index]
        outputs = model.predict(images, verbose=0)
        all_predicted.extend(np.argmax(outputs, axis=1))
        all_labels.extend(np.argmax(labels, axis=1))

    # The dataset may not define `num_classes`; fall back to its label mapping.
    num_classes = getattr(test_dataset, 'num_classes', None)
    if num_classes is None:
        num_classes = len(test_dataset.class_to_idx)

    # Passing `labels` keeps `target_names` aligned even when a class is missing
    # from both the labels and the predictions.
    class_ids = list(range(num_classes))
    report = classification_report(
        all_labels,
        all_predicted,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    )
    print(report)

In [None]:
def display_classification_results(model, test_dataset, num_images=5):
    """Plot the first images of the first test batch with true and predicted labels.

    Args:
        model: Keras model whose `predict` returns per-class scores.
        test_dataset: Indexable batch source with `len()`. Each item is
            `(images, one_hot_labels)`, with images scaled to [0, 1].
        num_images: Maximum number of images to show. Fewer are shown when the
            first batch is smaller.
    """
    if len(test_dataset) == 0:
        print('No test batches available to display.')
        return

    images, labels = test_dataset[0]
    # Never ask for more panels than the batch can fill.
    num_shown = min(num_images, len(images))
    if num_shown <= 0:
        print('No images available to display.')
        return

    outputs = model.predict(images[:num_shown], verbose=0)
    predicted = np.argmax(outputs, axis=1)

    # squeeze=False keeps `axes` two-dimensional, so a single panel is handled
    # the same way as several.
    fig, axes = plt.subplots(1, num_shown, figsize=(20, 8), squeeze=False)
    fig.suptitle('Classification Results', fontsize=16)

    for i, ax in enumerate(axes[0]):
        ax.imshow(np.clip(images[i], 0, 1))
        ax.set_title(f'True: {np.argmax(labels[i])}\n Pred: {predicted[i]}')
        ax.axis('off')

    plt.show()

In [None]:
# Each model must be scored on the test set whose image size matches its input:
# InceptionV3 was built for inception_size, the other backbones for other_size.
# Feeding one test set to every model raises a shape error on the mismatched ones.
for crop, crop_results in results.items():
    for model_name, model_info in crop_results.items():
        trained_model = model_info['model']
        matching_test_dataset = (
            test_dataset_inception if model_name == 'InceptionV3' else test_dataset_others
        )

        # One-row metrics table for this model, computed on its matching test set.
        display_model_metrics_table({crop: {model_name: model_info}}, matching_test_dataset)

        print(f'Displaying results for {crop} - {model_name}')
        display_classification_results(trained_model, matching_test_dataset)
        print(f'Displaying classification report for {crop} - {model_name}')
        display_classification_report(trained_model, matching_test_dataset)