In [3]:
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


In [4]:
def split_dataset(root_dir, output_dir, test_size=0.15, val_size=0.15, seed=42):
    """
    Splits dataset into train/val/test sets and creates directory structure
    """
    # Create output directories
    os.makedirs(os.path.join(output_dir, 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'val'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'test'), exist_ok=True)

    # Process each class
    for class_name in ['Healthy', 'Unhealthy']:
        # Create class directories in train/val/test
        os.makedirs(os.path.join(output_dir, 'train', class_name), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'val', class_name), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'test', class_name), exist_ok=True)

        # Get list of images
        class_dir = os.path.join(root_dir, class_name)
        images = [f for f in os.listdir(class_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
        
        # Split into temp (85%) and test (15%)
        temp_images, test_images = train_test_split(images, 
                                                  test_size=test_size, 
                                                  random_state=seed)
        
        # Split temp into train (82.35%) and val (17.65%) to get final 70-15-15 split
        train_images, val_images = train_test_split(temp_images, 
                                                   test_size=val_size/(1-test_size), 
                                                   random_state=seed)
        
        # Function to copy images
        def copy_files(file_list, split_name):
            for f in file_list:
                src = os.path.join(class_dir, f)
                dst = os.path.join(output_dir, split_name, class_name, f)
                shutil.copyfile(src, dst)
        
        # Copy files to respective directories
        copy_files(train_images, 'train')
        copy_files(val_images, 'val')
        copy_files(test_images, 'test')

# Usage - modify these paths according to your setup
input_dir = './hibiscus_ext'  # Should contain 'diseased' and 'not_diseased' folders
output_dir = 'images_hibiscus_ext'  # New directory that will be created

split_dataset(input_dir, output_dir)



In [5]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

# Define paths
base_dir = './images_hibiscus_ext'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Image preprocessing and augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255  # Normalize pixel values to [0, 1]
)

validation_datagen = ImageDataGenerator(rescale=1./255)  # Only rescale for validation
test_datagen = ImageDataGenerator(rescale=1./255)  # Only rescale for testing

# Create data generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),  # Resize images to 224x224 (VGG19 input size)
    batch_size=32,
    class_mode='binary'  # Binary classification (healthy/unhealthy)
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False  # Do not shuffle for evaluation
)

# Load the VGG19 model (pre-trained on ImageNet)
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model (do not train the pre-trained layers)
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top of the base model
x = base_model.output
x = Flatten()(x)  # Flatten the output of the base model
x = Dense(512, activation='relu')(x)  # Add a fully connected layer
x = Dropout(0.5)(x)  # Add dropout for regularization
predictions = Dense(1, activation='sigmoid')(x)  # Output layer for binary classification

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    epochs=8  # Number of epochs
)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)
print(f'Test Accuracy: {test_acc:.4f}')

# Predictions on the test set
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary predictions

# Classification report and confusion matrix
print('Classification Report:')
print(classification_report(test_generator.classes, y_pred))

print('Confusion Matrix:')
print(confusion_matrix(test_generator.classes, y_pred))

Found 2238 images belonging to 2 classes.
Found 482 images belonging to 2 classes.
Found 480 images belonging to 2 classes.


  self._warn_if_super_not_called()


Epoch 1/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m840s[0m 12s/step - accuracy: 0.7169 - loss: 0.5538 - val_accuracy: 0.9208 - val_loss: 0.1854
Epoch 2/8
[1m 1/69[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:17[0m 13s/step - accuracy: 0.9062 - loss: 0.2100



[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 3s/step - accuracy: 0.9062 - loss: 0.2100 - val_accuracy: 0.9479 - val_loss: 0.1601
Epoch 3/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1037s[0m 15s/step - accuracy: 0.9241 - loss: 0.2047 - val_accuracy: 0.9833 - val_loss: 0.0846
Epoch 4/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3s/step - accuracy: 0.9688 - loss: 0.0958 - val_accuracy: 0.9812 - val_loss: 0.0866
Epoch 5/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1030s[0m 15s/step - accuracy: 0.9630 - loss: 0.1180 - val_accuracy: 0.9917 - val_loss: 0.0651
Epoch 6/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3s/step - accuracy: 1.0000 - loss: 0.0507 - val_accuracy: 0.9937 - val_loss: 0.0603
Epoch 7/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1030s[0m 15s/step - accuracy: 0.9804 - loss: 0.0834 - val_accuracy: 0.9854 - val_loss: 0.0546
Epoch 8/8
[1m69/69[0m [32m━━━━━━━━━━━━━━━