In [1]:
import pandas as pd
import os

# Paths
merged_dataset_path = '/Users/maggie/Desktop/project_3/Plant_ID_and_Diagnosis/Resources/plant_dr_master_dataset/master_dataset'
combined_csv_path = '/Users/maggie/Desktop/project_3/Plant_ID_and_Diagnosis/Resources/plant_dr_master_dataset/master_dataset/combined_labels.csv'

# Load the CSV file
df = pd.read_csv(combined_csv_path)

# Display the first few rows
print(df.head())

# Check the number of samples and classes
num_samples = df.shape[0]
num_classes = df['label'].nunique()
print(f"Total samples: {num_samples}")
print(f"Total classes: {num_classes}")


                          filename             label
0        flower_water_lily_0_1.jpg        water_lily
1  flower_great_masterwort_0_2.jpg  great_masterwort
2        flower_wallflower_0_3.jpg        wallflower
3              flower_rose_0_4.jpg              rose
4        flower_sword_lily_0_5.jpg        sword_lily
Total samples: 11054
Total classes: 124


In [2]:
from sklearn.model_selection import train_test_split

# Split dataframe into train and temp (validation + test)
train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=42)

# Split temp_df into validation and test
valid_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

# Display the number of samples in each set
print(f"Training samples: {train_df.shape[0]}")
print(f"Validation samples: {valid_df.shape[0]}")
print(f"Test samples: {test_df.shape[0]}")


Training samples: 7737
Validation samples: 1658
Test samples: 1659


In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image dimensions
img_height, img_width = 128, 128
batch_size = 32

# Training data generator with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data generator (no augmentation)
valid_datagen = ImageDataGenerator(rescale=1./255)

# Test data generator (no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training generator
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=merged_dataset_path,
    x_col='filename',
    y_col='label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Validation generator
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    directory=merged_dataset_path,
    x_col='filename',
    y_col='label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Test generator
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=merged_dataset_path,
    x_col='filename',
    y_col='label',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 7737 validated image filenames belonging to 124 classes.
Found 1658 validated image filenames belonging to 124 classes.
Found 1659 validated image filenames belonging to 124 classes.


In [4]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the VGG16 model without the top layer
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers on top
x1 = base_model.output
x2 = Flatten()(x1)
x3 = Dense(1024, activation='relu')(x2)
x4 = Dropout(0.5)(x3)  # Adding dropout for regularization
x5 = Dense(512, activation='relu')(x4)
x6 = Dropout(0.5)(x5)
predictions = Dense(num_classes, activation='softmax')(x6)

# Define the complete model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summary of the model
model.summary()


In [7]:
# Define callbacks
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Save the best model
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True)

# Calculate steps per epoch
steps_per_epoch = train_generator.samples // batch_size
validation_steps = valid_generator.samples // batch_size

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    epochs=5,
    callbacks=[early_stop, checkpoint]
)


Epoch 1/5
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m819s[0m 3s/step - accuracy: 0.0423 - loss: 4.6176 - val_accuracy: 0.1955 - val_loss: 3.6251
Epoch 2/5
[1m  1/241[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:47[0m 1s/step - accuracy: 0.1562 - loss: 4.0966

2024-08-23 12:03:22.295116: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.1562 - loss: 4.0966 - val_accuracy: 0.2308 - val_loss: 3.6923
Epoch 3/5


2024-08-23 12:03:23.299768: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18029s[0m 75s/step - accuracy: 0.1201 - loss: 3.8779 - val_accuracy: 0.2733 - val_loss: 3.2009
Epoch 4/5
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m997s[0m 4ms/step - accuracy: 0.1875 - loss: 3.4351 - val_accuracy: 0.3077 - val_loss: 3.3104
Epoch 5/5


2024-08-23 17:20:28.856553: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47614s[0m 198s/step - accuracy: 0.1807 - loss: 3.4922 - val_accuracy: 0.3401 - val_loss: 2.8248


In [None]:
# Load the best model
from tensorflow.keras.models import load_model

best_model = load_model('best_model.h5')

# Evaluate on test set
test_loss, test_accuracy = best_model.evaluate(test_generator, steps=test_generator.samples // batch_size)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Path to directory with new images
new_images_path = '/path/to/new/images'

# Create a generator for new images
new_datagen = ImageDataGenerator(rescale=1./255)

new_generator = new_datagen.flow_from_directory(
    directory=new_images_path,
    target_size=(img_height, img_width),
    batch_size=1,
    class_mode=None,
    shuffle=False
)

# Make predictions
predictions = best_model.predict(new_generator)

# Get predicted class indices
predicted_class_indices = predictions.argmax(axis=1)

# Map class indices to class labels
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predicted_labels = [labels[k] for k in predicted_class_indices]

# Get filenames
filenames = new_generator.filenames

# Create a DataFrame with filenames and predicted labels
results = pd.DataFrame({"Filename": filenames,
                        "Predicted Label": predicted_labels})

# Display results
print(results)


In [None]:
import matplotlib.pyplot as plt

# Plot accuracy
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()


In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Predict classes on test set
Y_pred = best_model.predict(test_generator)
y_pred = np.argmax(Y_pred, axis=1)

# Get true labels
y_true = test_generator.classes

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Plot confusion matrix
plt.figure(figsize=(20, 20))
sns.heatmap(cm, annot=False, cmap='Blues', fmt='g')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# Classification report
report = classification_report(y_true, y_pred, target_names=list(labels.values()))
print(report)
