In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import preprocess_input
import json
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
import tensorflow as tf

# Load the validation dataset
validation_set = tf.keras.utils.image_dataset_from_directory(
    '/kaggle/input/plant-village-dataset-updated',
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(224, 224),  # Adjusting the image size to match ResNet50 input
    shuffle=True,
    interpolation="bilinear",
    subset="validation",
    validation_split=0.2,  # Split data into training and validation
    seed=42 # Provide a seed for reproducibility
)



Found 67118 files belonging to 9 classes.
Using 13423 files for validation.


In [3]:
# Load the training dataset
training_set = tf.keras.utils.image_dataset_from_directory(
    '/kaggle/input/plant-village-dataset-updated',
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(224, 224),  # Adjusting the image size to match ResNet50 input
    shuffle=True,
    interpolation="bilinear",
    subset="training",
    validation_split=0.2,  # Split data into training and validation
    seed=42  # Provide a seed for reproducibility
)


Found 67118 files belonging to 9 classes.
Using 53695 files for training.


In [4]:
# Preprocess the datasets to match ResNet50 input requirements
training_set = training_set.map(lambda x, y: (preprocess_input(x), y))
validation_set = validation_set.map(lambda x, y: (preprocess_input(x), y))

In [5]:
from tensorflow.keras.applications import VGG16

# Load VGG16 model without the top classification layer, using ImageNet weights
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [6]:
# Freeze the base model layers to prevent them from being trained
base_model.trainable = False

In [7]:
# Build the new model on top of the ResNet50 base
model = tf.keras.Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(9, activation='softmax')  # 38 classes in the dataset
])

In [8]:
from tensorflow.keras.metrics import Precision, Recall, AUC

# Compile the model with additional metrics
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',           # Standard accuracy
        Precision(),          # Precision metric
        Recall(),             # Recall metric
        AUC(),                # AUC (Area Under Curve)
        'TruePositives',      # True positive count
        'TrueNegatives',      # True negative count
        'FalsePositives',     # False positive count
        'FalseNegatives'      # False negative count
    ]
)


In [9]:
# Model summary
model.summary()

In [10]:
# Training the model
training_history = model.fit(
    x=training_set,
    validation_data=validation_set,
    epochs=10
)


Epoch 1/10
[1m1678/1678[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 106ms/step - FalseNegatives: 5881.1729 - FalsePositives: 3708.8523 - TrueNegatives: 211330.9844 - TruePositives: 20998.8066 - accuracy: 0.7359 - auc: 0.9374 - loss: 0.9599 - precision: 0.7802 - recall: 0.7032 - val_FalseNegatives: 374.0000 - val_FalsePositives: 255.0000 - val_TrueNegatives: 107129.0000 - val_TruePositives: 13049.0000 - val_accuracy: 0.9760 - val_auc: 0.9994 - val_loss: 0.0816 - val_precision: 0.9808 - val_recall: 0.9721
Epoch 2/10
[1m1678/1678[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 90ms/step - FalseNegatives: 1095.6444 - FalsePositives: 778.3865 - TrueNegatives: 214261.4531 - TruePositives: 25784.3359 - accuracy: 0.9636 - auc: 0.9986 - loss: 0.1156 - precision: 0.9694 - recall: 0.9573 - val_FalseNegatives: 236.0000 - val_FalsePositives: 175.0000 - val_TrueNegatives: 107209.0000 - val_TruePositives: 13187.0000 - val_accuracy: 0.9846 - val_auc: 0.9997 - val_loss: 0.0531 - v

In [11]:
# Evaluate the model on the training set

results = model.evaluate(training_set)

# Extracting loss and accuracy along with additional metrics
train_loss = results[0]
train_acc = results[1]
precision = results[2]
recall = results[3]
auc = results[4]
true_positives = results[5]
true_negatives = results[6]
false_positives = results[7]
false_negatives = results[8]

# Printing the results
print(f'Train Loss: {train_loss}, Train Accuracy: {train_acc}')
print(f'Precision: {precision}, Recall: {recall}, AUC: {auc}')
print(f'True Positives: {true_positives}, True Negatives: {true_negatives}')
print(f'False Positives: {false_positives}, False Negatives: {false_negatives}')

[1m1678/1678[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 73ms/step - FalseNegatives: 27.0715 - FalsePositives: 20.4574 - TrueNegatives: 215019.3750 - TruePositives: 26852.9082 - accuracy: 0.9991 - auc: 1.0000 - loss: 0.0046 - precision: 0.9992 - recall: 0.9989
Train Loss: 0.004127086140215397, Train Accuracy: 0.9993109107017517
Precision: 0.9993667006492615, Recall: 0.9991991519927979, AUC: 0.9999995827674866
True Positives: 53652.0, True Negatives: 429526.0
False Positives: 34.0, False Negatives: 43.0


In [12]:
# Evaluate the model on the validation set


results = model.evaluate(validation_set)

# Extracting loss and accuracy along with additional metrics
train_loss = results[0]
train_acc = results[1]
precision = results[2]
recall = results[3]
auc = results[4]
true_positives = results[5]
true_negatives = results[6]
false_positives = results[7]
false_negatives = results[8]

# Printing the results
print(f'Train Loss: {train_loss}, Train Accuracy: {train_acc}')
print(f'Precision: {precision}, Recall: {recall}, AUC: {auc}')
print(f'True Positives: {true_positives}, True Negatives: {true_negatives}')
print(f'False Positives: {false_positives}, False Negatives: {false_negatives}')

[1m420/420[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 73ms/step - FalseNegatives: 50.0451 - FalsePositives: 44.5772 - TrueNegatives: 53970.1680 - TruePositives: 6701.7983 - accuracy: 0.9933 - auc: 0.9996 - loss: 0.0215 - precision: 0.9935 - recall: 0.9928
Train Loss: 0.02020644210278988, Train Accuracy: 0.9934440851211548
Precision: 0.9938105940818787, Recall: 0.9928480982780457, AUC: 0.9996892809867859
True Positives: 13327.0, True Negatives: 107301.0
False Positives: 83.0, False Negatives: 96.0


In [13]:
# Load the test dataset from the 'Test' subdirectories of the plant categories
test_set = tf.keras.utils.image_dataset_from_directory(
    '/kaggle/input/plant-village-dataset-updated',
    labels="inferred",
    label_mode="categorical",
    color_mode="rgb",
    batch_size=32,
    image_size=(224, 224),  # Adjusting the image size to match ResNet50 input
    shuffle=False,  # Typically, you don't shuffle the test set
    interpolation="bilinear",
)

# Evaluate the model on the test set
test_results = model.evaluate(test_set)

# Extracting test metrics
test_loss = test_results[0]
test_acc = test_results[1]
test_precision = test_results[2]
test_recall = test_results[3]
test_auc = test_results[4]
test_true_positives = test_results[5]
test_true_negatives = test_results[6]
test_false_positives = test_results[7]
test_false_negatives = test_results[8]

# Printing the results for the test set
print(f'Test Loss: {test_loss}, Test Accuracy: {test_acc}')
print(f'Test Precision: {test_precision}, Test Recall: {test_recall}, Test AUC: {test_auc}')
print(f'Test True Positives: {test_true_positives}, Test True Negatives: {test_true_negatives}')
print(f'Test False Positives: {test_false_positives}, Test False Negatives: {test_false_negatives}')


Found 67118 files belonging to 9 classes.
[1m2098/2098[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 69ms/step - FalseNegatives: 1696.8695 - FalsePositives: 1614.8685 - TrueNegatives: 267184.8750 - TruePositives: 31903.0977 - accuracy: 0.9546 - auc: 0.9944 - loss: 0.1722 - precision: 0.9558 - recall: 0.9538
Test Loss: 0.22326570749282837, Test Accuracy: 0.946243941783905
Test Precision: 0.9479731917381287, Test Recall: 0.9452754855155945, Test AUC: 0.992085874080658
Test True Positives: 63445.0, Test True Negatives: 533462.0
Test False Positives: 3482.0, Test False Negatives: 3673.0


In [14]:
# Save the model
model.save('plant-village_disease_vgg16.keras')

In [15]:
# Save training history to JSON
with open('hist_plant-village_disease_vgg16.json', 'w') as f:
    json.dump(training_history.history, f)