In [4]:
import math
import tensorflow as tf
import pandas as pd
import shutil
import random
import os
import json

In [5]:
# Directory containing dataset
data_directory = os.path.join(
    "archive", "plantvillage dataset", "color"
)

In [6]:
# Make folders to hold dataset
split_dataset_dir = "split_dataset"
os.mkdir(split_dataset_dir)
os.mkdir(os.path.join(split_dataset_dir, "train"))
os.mkdir(os.path.join(split_dataset_dir, "validation"))
os.mkdir(os.path.join(split_dataset_dir, "test"))

In [7]:
def split_train_validation_test(
    from_directory: str,
    destination_directory: str,
    test_percent: int = 1,
    validation_percent: int = 20,
) -> None:
    """
    Copy image files from folder into the destination folder split into three
    folders: train, validation, test.
    :param from_directory: Directory containing images.
    :param destination_directory: Destination directory.
    :param test_percent: Percent of total images by type to go into this folder.
    :param validation_percent: Percent of total images by type to go into this folder.
    :return: None.
    """
    dataset_dirs = ["test", "validation", "train"]
    split_percent = (test_percent, validation_percent, 100)
    images_copied = set()
    folders = os.listdir(from_directory)
    for index, split_dir in enumerate(dataset_dirs):
        print("\n")
        for folder in folders:
            folder_path = os.path.join(from_directory, folder)

            # Get list of images and filter out already copied images
            images = os.listdir(folder_path)
            images = list(set(images) - images_copied)

            # Number of images to copy
            num_of_images = math.ceil(len(images) * (split_percent[index] / 100))
            os.mkdir(os.path.join(destination_directory, split_dir, folder))
            print("Copying .. ", end="")
            for _ in range(num_of_images):
                # Copy a random image from the folder
                image = images.pop(random.randint(0, len(images) - 1))
                images_copied.add(image)
                shutil.copy(
                    os.path.join(folder_path, image),
                    os.path.join(destination_directory, split_dir, folder),
                )

In [8]:
# Load images into folder split into train, validation, and test folders.
split_train_validation_test(data_directory, split_dataset_dir)



Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. 

Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. 

Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copying .. Copy

In [43]:
# Used as the base model (Transfer Learning)
base_model = tf.keras.applications.MobileNet(
    input_shape=(224, 224, 3), weights="imagenet", include_top=False, 
)

In [44]:
# Do not train this model.
base_model.trainable = False

In [54]:
# Training data
train_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(split_dataset_dir, "train"),
    labels="inferred",
    label_mode="int",
    seed=123,
    color_mode="rgb",
    batch_size=64,
    image_size=(256, 256),
    shuffle=True,
)

Found 42939 files belonging to 38 classes.


In [55]:
# Validation data
validation_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(split_dataset_dir, "validation"),
    labels="inferred",
    label_mode="int",
    seed=123,
    color_mode="rgb",
    batch_size=64,
    image_size=(256, 256),
    shuffle=False,
)

Found 10815 files belonging to 38 classes.


In [56]:
# Category names
class_names = train_ds.class_names
train_ds.class_names

['Apple___Apple_scab',
 'Apple___Black_rot',
 'Apple___Cedar_apple_rust',
 'Apple___healthy',
 'Blueberry___healthy',
 'Cherry_(including_sour)___Powdery_mildew',
 'Cherry_(including_sour)___healthy',
 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
 'Corn_(maize)___Common_rust_',
 'Corn_(maize)___Northern_Leaf_Blight',
 'Corn_(maize)___healthy',
 'Grape___Black_rot',
 'Grape___Esca_(Black_Measles)',
 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
 'Grape___healthy',
 'Orange___Haunglongbing_(Citrus_greening)',
 'Peach___Bacterial_spot',
 'Peach___healthy',
 'Pepper,_bell___Bacterial_spot',
 'Pepper,_bell___healthy',
 'Potato___Early_blight',
 'Potato___Late_blight',
 'Potato___healthy',
 'Raspberry___healthy',
 'Soybean___healthy',
 'Squash___Powdery_mildew',
 'Strawberry___Leaf_scorch',
 'Strawberry___healthy',
 'Tomato___Bacterial_spot',
 'Tomato___Early_blight',
 'Tomato___Late_blight',
 'Tomato___Leaf_Mold',
 'Tomato___Septoria_leaf_spot',
 'Tomato___Spider_mites Two-spotted_

In [57]:
# Augmentation of data
augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.RandomRotation(factor=0.2),
        tf.keras.layers.RandomZoom(height_factor=(-0.1, 0.1), width_factor=(-0.1, 0.1)),
        tf.keras.layers.RandomBrightness((-0.1, 0.1)),
    ]
)

In [58]:
# Rescale the image
image_size_and_scale = tf.keras.Sequential(
    [tf.keras.layers.Resizing(224, 224), tf.keras.layers.Rescaling(1.0 / 255)]
)


In [59]:
model = tf.keras.Sequential(
    [
        augmentation,
        image_size_and_scale,
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1028, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(class_names), activation="softmax"),
    ]
)

In [60]:
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [61]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)

In [62]:
results = model.fit(
    train_ds, validation_data=validation_ds, epochs=20, callbacks=[stop_early]
)

Epoch 1/20


2023-06-08 16:45:34.966183: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [42939]
	 [[{{node Placeholder/_4}}]]
2023-06-08 16:45:34.966619: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [42939]
	 [[{{node Placeholder/_4}}]]
2023-06-08 16:45:37.127562: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x353cb810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-06-08 16:45:37.127604: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce G



2023-06-08 16:46:30.347317: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [10815]
	 [[{{node Placeholder/_4}}]]
2023-06-08 16:46:30.347490: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [10815]
	 [[{{node Placeholder/_4}}]]


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


In [65]:
results = pd.DataFrame(results.history)

In [66]:
# Save to file for python server.
with open("history.json", "w") as history_file:
    results.to_json(history_file)

In [67]:
# Save the model
model.save("image_model", overwrite=True)

2023-06-08 17:12:58.082243: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_3_input' with dtype float and shape [?,256,256,3]
	 [[{{node random_flip_3_input}}]]
2023-06-08 17:12:58.182015: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256,256,3]
	 [[{{node inputs}}]]
2023-06-08 17:12:58.195828: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'random_flip_3_input' with dtype float and s

INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1024, 1028), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8c8f66cd0>, 140705954240096), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1028,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8c8f1b850>, 140705954240256), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1028, 38), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8a95a4c10>, 140705954245216), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(38,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8a95b5590>, 140705954244976), {}).


2023-06-08 17:13:01.604880: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256,256,3]
	 [[{{node inputs}}]]


INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1024, 1028), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8c8f66cd0>, 140705954240096), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1028,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8c8f1b850>, 140705954240256), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(1028, 38), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8a95a4c10>, 140705954245216), {}).
INFO:tensorflow:Unsupported signature for serialization: ((TensorSpec(shape=(38,), dtype=tf.float32, name='gradient'), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7ff8a95b5590>, 140705954244976), {}).


2023-06-08 17:13:02.169794: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,1028]
	 [[{{node inputs}}]]
2023-06-08 17:13:02.198005: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256,256,3]
	 [[{{node inputs}}]]
2023-06-08 17:13:02.244902: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,256,256,3]
	 [[{{node inputs}}]]
202

INFO:tensorflow:Assets written to: image_model/assets


INFO:tensorflow:Assets written to: image_model/assets


In [68]:
# Test dataset
test_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(split_dataset_dir, "test"),
    labels="inferred",
    label_mode="int",
    seed=123,
    color_mode="rgb",
    batch_size=64,
    image_size=(256, 256),
    shuffle=False,
)

Found 551 files belonging to 38 classes.


In [69]:
test_results = model.evaluate(test_ds)

2023-06-08 17:14:32.785403: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [551]
	 [[{{node Placeholder/_4}}]]
2023-06-08 17:14:32.785696: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [551]
	 [[{{node Placeholder/_4}}]]




In [70]:
# Save to file for python server.
with open("test_results.json", "w") as test_results_file:
    json.dump({"results": {"loss": test_results[0], "accuracy": test_results[1]}}, test_results_file)

In [80]:
"Test accuracy: {:.4}%".format(test_results[1] * 100)

'Test accuracy: 96.19%'

In [7]:
# Genterating image stats.

# This will be stored for the Python server to use. The "map" field will be used by
# the server to find the category of a prediction made by this model.
image_info = {"categories": [], "map": []}

# Returns items that end with any of the filter strings.
def filter_list(items, *filters):
    return_list = []
    for item in items:
        if (any([item.casefold().endswith(string) for string in filters])):
            return_list.append(item)
    return return_list
   
total = 0
map_plant_index = -1
map_plant_disease_index = 0   

plant_obj = {}

for name in class_names:
    # Split the category names into plant and disease.
    plant, disease = name.split('___')
    
    # Collect the total number of images.
    # This is filtered in case the directory contains any metadata files. 
    total += len(filter_list(os.listdir(os.path.join(data_directory, name)), ".jpg", ".jpeg"))
    
    plant = plant.replace("_", " ").strip(" ")
    
    # A space in the name means that the follow text is another name for the disease.
    # This statement will wrap that text in parenthesis.
    if " " in disease:
        disease = disease.replace(" ", " (") + ")"
    
    disease = disease.replace("_", " ").strip(" ")
    
    
    # Append the disease of the plant it belongs to with the number of images in that disease.
    if plant in [plant_info["name"] for plant_info in image_info["categories"]]:
        plant_obj["diseases"].append({"name": disease, "number": len(filter_list(os.listdir(os.path.join(data_directory, name)), ".jpg", ".jpeg"))})
        image_info["map"].append([map_plant_index, map_plant_disease_index]) 
        map_plant_disease_index += 1
    else:
        plant_obj = {"name": plant, "diseases": []}
        map_plant_disease_index = 0
        map_plant_index += 1
        image_info["categories"].append(plant_obj)
        plant_obj["diseases"].append({"name": disease, "number": len(filter_list(os.listdir(os.path.join(data_directory, name)), ".jpg", ".jpeg"))})
        image_info["map"].append([map_plant_index, map_plant_disease_index]) 
        map_plant_disease_index = 1


image_info["total"] = total


with open("image_info.json", "w") as image_info_file:
    json.dump(image_info, image_info_file)