In [1]:
import os
import shutil
import random

In [2]:
dataset_path = "./PlantBalanced"

In [5]:
for category_folder in os.listdir(dataset_path):
    print(category_folder)

Bacterial_Spot
Curl_Virus
Early_Blight
Healthy
Late_Blight
Leaf_Mold
Mosaic_Virus
Septoria_Leaf_Spot
Spider_Mites
Target_Spot


In [8]:
desired_num_images = 1000

In [10]:
for category_folder in os.listdir(dataset_path):
    category_path = os.path.join(dataset_path, category_folder)

    # Check if it's a directory
    if os.path.isdir(category_path):
        # Get a list of all images in the category folder
        all_images = os.listdir(category_path)

        # If there are more than desired_num_images, randomly select only desired_num_images
        if len(all_images) > desired_num_images:
            selected_images = random.sample(all_images, desired_num_images)
            for image in all_images:
                if image not in selected_images:
                    image_path = os.path.join(category_path, image)
                    os.remove(image_path)
                    


In [16]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from IPython.display import HTML
import splitfolders


In [17]:
splitfolders.ratio("./PlantBalanced", output="output",
    seed=1337, ratio=(.8, .1, .1), group_prefix=None, move=False)

Copying files: 10000 files [01:13, 135.51 files/s]


In [18]:
IMAGE_SIZE = 256
CHANNELS = 3

In [20]:
output_directory = "./AugmentedImages"

In [21]:
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

In [22]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [37]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
        './output/train',
        target_size=(IMAGE_SIZE,IMAGE_SIZE),
        batch_size=32,
        class_mode="sparse",
            save_to_dir="./AugmentedImages"
)

Found 7999 images belonging to 10 classes.


In [25]:
train_generator.class_indices

{'Bacterial_Spot': 0,
 'Curl_Virus': 1,
 'Early_Blight': 2,
 'Healthy': 3,
 'Late_Blight': 4,
 'Leaf_Mold': 5,
 'Mosaic_Virus': 6,
 'Septoria_Leaf_Spot': 7,
 'Spider_Mites': 8,
 'Target_Spot': 9}

In [26]:
class_names = list(train_generator.class_indices.keys())
class_names

['Bacterial_Spot',
 'Curl_Virus',
 'Early_Blight',
 'Healthy',
 'Late_Blight',
 'Leaf_Mold',
 'Mosaic_Virus',
 'Septoria_Leaf_Spot',
 'Spider_Mites',
 'Target_Spot']

In [27]:
count=0
for image_batch, label_batch in train_generator:
    print(label_batch)
    print(image_batch[0])
    break

[2. 8. 4. 0. 2. 9. 0. 5. 1. 6. 7. 6. 8. 1. 4. 7. 1. 9. 4. 8. 4. 7. 4. 3.
 6. 5. 2. 7. 5. 3. 7. 0.]
[[[0.6308735  0.5642068  0.5877362 ]
  [0.6319302  0.5652635  0.5887929 ]
  [0.63298684 0.5663202  0.5898496 ]
  ...
  [0.83003414 0.80258316 0.83003414]
  [0.69489    0.66743904 0.69489   ]
  [0.7496614  0.72221035 0.7496614 ]]

 [[0.6207566  0.5540899  0.5776193 ]
  [0.6211089  0.55444217 0.57797164]
  [0.6214611  0.55479443 0.57832384]
  ...
  [0.82246125 0.79501027 0.82246125]
  [0.69753176 0.6700808  0.69753176]
  [0.75107026 0.7236193  0.75107026]]

 [[0.61626464 0.549598   0.5731274 ]
  [0.6164408  0.5497741  0.5733035 ]
  [0.61661685 0.5499502  0.5734796 ]
  ...
  [0.8148883  0.7874373  0.8148883 ]
  [0.70017344 0.67272246 0.70017344]
  [0.7524792  0.7250282  0.7524792 ]]

 ...

 [[0.5008043  0.42629448 0.4498239 ]
  [0.47565162 0.4011418  0.4246712 ]
  [0.58311707 0.50860727 0.5321367 ]
  ...
  [0.4963775  0.4453971  0.48069122]
  [0.4974342  0.44645378 0.48174793]
  [0.49849087 

In [28]:
validation_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        horizontal_flip=True)
validation_generator = validation_datagen.flow_from_directory(
        './output/val',
        target_size=(IMAGE_SIZE,IMAGE_SIZE),
        batch_size=32,
        class_mode="sparse"
)

Found 1000 images belonging to 10 classes.


In [29]:
test_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        horizontal_flip=True)

test_generator = test_datagen.flow_from_directory(
        'output/test',
        target_size=(IMAGE_SIZE,IMAGE_SIZE),
        batch_size=32,
        class_mode="sparse"
)

Found 1000 images belonging to 10 classes.


In [30]:
for image_batch, label_batch in test_generator:
    print(image_batch[0])
    break

[[[0.7824025  0.723579   0.696128  ]
  [0.78113633 0.7223128  0.6948618 ]
  [0.7803922  0.72156864 0.69411767]
  ...
  [0.7737638  0.70317554 0.6953324 ]
  [0.770535   0.6999467  0.69210356]
  [0.7744603  0.7038721  0.69602895]]

 [[0.7786981  0.7198745  0.6924235 ]
  [0.78502905 0.7262055  0.69875455]
  [0.7876104  0.7287868  0.70133585]
  ...
  [0.77566314 0.70507485 0.6972317 ]
  [0.76926875 0.69868046 0.6908373 ]
  [0.7757265  0.70513827 0.6972951 ]]

 [[0.7517265  0.692903   0.665452  ]
  [0.74982715 0.6910036  0.66355264]
  [0.7526589  0.6938354  0.6663844 ]
  ...
  [0.77574277 0.70515454 0.6973114 ]
  [0.7692524  0.6986642  0.69082105]
  [0.77647066 0.7058824  0.69803923]]

 ...

 [[0.6353061  0.5607963  0.53334534]
  [0.5962191  0.5217093  0.49425834]
  [0.5387922  0.4642824  0.4368314 ]
  ...
  [0.5645427  0.48611137 0.45081723]
  [0.5744333  0.49600196 0.46070784]
  [0.57886505 0.5004337  0.46513957]]

 [[0.6383004  0.5637906  0.53633964]
  [0.58734757 0.51283777 0.48538676]


In [33]:
input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 10

model = models.Sequential([
    layers.InputLayer(input_shape=input_shape),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

In [34]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 127, 127, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 60, 60, 64)        36928     
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 30, 30, 64)       

In [35]:
model.compile(
    optimizer=tf.optimizers.Adam(), 
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)