In [89]:
# Build the input pipeline with tf.keras' image_dataset_from_directory:
# images are read from the "PlantVillage" folder, resized to
# IMAGE_SIZE x IMAGE_SIZE, shuffled, and grouped into batches of BATCH_SIZE.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "PlantVillage",
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    shuffle=True,
)

Found 2152 files belonging to 3 classes.


In [90]:
# Images are labelled by the folder they live in, so the folder names double
# as the class names.
class_names = dataset.class_names
class_names

['Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy']

In [91]:
# Fraction of the dataset reserved for training (80%).
# NOTE: `dataset` is batched, so len(dataset) counts batches, not individual images.
train_size = 0.8
len(dataset) * train_size  # batches (not images) that would go to training; fractional here

54.400000000000006

In [92]:
# Skip the first 54 batches (the training share). What remains is the combined
# validation + test portion; it is only temporarily stored under the name `test_ds`.
test_ds = dataset.skip(54)
len(test_ds)

14

In [93]:
# Fraction of the dataset reserved for validation (10%).
# As above, len(dataset) is a batch count, so the result is in batches.
val_size = 0.1
len(dataset) * val_size  # batches (not images) that would go to validation; fractional here


6.800000000000001

In [94]:
# Test split: drop the 54 training batches and then the 6 validation batches.
# It is built from `dataset` directly rather than from the previous `test_ds`,
# so re-running this cell does not skip a further 6 batches every time.
test_ds = dataset.skip(54).skip(6)
len(test_ds)

8

In [95]:
def get_dataset_partitions_tf(ds, train_split =0.8, val_split =0.1, test_split =0.1, shuffle =True,  shuffle_size =10000):
    """Split a dataset into train, validation and test partitions.

    The split is positional: the first ``int(train_split * len(ds))`` elements
    become the training set, the next ``int(val_split * len(ds))`` elements the
    validation set, and every remaining element the test set.

    Args:
        ds: Dataset to split. Must support ``len()``, ``shuffle()``, ``take()``
            and ``skip()``.
        train_split: Fraction of the elements used for training.
        val_split: Fraction of the elements used for validation.
        test_split: Accepted for symmetry but not used in the computation; the
            test partition always receives whatever is left after the training
            and validation partitions.
        shuffle: If True, shuffle ``ds`` (seed 12) before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple.
    """
    # Total number of elements in the dataset.
    ds_size = len(ds)

    if shuffle:
        # reshuffle_each_iteration=False keeps the shuffled order identical on
        # every pass. With the default (reshuffle on each iteration) the
        # take/skip windows below would select different elements each epoch,
        # letting training elements drift into the validation/test partitions.
        # NOTE(review): if `ds` itself reshuffles upstream on every iteration,
        # the partitions can still differ between passes - confirm against how
        # `ds` was built.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    # Partition sizes; fractions are truncated towards zero.
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    # First `train_size` elements -> training.
    train_ds = ds.take(train_size)
    # Next `val_size` elements -> validation.
    val_ds = ds.skip(train_size).take(val_size)
    # Everything after the training and validation elements -> test.
    test_ds = ds.skip(train_size).skip(val_size)

    return train_ds, val_ds, test_ds
    

In [96]:
# Split the batched dataset into training, validation and test partitions
# using the function's default split fractions.
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)


In [97]:
# Number of batches (not images) in the training partition.
len(train_ds)

54

In [98]:
# Number of batches (not images) in the validation partition.
len(val_ds)

6

In [99]:
# Number of batches (not images) in the test partition.
len(test_ds)

8

In [100]:
# Input-pipeline tuning, applied to each partition separately:
#   cache()    keeps the loaded batches in memory after the first pass,
#   shuffle()  re-orders the batches using a 10,000-element buffer,
#   prefetch() prepares upcoming batches while the current one is being consumed.
#
# Each partition must be derived from ITS OWN dataset. Building val_ds/test_ds
# from train_ds would silently replace them with the training data, so
# validation and test metrics would be measured on data the model trained on.
train_ds = train_ds.cache().shuffle(10000).prefetch(buffer_size=tf.data.AUTOTUNE)

# Validation partition: same caching, shuffling and prefetching.
val_ds = val_ds.cache().shuffle(10000).prefetch(buffer_size=tf.data.AUTOTUNE)

# Test partition: same caching, shuffling and prefetching.
test_ds = test_ds.cache().shuffle(10000).prefetch(buffer_size=tf.data.AUTOTUNE)


In [101]:
# Preprocessing stage, packaged as a small Sequential model:
#   1. Resizing  - bring every image to IMAGE_SIZE x IMAGE_SIZE.
#   2. Rescaling - multiply pixel values by 1/255 (0-255 values end up in [0, 1]).
resize_and_rescale = tf.keras.Sequential(
    layers=[
        tf.keras.layers.Resizing(height=IMAGE_SIZE, width=IMAGE_SIZE),
        tf.keras.layers.Rescaling(scale=1.0 / 255),
    ]
)



In [102]:
# Data augmentation

data_augmentation = tf.keras.Sequential([
    # Randomly flip the images horizontally and/or vertically
    tf.keras.layers.RandomFlip('horizontal_and_vertical'),
    # Randomly rotate images. The factor is a fraction of a full turn (2*pi),
    # so 0.2 allows rotations of up to +/-72 degrees.
    tf.keras.layers.RandomRotation(0.2),
    # Randomly zoom in or out by up to 20%
    tf.keras.layers.RandomZoom(0.2),
    # Randomly shift brightness by up to 20% of the pixel value range.
    # In the model this pipeline runs after resize_and_rescale, so pixels are
    # in [0, 1]; value_range must say so. The layer's default range of
    # (0, 255) would apply shifts far larger than the [0, 1] pixel values.
    tf.keras.layers.RandomBrightness(0.2, value_range=(0.0, 1.0)),
])


# Build the CNN and train it on the training dataset, now that loading and preprocessing are done

In [106]:

# Full input shape handed to model.build(); it includes the leading batch dimension.
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
# One output neuron per class; derived from the class folders rather than hard-coded.
n_classes = len(class_names)

model = models.Sequential([
    resize_and_rescale,   # 1) resize and rescale the incoming images
    data_augmentation,    # 2) random augmentation
    # 3) Convolution + max-pooling stack. Each pooling step halves the spatial
    #    size while keeping the strongest activations.
    #    No `input_shape` is passed here: this is not the first layer of the
    #    model, and that argument describes a single sample (without the batch
    #    dimension). The model's input shape is supplied by model.build() below.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # 4) Flatten the feature maps into a vector for the dense layers.
    layers.Flatten(),
    # 5) Hidden dense layer.
    layers.Dense(64, activation='relu'),
    # 6) Output layer: one softmax probability per class.
    layers.Dense(n_classes, activation='softmax'),
])

model.build(input_shape=input_shape)