Import the necessary libraries

In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics as metrics

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from google.colab import drive 

Connect with drive to load the dataset

In [2]:
# Mount Google Drive inside the Colab VM so the dataset folders can be read;
# force_remount=True requests a fresh mount even if one already exists.
mount_point = '/content/drive'
drive.mount(mount_point, force_remount=True)

Mounted at /content/drive


Checking number of classes

In [3]:

# Every sub-directory of the training folder is one class. Plain files that
# may sit next to the class folders (e.g. OS metadata such as .DS_Store) are
# skipped, so n_classes counts directories only -- the same thing Keras'
# flow_from_directory treats as classes.
root_path = '/content/drive/MyDrive/train'
class_names = sorted(
    entry
    for entry in os.listdir(root_path)
    if os.path.isdir(os.path.join(root_path, entry))
)
n_classes = len(class_names)
print(f"Total Number of Classes : {n_classes} \nClass Names : {class_names}")

Total Number of Classes : 15 
Class Names : ['Bean', 'Bitter_Gourd', 'Bottle_Gourd', 'Brinjal', 'Broccoli', 'Cabbage', 'Capsicum', 'Carrot', 'Cauliflower', 'Cucumber', 'Papaya', 'Potato', 'Pumpkin', 'Radish', 'Tomato']


Checking the shape, number of classes, and balance of classes

In [4]:

# Build one [category, class id] row per training image and print how many
# images each class has, to check that the classes are balanced.
data_dir = '/content/drive/MyDrive/train'

# Sorted so that the integer id of a class is deterministic and follows the
# alphanumeric order Keras' flow_from_directory uses for its label indices
# (os.listdir returns entries in arbitrary order). Non-directory entries are
# skipped because listing them as if they were class folders would fail.
categories = sorted(
    name
    for name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, name))
)

data = []
for class_num, category in enumerate(categories):
    # Only the number of files matters here; the images are not opened.
    n_images = len(os.listdir(os.path.join(data_dir, category)))
    data.extend([category, class_num] for _ in range(n_images))

df = pd.DataFrame(data, columns=['Category', 'Class'])
print(df['Category'].value_counts())

Carrot          1000
Brinjal         1000
Broccoli        1000
Capsicum        1000
Cauliflower     1000
Cabbage         1000
Bean            1000
Cucumber        1000
Bitter_Gourd    1000
Bottle_Gourd    1000
Pumpkin         1000
Potato          1000
Radish          1000
Tomato          1000
Papaya          1000
Name: Category, dtype: int64


In [5]:

# Shared constants: the target image size and the paths to the three
# dataset split directories on the mounted drive.
drive_root = '/content/drive/MyDrive'
iShape = (224, 224)
trainData, validationData, testData = (
    f'{drive_root}/{split}' for split in ('train', 'validation', 'test')
)

In [6]:

# Scale every pixel value by 1/255; no augmentation is applied by this generator.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

# With directory iterators the batch size is decided here, not by fit().
# It is spelled out so the value in use is visible; 32 is the Keras default,
# so the batches are the same size as before.
generator_batch_size = 32
# Fixed seed so the shuffled order of the training images is repeatable.
shuffle_seed = 42

train_generator = datagen.flow_from_directory(
    trainData,
    shuffle=True,
    seed=shuffle_seed,
    batch_size=generator_batch_size,
    target_size=iShape,
)
# Validation and test data keep their on-disk order (shuffle=False).
valid_generator = datagen.flow_from_directory(
    validationData,
    shuffle=False,
    batch_size=generator_batch_size,
    target_size=iShape,
)
test_generator = datagen.flow_from_directory(
    testData,
    shuffle=False,
    batch_size=generator_batch_size,
    target_size=iShape,
)

Found 15000 images belonging to 15 classes.
Found 3000 images belonging to 15 classes.
Found 3000 images belonging to 15 classes.


# First model: a simple Convolutional Neural Network (CNN)

In [7]:
def create_cnn_model(input_shape, n_classes):
    """Build (without compiling) a small convolutional image classifier.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, ReLU), Dropout(0.5) and a softmax
    output layer.

    Args:
        input_shape: Shape of a single input sample, given to `layers.Input`.
        n_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled `keras.Sequential` model.
    """
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape))

    # Feature extractor: the filter count doubles from one stage to the next.
    for n_filters in (32, 64):
        model.add(layers.Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(n_classes, activation='softmax'))

    return model

# Instantiate the CNN for 3-channel images of size iShape, compile it for
# one-hot (categorical) labels and print the layer summary.
cnn_input_shape = (*iShape, 3)
cnn_model = create_cnn_model(cnn_input_shape, n_classes)
cnn_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
cnn_model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 186624)            0         
                                                                 
 dense (Dense)               (None, 128)               2

# Training the CNN

In [None]:
epochs = 20
# For generator input Keras takes the batch size from the generator itself,
# and the fit() documentation says batch_size must not be specified in that
# case. The argument is therefore not passed; the variable records the batch
# size that is actually in effect.
batch_size = train_generator.batch_size

history_cnn = cnn_model.fit(train_generator, epochs=epochs, validation_data=valid_generator)


Epoch 1/20

# Second model: model with transfer learning (ResNet50)

In [None]:
def create_resnet50_model(input_shape, n_classes):
    """Build (without compiling) a frozen ImageNet ResNet50 with a softmax head.

    The data generators in this notebook rescale pixels by 1/255, but the
    ImageNet weights of ResNet50 expect inputs prepared by
    `keras.applications.resnet50.preprocess_input`, which works on 0-255
    pixel values. The model therefore undoes the 1/255 scaling and applies
    that preprocessing as its first layers, so it can be fed by the same
    generators as the plain CNN while the pretrained features see the input
    distribution they were trained on.

    Args:
        input_shape: Shape of a single input image, e.g. (224, 224, 3).
            Inputs are assumed to be scaled to [0, 1].
        n_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled `keras.Sequential` model; only the final Dense layer
        is trainable.
    """
    base_model = keras.applications.ResNet50(input_shape=input_shape, include_top=False, weights='imagenet')
    # Freeze the pretrained backbone: only the new classification head learns.
    base_model.trainable = False

    model = keras.Sequential([
        layers.Input(shape=input_shape),
        # Back from [0, 1] to [0, 255], then the ResNet50-specific preprocessing.
        layers.Rescaling(255.0),
        layers.Lambda(keras.applications.resnet50.preprocess_input),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(n_classes, activation='softmax')
    ])

    return model

# Instantiate the transfer-learning model for 3-channel images of size
# iShape, compile it for one-hot (categorical) labels and print its summary.
resnet50_input_shape = (*iShape, 3)
resnet50_model = create_resnet50_model(resnet50_input_shape, n_classes)
resnet50_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
resnet50_model.summary()


# Training the ResNet50 model

In [None]:
epochs = 10
# The generator, not fit(), determines the batch size, and the fit()
# documentation says batch_size must not be specified for generator input.
# The previously stated value of 20 was never the real batch size, so the
# variable now records the size the generator actually yields.
batch_size = train_generator.batch_size
history_resnet50 = resnet50_model.fit(train_generator, epochs=epochs, validation_data=valid_generator)


# Creating data generator with augmentation

In [None]:
# Random transformations applied on the fly to each training image.
augmentation_options = dict(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Same 1/255 pixel scaling as the plain generator, plus the augmentations.
augmented_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    **augmentation_options,
)


# Applying the data generator to the training set

In [None]:
# Augmented, shuffled batches drawn from the training directory, resized to iShape.
train_augmented_generator = augmented_datagen.flow_from_directory(
    directory=trainData,
    target_size=iShape,
    shuffle=True,
)


# Training the models with data augmentation

In [None]:
epochs = 10
# The generator, not fit(), determines the batch size, and the fit()
# documentation says batch_size must not be specified for generator input.
# The variable records the size the generator actually yields.
batch_size = train_augmented_generator.batch_size
# NOTE: cnn_model is the same instance that was already fitted on the plain
# generator, so this call continues its training on augmented data rather
# than training a fresh model from scratch.
history_cnn_augmented = cnn_model.fit(train_augmented_generator, epochs=epochs, validation_data=valid_generator)


In [None]:
epochs = 10
# The generator, not fit(), determines the batch size, and the fit()
# documentation says batch_size must not be specified for generator input.
# The variable records the size the generator actually yields.
batch_size = train_augmented_generator.batch_size
# NOTE: resnet50_model is the same instance that was already fitted on the
# plain generator, so this call continues its training on augmented data
# rather than training a fresh model from scratch.
history_resnet50_augmented = resnet50_model.fit(train_augmented_generator, epochs=epochs, validation_data=valid_generator)


In [None]:
# Helper that draws the learning curves recorded during a fit() call.
def plot_training_history(history, title):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose `.history` mapping holds the per-epoch series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' (as returned
            by the `fit()` calls in this notebook).
        title: Prefix used in the title of both panels.

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    curves = history.history
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (key in the history mapping, label shown on the panel), left to right.
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    for axis, (metric_key, metric_label) in zip(axes, panels):
        axis.plot(curves[metric_key])
        axis.plot(curves[f'val_{metric_key}'])
        axis.set_title(f'{title} - Model {metric_label}')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(metric_label)
        axis.legend(['Train', 'Validation'], loc='upper left')

    plt.show()


# Plotting the training history for the CNN model

In [None]:
# Learning curves of the CNN: first the run on plain data, then the run on
# augmented data.
plot_training_history(
    history=history_cnn,
    title='CNN Model (No Augmentation)',
)
plot_training_history(
    history=history_cnn_augmented,
    title='CNN Model (With Augmentation)',
)

# Plotting the training history for the ResNet50 model

In [None]:
# Learning curves of the ResNet50 model: first the run on plain data, then
# the run on augmented data.
plot_training_history(
    history=history_resnet50,
    title='ResNet50 Model (No Augmentation)',
)
plot_training_history(
    history=history_resnet50_augmented,
    title='ResNet50 Model (With Augmentation)',
)

# Evaluation of models on test set

In [None]:
# cnn_model and resnet50_model are each a single model instance that was
# first fitted on the plain generator and then fitted further on the
# augmented generator. By the time this cell runs there is no separate
# "no augmentation" model left, so evaluating the same instance twice and
# printing the result under two different labels would report identical
# numbers as if they were a comparison. Each model is therefore evaluated
# once and the label states what was actually measured.
# TODO: for a real with/without-augmentation comparison, evaluate right after
# the plain training run or train separate model instances for each setting.
cnn_test_loss, cnn_test_accuracy = cnn_model.evaluate(test_generator)
resnet50_test_loss, resnet50_test_accuracy = resnet50_model.evaluate(test_generator)

# The former "augmented" result names are kept as aliases of the same
# measurement so that any later cell referring to them still runs.
cnn_augmented_test_loss, cnn_augmented_test_accuracy = cnn_test_loss, cnn_test_accuracy
resnet50_augmented_test_loss, resnet50_augmented_test_accuracy = resnet50_test_loss, resnet50_test_accuracy

print(f"Test Accuracy (CNN, after plain + augmented training): {cnn_test_accuracy}")
print(f"Test Accuracy (ResNet50, after plain + augmented training): {resnet50_test_accuracy}")


The ResNet50 model showed good performance on both the training and test data, with an AUC score of 0.9625 on the test dataset. Due to the limited resources of my system and time constraints, I could not train the model sufficiently (there is a problem with my laptop: I trained with larger batch sizes and a larger number of epochs, but unfortunately the progress was not saved by the system). However, if I train the model for a larger number of epochs, I feel it could give more promising results. I will get my new laptop on Tuesday, April 4, and then I can provide better results. I apologize for the inconvenience.
