### **Vegetable Image Dataset**
* The dataset used is the [Vegetable Image Dataset](https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset), uploaded by M Israk Ahmed on Kaggle.


In [5]:
# IMPORTING LIBRARIES
import numpy as np
import pandas as pd
import os
import tensorflow as tf
import random
import matplotlib.pyplot as plt

In [6]:
# DATA LOADING & IMAGE VISUALIZATION
# Randomly select one image from each class and visualize them in a 3x5 grid.
file_path = '../input/vegetable-image-dataset/Vegetable Images/train'
# os.listdir() returns entries in arbitrary order and may include stray files,
# so keep only sub-directories (one per class) and sort them for a stable layout.
class_names = sorted(
    entry for entry in os.listdir(file_path)
    if os.path.isdir(os.path.join(file_path, entry))
)
fig, axs = plt.subplots(3, 5, figsize=(20, 10))
grid = axs.ravel()
# zip() stops at the shorter sequence, so more than 15 classes cannot index
# past the end of the grid.
for ax, class_name in zip(grid, class_names):
    class_dir = os.path.join(file_path, class_name)
    img_path = os.path.join(class_dir, random.choice(os.listdir(class_dir)))
    ax.imshow(plt.imread(img_path))
    ax.axis('off')
    ax.set_title(class_name)
# Blank out any grid cells left over when there are fewer than 15 classes.
for ax in grid[len(class_names):]:
    ax.axis('off')
plt.show()

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# One rescaling-only generator per split (pixel values are multiplied by 1/255).
train_datagen, test_datagen, valid_datagen = (
    ImageDataGenerator(rescale=1/255.) for _ in range(3)
)

# Model training without data augmentation.
# Settings shared by all three directory iterators.
flow_options = dict(target_size=(224, 224), batch_size=32, class_mode='categorical')

train_data = train_datagen.flow_from_directory(
    '../input/vegetable-image-dataset/Vegetable Images/train',
    **flow_options,
)
# The test iterator is the only one created with shuffle=False.
test_data = test_datagen.flow_from_directory(
    '../input/vegetable-image-dataset/Vegetable Images/test',
    shuffle=False,
    **flow_options,
)
valid_data = valid_datagen.flow_from_directory(
    '../input/vegetable-image-dataset/Vegetable Images/validation',
    **flow_options,
)

In [9]:
# MODEL CREATION
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense


# Three identical conv -> max-pool -> dropout stages on 224x224 RGB input,
# followed by a dense head ending in a 15-way softmax.
layer_stack = [Input(shape=(224, 224, 3))]
for _ in range(3):
    layer_stack.extend([
        Conv2D(32, 3, activation='relu'),
        MaxPooling2D(2),
        Dropout(.2),
    ])
layer_stack.extend([
    Flatten(),
    Dense(128, activation='relu'),
    Dense(15, activation='softmax'),
])
model = tf.keras.Sequential(layer_stack)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Step counts are set to the full length of each iterator.
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=10,
    steps_per_epoch=len(train_data),
    validation_steps=len(valid_data),
)

In [10]:
# Evaluate the baseline model on the test iterator.
model.evaluate(test_data)

In [11]:
import seaborn as sns
def plot_history(history):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch values and contains the keys ``loss``, ``val_loss``,
            ``accuracy`` and ``val_accuracy`` (e.g. the value returned by
            ``model.fit``).
    """
    fig, ax = plt.subplots(1, 2, figsize=(18, 8))
    history_data = pd.DataFrame(history.history)
    sns.lineplot(data=history_data[['loss', 'val_loss']], ax=ax[0])
    sns.lineplot(data=history_data[['accuracy', 'val_accuracy']], ax=ax[1])
    # plt.xlabel() would only label the current (last) axes, leaving the loss
    # panel unlabeled, so label and title each panel explicitly.
    for panel, title in zip(ax, ('Loss', 'Accuracy')):
        panel.set_xlabel('Epochs')
        panel.set_title(title)
    
# Plot the loss/accuracy curves recorded while fitting the baseline model.
plot_history(history)

In [12]:
from sklearn.metrics import classification_report

# model.predict() returns a NumPy array of per-class scores, so take the
# arg-max with NumPy rather than wrapping it in a TensorFlow tensor that is
# then iterated element by element and used as list indices.
pred = np.argmax(model.predict(test_data), axis=1)
# Class names in the order of the class_indices mapping
# (assumed to be ordered by class index — TODO confirm).
classes = list(test_data.class_indices)
pred_classes = [classes[idx] for idx in pred]
labels_classes = [classes[idx] for idx in test_data.labels]

print(classification_report(labels_classes, pred_classes))

In [15]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
# Pass labels=classes so the matrix rows/columns are guaranteed to follow the
# same order as the tick labels, even if a class is absent from both the
# predictions and the true labels.
cf_matrix = confusion_matrix(labels_classes, pred_classes, labels=classes)
sns.set_theme(rc={'figure.figsize': (11, 10)})
ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt="g",
                 xticklabels=classes, yticklabels=classes, cbar=False)
ax.set_ylabel('True Labels')
ax.set_xlabel('Predicted Labels');

In [16]:
def create_fail_list(pred_classes, labels_classes, filenames=None):
    """Collect the misclassified samples, keyed by file name.

    Args:
        pred_classes: predicted class name for each sample.
        labels_classes: true class name for each sample, in the same order.
        filenames: file name of each sample, in the same order. Defaults to
            ``test_data.filenames`` (the module-level test iterator).

    Returns:
        dict mapping each misclassified file name to
        ``{'y_true': <true label>, 'y_pred': <predicted label>}``.

    Raises:
        ValueError: if the three sequences do not have the same length.
    """
    if filenames is None:
        filenames = test_data.filenames
    # Misaligned inputs would attribute labels to the wrong files, so fail loudly.
    if not (len(pred_classes) == len(labels_classes) == len(filenames)):
        raise ValueError(
            'pred_classes, labels_classes and filenames must have the same length'
        )
    return {
        name: {'y_true': truth, 'y_pred': guess}
        for name, guess, truth in zip(filenames, pred_classes, labels_classes)
        if guess != truth
    }
# Build the lookup of misclassified files from the current pred_classes / labels_classes.
fail_pred = create_fail_list(pred_classes, labels_classes)

In [19]:
# Visualizing the mistakes
def plot_wrong_pred(fail_pred):
    """Show up to 20 randomly chosen misclassified test images in a 4x5 grid.

    Args:
        fail_pred: dict mapping a file name (relative to the test directory)
            to a dict with the keys ``'y_true'`` and ``'y_pred'``.
    """
    file_path = '../input/vegetable-image-dataset/Vegetable Images/test'
    fig, axs = plt.subplots(4, 5, figsize=(22, 16))
    grid = axs.ravel()
    # random.sample() raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of failures available.
    choice_img = random.sample(list(fail_pred), min(len(grid), len(fail_pred)))
    for ax, file in zip(grid, choice_img):
        label_true = fail_pred[file]['y_true']
        label_pred = fail_pred[file]['y_pred']
        ax.imshow(plt.imread(os.path.join(file_path, file)))
        ax.axis('off')
        ax.set_title(f'True label: {label_true},\n Predicted label: {label_pred}')
    # Blank out the grid cells that received no image.
    for ax in grid[len(choice_img):]:
        ax.axis('off')
    plt.show()
# Display a random sample of the misclassified test images.
plot_wrong_pred(fail_pred)

In [20]:
# TRAINING SET - DATA AUGMENTATION
# Augmentation settings passed alongside the 1/255 rescaling.
augmentation_options = dict(
    rotation_range=40,
    shear_range=.2,
    zoom_range=.2,
    width_shift_range=.2,
    height_shift_range=.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1/255., **augmentation_options)

# Same directory, image size, batch size and label encoding as the plain training iterator.
train_data_aug = train_datagen.flow_from_directory(
    '../input/vegetable-image-dataset/Vegetable Images/train',
    class_mode='categorical',
    batch_size=32,
    target_size=(224, 224),
)

In [21]:
# Clone the baseline model's architecture and train the clone on the augmented data.
# NOTE(review): clone_model is expected to create freshly initialized weights
# rather than copy the trained ones — confirm against the Keras docs.
model_aug = tf.keras.models.clone_model(model)
model_aug.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

history_aug = model_aug.fit(
    train_data_aug,
    validation_data=valid_data,
    epochs=10,
    steps_per_epoch=len(train_data_aug),
    validation_steps=len(valid_data),
)

In [23]:
# Evaluate the model trained on augmented data on the test iterator.
model_aug.evaluate(test_data)

In [24]:
# Plot the loss/accuracy curves recorded while fitting model_aug.
plot_history(history_aug)

In [25]:
# model.predict() returns a NumPy array of per-class scores, so take the
# arg-max with NumPy rather than wrapping it in a TensorFlow tensor that is
# then iterated element by element and used as list indices.
pred = np.argmax(model_aug.predict(test_data), axis=1)
pred_classes = [classes[idx] for idx in pred]
labels_classes = [classes[idx] for idx in test_data.labels]

print(classification_report(labels_classes, pred_classes))

In [26]:
# use Resnet feature vector for Transfer Learning
import tensorflow_hub as hub
resnet_url = 'https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5'

# Hub layer loaded with trainable=False; only the Dense head added below is left trainable.
feature_extractor_layer = hub.KerasLayer(
    resnet_url,
    input_shape=(224, 224, 3),
    name='feature_extraction_layer',
    trainable=False,
)

# Feature extractor followed by a 15-way softmax classifier.
model_res = tf.keras.Sequential()
model_res.add(feature_extractor_layer)
model_res.add(Dense(15, activation='softmax'))

model_res.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Trained on the augmented training iterator, validated on valid_data.
history_res = model_res.fit(
    train_data_aug,
    validation_data=valid_data,
    epochs=10,
    steps_per_epoch=len(train_data_aug),
    validation_steps=len(valid_data),
)

In [27]:
# Evaluate the ResNet transfer-learning model on the test iterator.
model_res.evaluate(test_data)

In [28]:
# Plot the loss/accuracy curves recorded while fitting model_res.
plot_history(history_res)

In [None]:
# model.predict() returns a NumPy array of per-class scores, so take the
# arg-max with NumPy rather than wrapping it in a TensorFlow tensor that is
# then iterated element by element and used as list indices.
pred = np.argmax(model_res.predict(test_data), axis=1)
pred_classes = [classes[idx] for idx in pred]
labels_classes = [classes[idx] for idx in test_data.labels]

print(classification_report(labels_classes, pred_classes))