# Fruits and Vegetables Image Classification using CNN
* The dataset used is the [Fruits and Vegetables Image Recognition Dataset](https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition), uploaded by Kritik Seth on Kaggle.

In [3]:
# Importing Libraries
import os
from PIL import Image
import numpy as np
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from tensorflow.keras.callbacks import EarlyStopping
from cv2 import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.layers import BatchNormalization

In [4]:
# Root folder of the training images; every entry inside it is treated as one class.
path = '../input/fruit-and-vegetable-image-recognition/train/'
# Class names in the order os.listdir reports them. A name's position in this
# list is what the loading loop uses as its integer label.
fruits = list(os.listdir(path))

In [6]:
# JPG to Numpy arrays
# `data` collects the image arrays, `labels` the matching integer class ids;
# im_w / im_h are the width and height every image is resized to.
data, labels = [], []
im_w = im_h = 224

In [7]:
# Load every training image as a fixed-size array together with its integer
# class label (the index of its class folder in `fruits`).
skipped_suffixes = ('Image_56.jpg', 'Image_96.jpg')
for x, fruit in enumerate(fruits):
    sub_path = path + fruit + '/'
    for y in os.listdir(sub_path):
        img_path = sub_path + y
        # These two file names are excluded in every class folder. Test the
        # name first so an excluded file is never read from disk.
        if img_path.endswith(skipped_suffixes):
            continue
        imag = cv2.imread(img_path)
        if imag is None:
            # cv2.imread signals an unreadable/non-image file by returning
            # None rather than raising; skip it instead of crashing below.
            print("Skipping unreadable image: " + img_path)
            continue
        # NOTE(review): cv2.imread yields BGR channel order but the array is
        # tagged 'RGB' here. convert_to_array does the same, so training and
        # prediction stay consistent -- change both together or not at all.
        img_from_ar = Image.fromarray(imag, 'RGB')
        resized_image = img_from_ar.resize((im_w, im_h))
        data.append(np.array(resized_image))
        labels.append(x)

In [8]:
# Shuffling Labels and Data
# Turn the collected lists into arrays and apply one shared random ordering,
# so each image stays paired with its label.
categories = np.array(data)
labels = np.array(labels)

s = np.arange(len(categories))
np.random.shuffle(s)
categories, labels = categories[s], labels[s]

# Sample count and number of distinct labels actually present in the data.
data_length = categories.shape[0]
num_classes = np.unique(labels).size

In [10]:
# DATA PREPARATION
# The first 10% of the shuffled samples form the test set, the rest the
# training set. Pixel values are converted to float32 and divided by 255.
split_at = int(0.1 * data_length)

x_test = categories[:split_at].astype('float32') / 255
x_train = categories[split_at:].astype('float32') / 255
test_length = len(x_test)
train_length = len(x_train)

# One-hot encode the integer labels of each split.
y_test = keras.utils.np_utils.to_categorical(labels[:split_at], num_classes)
y_train = keras.utils.np_utils.to_categorical(labels[split_at:], num_classes)

In [11]:
# CNN MODEL CREATION
# Five Conv2D(ReLU) -> MaxPooling -> BatchNormalization stages, followed by
# Dropout, Flatten, a 128-unit ReLU Dense layer and a softmax output layer.
model = Sequential()
# The image arrays are laid out (height, width, channels), so the input shape
# is given in that order (identical to before while im_w == im_h).
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(im_h, im_w, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
# Remaining convolution stages differ only in their filter count.
for filters in (64, 64, 96, 32):
    model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
# One output unit per class. Use num_classes (the same value the one-hot
# targets were built with) instead of a hard-coded 36, so the output layer
# always matches the target width.
model.add(Dense(num_classes, activation='softmax'))

In [12]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Stop once val_loss has not improved for 17 epochs. restore_best_weights=True
# rolls the model back to the epoch with the lowest val_loss; without it the
# evaluation and predictions below would use the weights of the final epoch,
# i.e. up to 17 epochs past the best validation result.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=17,
    restore_best_weights=True,
)
# A third of the training data is held out by Keras as the validation set.
history = model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=90,
    verbose=1,
    validation_split=0.33,
    callbacks=[early_stop],
)

# model.evaluate returns [loss, accuracy] for the metrics compiled above.
score = model.evaluate(x_test, y_test, verbose=1)
print('\n', 'Test accuracy:', score[1])

In [13]:
# Visualizing Model Loss
# Validation loss and training loss from the fit history, on shared axes.
for curve in ('val_loss', 'loss'):
    plt.plot(history.history[curve])
plt.title("Model Loss")
plt.xlabel('Time')
plt.ylabel("Loss")
plt.legend(['val_loss', 'loss'], loc='upper left')
plt.show()

In [14]:
# Visualizing Model Accuracy
# Validation accuracy and training accuracy from the fit history, one value
# per epoch. The y-axis shows accuracy and the x-axis the epoch index, so the
# axes are labelled accordingly.
plt.plot(history.history['val_accuracy'])
plt.plot(history.history['accuracy'])
plt.title("Model Accuracy")
plt.ylabel("Accuracy")
plt.xlabel('Epoch')
plt.legend(['val_accuracy', 'acc'], loc='upper left')
plt.show()

In [15]:
# Result accumulators: predict_fruit appends the predicted class name to
# pred_list and the matching top model score to acc_list.
pred_list, acc_list = [], []

In [17]:
# Converting image to numpy for prediction
def convert_to_array(img):
    """Load the image file at ``img`` and return it resized as a NumPy array.

    The file is read with OpenCV, wrapped in a PIL image and resized to
    ``(im_w, im_h)``, mirroring the preprocessing of the training images.

    Args:
        img: Path of the image file to load.

    Returns:
        The resized image as a NumPy array.

    Raises:
        ValueError: If OpenCV cannot read the file (missing, unreadable or
            not an image).
    """
    im = cv2.imread(img)
    if im is None:
        # cv2.imread reports failure by returning None; fail with a clear
        # message instead of an opaque error inside Image.fromarray.
        raise ValueError("Could not read image file: " + str(img))
    # NOTE(review): cv2.imread yields BGR channel order but the array is
    # tagged 'RGB' here. The training loader does the same, so both paths
    # stay consistent -- change both together or not at all.
    resized = Image.fromarray(im, 'RGB').resize((im_w, im_h))
    return np.array(resized)

Function that returns the fruit name for a predicted label index.

In [18]:
def get_fruit_name(label):
    """Return the class name stored at position ``label`` of ``fruits``."""
    name = fruits[label]
    return name

In [22]:
def predict_fruit(file):
    """Classify the image at ``file`` and record the outcome.

    The image is loaded through ``convert_to_array``, divided by 255 and fed
    to ``model`` as a batch of one. The name of the highest-scoring class is
    appended to ``pred_list`` and that highest score to ``acc_list``; both
    are also printed. Nothing is returned.
    """
    print("Predicting .................................")
    pixels = convert_to_array(file) / 255
    # The model expects a batch dimension, so wrap the single image.
    batch = np.array([pixels])
    score = model.predict(batch, verbose=1)
    label_index = np.argmax(score)
    acc = np.max(score)
    fruit = get_fruit_name(label_index)
    pred_list.append(fruit)
    acc_list.append(acc)
    print("The predicted fruit is " + fruit + " with accuracy =    " + str(acc))

In [23]:
# Creating list for test data
# Root folder of the test images and the names of the entries inside it,
# in the order os.listdir reports them.
test_path = '../input/fruit-and-vegetable-image-recognition/test/'
t_fruits = list(os.listdir(test_path))

In [24]:
# Predicting images in test list.
# Visit each test class folder in t_fruits order and run predict_fruit on
# every file found inside it.
for class_dir in t_fruits:
    sub_path = test_path + class_dir + '/'
    for file_name in os.listdir(sub_path):
        predict_fruit(sub_path + file_name)

Creating a list of the real labels of the images in the test directory. These real labels will be compared with the predicted results. This list will serve as a kind of manual confusion matrix.

In [25]:
# Creating the list of real labels in Test for comparison with the predictions.
# The predictions were produced by walking the test folders in t_fruits order,
# so the real labels are built the same way: one entry per file actually
# present in each test class folder. This avoids assuming exactly 10 images
# per class or that the train and test folders are listed in the same order.
real_fruits = []
for class_name in t_fruits:
    n_images = len(os.listdir(test_path + class_name + '/'))
    real_fruits.extend([class_name] * n_images)

In [26]:
# Creating a table with one (predicted name, real name, score) tuple per image.
# zip stops at the shortest of the three lists.
complist = [
    (predicted, real, acc)
    for predicted, real, acc in zip(pred_list, real_fruits, acc_list)
]

In [27]:
# tp counts rows whose predicted name equals the real name; fp counts the rest.
tp = sum(1 for row in complist if row[0] == row[1])
fp = len(complist) - tp

In [28]:
# Share of compared images whose predicted name equals the real name. This is
# the overall accuracy over the test images (not a per-class precision).
total = tp + fp
# Guard against an empty comparison list, which would otherwise divide by zero.
rate = tp / total if total else 0.0
tp, fp, rate

In [29]:
# Display every (predicted name, real name, score) tuple for inspection.
complist