# Fruit classification

## Apple, Banana, Orange

## Train, Valid, Test

## ~70%/15%/15%

In [1]:
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.preprocessing.image import ImageDataGenerator

# 1. Training
## Define base model

Input shape 64x64x3, flattened to a vector, followed by three dense layers (512, 512, and `num_classes` units).

In [2]:
# Number of output categories; sizes the final softmax layer.
num_classes = 3

# Fully connected baseline: the 64x64x3 image is flattened to a 12288-long
# vector and passed through two 512-unit hidden layers.
# Only the first layer declares an input shape. The hidden Dense layer used to
# carry `input_shape=(64,)`, which did not match its real 12288-wide input and
# has been dropped.
model = Sequential([
    Flatten(input_shape=(64, 64, 3)),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# categorical_crossentropy is paired with the softmax output; the accuracy
# metric is registered under the name 'acc'.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 12288)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               6291968   
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 1539      
Total params: 6,556,163
Trainable params: 6,556,163
Non-trainable params: 0
_________________________________________________________________


## Define train/valid directory and images (64x64)

In [3]:
# Training images: read from ./train in batches of 10, with pixel values
# multiplied by 1/255 and a 64x64 target size.
TRAINING_DIR = "./train"
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    directory=TRAINING_DIR,
    target_size=(64, 64),
    batch_size=10,
)

# Validation images: same rescaling, target size and batch size, read from ./valid.
VALIDATION_DIR = "./valid"
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_generator = validation_datagen.flow_from_directory(
    directory=VALIDATION_DIR,
    target_size=(64, 64),
    batch_size=10,
)

Found 464 images belonging to 3 classes.
Found 96 images belonging to 3 classes.


## Train and save the model

In [None]:
# Fit for 5 epochs, evaluating on the validation generator after each one.
# The returned object is kept so later cells can read its `history` dict.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=5,
)

# Write the trained model to disk in HDF5 format.
model.save("fruit.h5")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

## See the training accuracy/loss

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Show which metric names were recorded during training.
print(history.history.keys())

# Per-epoch metric series recorded by model.fit.
val_acc = history.history['val_acc']
val_loss = history.history['val_loss']
acc = history.history['acc']
loss = history.history['loss']

# Take the epoch count from the recorded history instead of hard-coding it,
# so the x-axis always matches the series length.
epochs = len(acc)

plt.plot(range(epochs), acc, 'b*-', label='Training accuracy')
plt.plot(range(epochs), val_acc, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

In [None]:
# Draw the training and validation loss curves on the same axes.
for series, line_format, legend_label in (
    (loss, 'b*-', 'Training loss'),
    (val_loss, 'r', 'Validation loss'),
):
    plt.plot(range(epochs), series, line_format, label=legend_label)
plt.title('Training and validation loss')
plt.legend()

# 2. Inference

## First, load the trained model, then the image to predict

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow import keras

# Sample image used for the single-image prediction below.
image_path = './test/Apple/1.png'

# Reload the network from the file written by the training section.
model = keras.models.load_model('fruit.h5')

# Preview the sample image.
image = mpimg.imread(image_path)
plt.imshow(image)

def make_prediction(image_path):
    """Run the loaded model on a single image file.

    Args:
        image_path: Path of the image to classify.

    Returns:
        The output of ``model.predict`` for a batch containing only this image.
    """
    image = image_utils.load_img(image_path, target_size=(64, 64))
    image = image_utils.img_to_array(image)
    # Add the leading batch dimension expected by model.predict.
    image = image.reshape(1, 64, 64, 3)
    # Scale pixels the same way the training generator does (rescale=1.0/255).
    # The ImageNet `preprocess_input` helper used here before applies a
    # different transform, so the model was fed inputs unlike anything it
    # saw during training.
    image = image / 255.0
    preds = model.predict(image)
    return preds

make_prediction(image_path)

## It's better to define the mapping for index/name

In [None]:
import numpy as np

# Lookup table from predicted class index to fruit name.
image_mapping = dict(enumerate(('Apple', 'Banana', 'Orange')))

# Pick the highest-scoring index for the sample image and print its name.
predicted_index = np.argmax(make_prediction('./test/Apple/1.png'))
print(image_mapping[predicted_index])

## Run the model inference for "test" dataset

In [None]:
# Test images: same 1/255 rescaling as training, one image per batch, and no
# shuffling so predictions stay in the same order as `test_it.classes`.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_it = test_datagen.flow_from_directory(
    directory='./test/',
    class_mode="categorical",
    shuffle=False,
    batch_size=1,
    target_size=(64, 64),
)

# First print the raw per-class scores for every test image ...
pred_output = model.predict(test_it)
print(pred_output)

# ... then reduce each row to the index of its highest score.
pred_output = pred_output.argmax(axis=1)
print(pred_output)

## See the classification report

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Label the report rows with the class folder names instead of bare indices.
# `class_indices` maps name -> index; sorting by index gives the names in label order.
report_class_names = sorted(test_it.class_indices, key=test_it.class_indices.get)
print(classification_report(test_it.classes, pred_output,
                            labels=list(range(len(report_class_names))),
                            target_names=report_class_names))

## See the confusion matrix

In [None]:
# Class names ordered by their numeric label, used for the matrix axes.
matrix_class_names = sorted(test_it.class_indices, key=test_it.class_indices.get)

# Pin the label set so the matrix always has one row/column per class,
# even if some class never occurs in the labels or predictions.
cf_matrix = confusion_matrix(test_it.classes, pred_output,
                             labels=list(range(len(matrix_class_names))))

plt.figure(figsize=(8,5))
# `cmap` selects the heatmap colormap; the previous `color='blue'` is not a
# heatmap parameter. Tick labels show class names rather than indices.
heatmap = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues',
                      xticklabels=matrix_class_names,
                      yticklabels=matrix_class_names)
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.title('Confusion matrix of model')