# Good and Bad Fruits Classification

## Part One: Imports and Data Visualization
Here, we import the required libraries for this project and define some constants. We then load the data and visualize it.

In [2]:
import tensorflow as tensorflow
import matplotlib.pyplot as plt
import matplotlib.image as img
import pandas as pd 
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from tensorflow import keras
import numpy as np
from tensorflow.keras import mixed_precision
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os, os.path
import csv
import tensorflow_hub as hub

# Print the full path of every file found anywhere under the dataset folder.
for root, _subdirs, names in os.walk('dataset'):
    for name in names:
        print(os.path.join(root, name))

# Shared constants: images per batch, and the square edge length (in pixels)
# that the generators below resize every image to.
batch_size, img_size = 32, 256

ModuleNotFoundError: No module named 'tensorflow'

Checking how data is organized

In [None]:
train_path = 'dataset/train/'
test_path = 'dataset/test/'

# Each sub-directory of the training folder is one class. Stray files
# (e.g. OS metadata) are skipped so that listing them does not fail, and the
# names are sorted so the order is the same on every run.
train_categories = sorted(
    entry for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

# Number of entries found in each class directory, aligned with train_categories.
train_samples = [
    len(os.listdir(os.path.join(train_path, category)))
    for category in train_categories
]

# Collect every file below the test folder. Walking the tree gives the image
# count whether the images sit directly in test_path or inside per-class
# sub-directories (the latter is presumably the case, since the test set is
# later loaded with flow_from_directory -- confirm against the dataset).
test_samples = [
    os.path.join(root, name)
    for root, _subdirs, names in os.walk(test_path)
    for name in names
]

print("Count of images in Training set:", sum(train_samples))
print("Count of images in Test set:", len(test_samples))

Visualizing distributions of data

In [None]:
# Use a large canvas for this and all following figures.
plt.rcParams["figure.figsize"] = [40, 20]

# One bar per class directory, with bar height equal to its image count.
index = np.arange(len(train_categories))
plt.bar(index, train_samples)
plt.xlabel('Categories', fontsize=25)
plt.ylabel('Count of Images', fontsize=25)
plt.xticks(index, train_categories, fontsize=15, rotation=90)
plt.title('Distribution of Categories with counts in Training Set', fontsize=35)
plt.show()

Here, we will visualize some individual images.

In [None]:
# Plain generator (no augmentation, no rescaling) used only to preview raw images.
visualizer = ImageDataGenerator()
dir_it = visualizer.flow_from_directory(
    train_path,
    target_size=(img_size, img_size),
    class_mode='categorical',
    batch_size=5,
    seed=42,
)

x_batch, y_batch = next(dir_it)

# class_indices maps class name -> label index; invert it so the one-hot
# labels can be shown as readable class names.
index_to_class = {index: name for name, index in dir_it.class_indices.items()}

for i, (image, label) in enumerate(zip(x_batch, y_batch)):
    plt.subplot(1, 5, i + 1)
    # Without a rescale factor the generator yields float pixels in the 0-255
    # range; cast to uint8 so imshow does not clip them to white.
    plt.imshow(image.astype('uint8'))
    plt.title(index_to_class[int(np.argmax(label))])

# Show once, after all subplots are drawn, so the images share one figure.
plt.show()

While the dataset is already augmented, it doesn't hurt to do some image augmentation ourselves.
But since the images are already augmented, we will only do some mild augmentation.
Next, we display a few augmented images as examples.

In [None]:
# Mild augmentation only: small rotations, shifts, shear and zoom plus flips.
visualizer = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)

dir_it = visualizer.flow_from_directory(
    train_path,
    target_size=(img_size, img_size),
    class_mode='categorical',
    batch_size=1,
    seed=42,
)

# class_indices maps class name -> label index; invert it for readable titles.
index_to_class = {index: name for name, index in dir_it.class_indices.items()}

# Ten samples need a 2 x 5 grid; a 1 x 5 grid rejects subplot indices above 5.
n_rows, n_cols = 2, 5

for i in range(n_rows * n_cols):
    # Use the built-in next() and a name that does not shadow the
    # `matplotlib.image as img` import.
    aug_batch, label_batch = next(dir_it)
    plt.subplot(n_rows, n_cols, i + 1)
    # Pixels are floats in the 0-255 range here; cast so imshow does not clip.
    plt.imshow(aug_batch[0].astype('uint8'))
    plt.title(index_to_class[int(np.argmax(label_batch[0]))])

# Show once, after all subplots are drawn, so the images share one figure.
plt.show()

## Part Two: Creating the datasets

Here we create the training, validation, and test datasets that we will use with our models.

In [None]:
def get_dataset(path, batch_size, rescale, preprocess_function=None):
    """Build augmented training and validation generators from one directory.

    Args:
        path: Directory that is handed to ``flow_from_directory``.
        batch_size: Number of images per yielded batch.
        rescale: Multiplier forwarded to ``ImageDataGenerator(rescale=...)``;
            ``1./255`` maps 8-bit pixel values to [0, 1].
        preprocess_function: Optional callable forwarded as
            ``preprocessing_function``.

    Returns:
        A ``(train_data, val_data)`` tuple of directory iterators. Images are
        resized to the module-level ``img_size`` and labels are one-hot
        (``class_mode='categorical'``).
    """
    aug_gens = ImageDataGenerator(
        rescale=rescale,
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=0.1,  # hold out 10% of the images for validation
        preprocessing_function=preprocess_function,
    )

    # NOTE: both subsets come from the same generator, so the validation
    # images receive the same random augmentation as the training images.
    train_data = aug_gens.flow_from_directory(
        path,
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        seed=1447)

    val_data = aug_gens.flow_from_directory(
        path,
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        seed=1447)

    return train_data, val_data

def get_testdataset(path, batch_size, rescale, preprocess_function=None):
    """Build a non-augmented generator for the test images.

    Args:
        path: Directory that is handed to ``flow_from_directory``.
        batch_size: Number of images per yielded batch.
        rescale: Multiplier forwarded to ``ImageDataGenerator(rescale=...)``;
            ``1./255`` maps 8-bit pixel values to [0, 1].
        preprocess_function: Optional callable forwarded as
            ``preprocessing_function``. It defaults to ``None`` so that
            existing three-argument calls keep working.

    Returns:
        A directory iterator yielding images resized to the module-level
        ``img_size`` with one-hot labels (``class_mode='categorical'``).
    """
    aug_gens = ImageDataGenerator(
        rescale=rescale,
        preprocessing_function=preprocess_function,
    )

    test_data = aug_gens.flow_from_directory(
        path,
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
        seed=1447)

    return test_data

# Generators for the basic model: pixel values are scaled by 1/255.
train, val = get_dataset(
    path='dataset/train',
    batch_size=batch_size,
    rescale=1./255,
)
test = get_testdataset(
    path='dataset/test',
    batch_size=batch_size,
    rescale=1./255,
)

Here we can see that the dataset generator initialized correctly

In [None]:
# Show the `classes` attribute of the training generator, then of the test generator.
for generator in (train, test):
    print(generator.classes)

## Part Three: Basic Model

Here we will finally create our model!

We will start off with a very basic model to ensure everything is working!

In [None]:
# Baseline CNN: four Conv2D + MaxPooling2D stages followed by a dense head.
stack = [keras.layers.Input(shape=(img_size, img_size, 3))]

# Filter count of each 3x3, same-padded, ReLU convolution stage
# (stride is left at the Keras default of (1, 1)).
for n_filters in (32, 32, 64, 128):
    stack.append(
        keras.layers.Conv2D(
            filters=n_filters,
            kernel_size=(3, 3),
            padding="same",
            activation='relu',
        )
    )
    stack.append(keras.layers.MaxPooling2D((2, 2)))

# Classifier head; the softmax layer has one unit per training category.
# NOTE(review): 1048 units may be a typo for 1024 -- kept as written.
stack.extend([
    keras.layers.Flatten(),
    keras.layers.Dense(1048, activation='swish'),
    keras.layers.Dense(128, activation='swish'),
    keras.layers.Dense(len(train_categories), activation='softmax'),
])

model = keras.models.Sequential(stack)

model.build(input_shape=(None, img_size, img_size, 3))
model.summary()

### Training
Now for the fun part.
We first start off by compiling the model with the Adam optimizer, and set the number of epochs to 10. Then we fit the model and display the accuracies in a graph.

In [None]:
# Train the basic model for a fixed number of epochs.
num_epochs = 10

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

history = model.fit(
    train,
    validation_data=val,
    epochs=num_epochs,
    workers=8,
    verbose=1,
)

### Visualization
Here, we will visualize the accuracy of the model as it trains.

In [None]:
# Plot training and validation accuracy per epoch on the same axes.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.1, 1])
plt.legend(loc='lower right')
plt.show()

### Test data
Let's see how well our model does on the test data.

In [None]:
# Evaluate the trained basic model on the held-out test generator.
model.evaluate(x=test)

## Part Four: Advanced Model

Here we will utilize transfer learning with a pretrained model. We will be using the MobileNetV2 model since it is small, efficient, and fast. We will also redefine the input datasets, because MobileNetV2 expects its own preprocessing (`mobilenet_v2.preprocess_input`) instead of the plain 1/255 rescaling used above.

In [None]:
# Rebuild the generators for MobileNetV2: rescale is 1 (a no-op) because the
# model's own preprocess_input function is applied to the pixels instead.
mobilenet_preprocess = keras.applications.mobilenet_v2.preprocess_input

train, val = get_dataset(
    'dataset/train/',
    batch_size,
    1,
    preprocess_function=mobilenet_preprocess,
)
test = get_testdataset(
    'dataset/test',
    batch_size,
    1,
    preprocess_function=mobilenet_preprocess,
)

In [None]:
# ImageNet-pretrained MobileNetV2 used as the feature extractor.
# include_top=False leaves out the original classification head, and
# pooling='avg' adds global average pooling so the base outputs one flat
# feature vector per image.
mobNet = keras.applications.MobileNetV2(
    input_shape=(img_size, img_size, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg',
)

# New classification head stacked on top of the pretrained base.
model = keras.models.Sequential()
model.add(mobNet)
model.add(keras.layers.Dense(1024, activation='swish'))
model.add(keras.layers.Dense(128, activation='swish'))
# One output unit per class found by the training generator, so the head
# always matches the one-hot labels instead of relying on a hard-coded count.
model.add(keras.layers.Dense(len(train.class_indices), activation='softmax'))
model.build(input_shape=(None, img_size, img_size, 3))
model.summary()

Again, we will compile the model. However, this time we will add three callbacks. 
 - The first callback will be a learning rate reduction. This will reduce the learning rate when the validation loss plateaus, so the model can keep improving in smaller steps.
 - The second callback will be an early stopping callback. This callback allows us to use a high number of epochs, yet training will stop once the validation loss has stopped improving.
 - The third callback will be a model checkpoint. This saves the model to disk whenever the validation accuracy improves, which allows us to use the best model for testing.

Once the three callbacks have been created, we will continue with training the model with the callbacks.

In [None]:
# The MobileNetV2-based model is a new object, so it must be compiled before
# fit() can be called. Same optimizer, loss and metric as the basic model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Multiply the learning rate by 0.4 after 4 epochs without val_loss improvement,
# never going below 1e-4.
lr_reduction = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=4, verbose=1, factor=0.4, min_lr=0.0001)

# Stop after 8 epochs without val_loss improvement and restore the best weights.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.00001, patience=8, mode='auto', restore_best_weights=True)

# Write the model to disk only when val_accuracy reaches a new maximum.
MCP = keras.callbacks.ModelCheckpoint('Best_points.h5',
                                      verbose=1,
                                      save_best_only=True,
                                      monitor='val_accuracy',
                                      mode='max')

# batch_size is deliberately not passed to fit(): the generator already yields
# batches of `batch_size` images, and Keras rejects the argument for
# generator inputs.
# NOTE(review): `workers` is kept for consistency with the basic model's fit
# call; newer Keras releases no longer accept it -- confirm the target version.
history = model.fit(train,
                    workers=8,
                    epochs=num_epochs,
                    validation_data=val,
                    verbose=1,
                    callbacks=[early_stop, lr_reduction, MCP])

Finally, we will evaluate the accuracy on the test dataset

In [None]:
# Evaluate the trained transfer-learning model on the held-out test generator.
model.evaluate(x=test)