# Fruit Classification
### Transfer learning of VGG16 model to classify fresh and rotten fruits from Kaggle dataset

### Load and prepare model for data

In [14]:
# Initial import of Keras for use throughout project
from tensorflow import keras

In [3]:
# Load base model (VGG16)
base = keras.applications.VGG16(
    weights = "imagenet", # Use imagenet weights to match
    input_shape = (224, 224, 3), # Specify input shape to match dataset (224 x 224 pixels, 3 color channels)
    include_top = False # Remove output layer
)

In [4]:
# Verify removal of output layer
base.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [5]:
# Freeze base layers to retain learning
base.trainable = False

In [6]:
# Specify inputs with correct shape
inputs = keras.Input(shape = (224, 224, 3))
x = base(inputs, training = False)

# Add pooling layer
x = keras.layers.GlobalAveragePooling2D()(x)

# Add dense output layer
outputs = keras.layers.Dense(6, activation = "softmax")(x)

# Combine to create new model
model = keras.Model(inputs, outputs)

In [7]:
# Verify model layers
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 6)                 3078      
                                                                 
Total params: 14,717,766
Trainable params: 3,078
Non-trainable params: 14,714,688
_________________________________________________________________


In [8]:
# Compile the model
model.compile(
    loss = keras.losses.CategoricalCrossentropy(),
    metrics = [keras.metrics.Accuracy()]
)

### Data Preparation

In [9]:
# Augment data to increase model accuracy when trained
from keras.preprocessing.image import ImageDataGenerator

# Create data generator for training dataset
datagen_train = ImageDataGenerator(
    samplewise_center = True, # Set sample mean to 0
    rotation_range = 10, # Randomly rotate images 10 degrees
    zoom_range = 0.1, # Randomly zoom image
    width_shift_range = 0.1, # Randomly shift image horizontally
    height_shift_range = 0.1, # Randomly shift image vertically
    horizontal_flip = True, # Allow horizontal flipping
    vertical_flip = False # Don't allow vertical flipping
)

# No augmenting needed for testing data
datagen_test = ImageDataGenerator(
    samplewise_center = True # Set sample mean to 0
)

In [10]:
# Load training dataset
data_train = datagen_train.flow_from_directory(
    "dataset/train/", # Dataset path
    target_size = (224, 224), # Image size (224 x 224 pixels)
    color_mode = "rgb", # Color mode (rgb == 3 channel)
    class_mode = "categorical"
)

# Load testing dataset
data_test = datagen_test.flow_from_directory(
    "dataset/test/", # Dataset path
    target_size = (224, 224), # Image size (224 x 224 pixels)
    color_mode = "rgb", # Color mode (rgb == 3 channel)
    class_mode = "categorical"
)

Found 10901 images belonging to 6 classes.
Found 2698 images belonging to 6 classes.


### Model Training & Fine Tuning

In [16]:
# Train & evaluate the model with training and testing datasets
model.fit(
    data_train, # Training data
    validation_data = data_test, # Testing data for validation
    steps_per_epoch = data_train.samples / data_train.batch_size, # Set steps per epoch
    validation_steps = data_test.samples / data_test.batch_size, # Set steps for validation
    epochs = 20 # Desired number of epochs for training
)

Epoch 1/20
  6/340 [..............................] - ETA: 10:32 - loss: 1.6698 - accuracy: 0.0000e+00

KeyboardInterrupt: 