<a href="https://colab.research.google.com/github/AngelZouYifan/cv-banana-ripeness-model/blob/main/banana_ripeness_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Dependencies
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import keras
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.applications.mobilenet_v2 import preprocess_input, MobileNetV2
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.utils import class_weight 

In [None]:
# Properties and hyperparameters

# Class folder names handed to flow_from_directory(classes=...); the order here
# fixes the integer label assigned to each class.
# NOTE(review): every name starts with a space - presumably this mirrors the
# folder names inside the dataset archive; confirm before "cleaning" them up.
CLASSES = [
    ' unripe',
    ' freshunripe',
    ' freshripe',
    ' ripe',
    ' overripe',
    ' rotten',
]

# Adam learning rates: lr1 for the first training run, lr2 for the fine-tuning run.
lr1 = 3e-4
lr2 = 1e-5

In [None]:
# Download and unzip dataset
!apt install gdown
!apt install unzip

!gdown 1JVzFkM7aVuMBKbNwloqYUC3a0gcrYpr7 # unbalanced dataset
!unzip banana_data.zip
ds = 'banana_data copy'

# !gdown 1lOUtrLT0LafkUaqxClE1sgyQ4Pu8MuvV # balanced dataset
# !unzip avg_ds.zip
# ds = 'avg_ds'

# !gdown 1r7aPqRkU0OTKjrjMiQOdBj-FfJHHJHQB # small dataset for developing
# !unzip dev_ds.zip
# ds = 'dev_ds'

In [None]:
# Preprocess data
# Each *_generator is a DirectoryIterator yielding tuples of (x, y) where x is a numpy array containing
# a batch of images with shape (batch_size, *target_size, channels) and y is a numpy array of corresponding labels.

# Root folder of the chosen dataset; it contains the train/, valid/ and test/ splits.
data_root = os.path.join('/content', ds)

# Settings shared by all three splits, kept in one place so they cannot drift apart.
flow_kwargs = dict(
    target_size=(224, 224),
    color_mode='rgb',
    classes=CLASSES,
    batch_size=32,
    class_mode='categorical',
)

# Training split: batches are shuffled.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = train_datagen.flow_from_directory(os.path.join(data_root, 'train'), shuffle=True, **flow_kwargs)

# Validation split: no shuffle, no data augmentation.
# Built from valid_datagen (not train_datagen) so that anything later added to the
# training pipeline, e.g. augmentation, cannot leak into validation.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
valid_generator = valid_datagen.flow_from_directory(os.path.join(data_root, 'valid'), shuffle=False, **flow_kwargs)

# Test split: no shuffle, no data augmentation. The fixed order keeps
# test_generator.classes aligned with the rows returned by model.predict().
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = test_datagen.flow_from_directory(os.path.join(data_root, 'test'), shuffle=False, **flow_kwargs)

Found 19926 images belonging to 6 classes.
Found 1902 images belonging to 6 classes.
Found 944 images belonging to 6 classes.


In [None]:
# Build the model
# Backbone: MobileNetV2 with ImageNet weights; include_top=False drops its
# original 1000-way classification layer.
base_model = MobileNetV2(weights='imagenet', include_top=False)

# Classification head: pool the backbone's feature maps, pass them through three
# fully-connected ReLU layers, then a softmax with one unit per class.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)
preds = Dense(len(CLASSES), activation='softmax')(x)

# Full network: backbone input -> class probabilities.
model = Model(inputs=base_model.input, outputs=preds)

# Freeze the first 20 layers of the combined model; every later layer stays trainable.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 20

# Adam at the initial learning rate, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer=Adam(lr1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [None]:
# Evaluate the raw model (pretrained backbone + untrained head) on the test set,
# before any training, as a baseline
score = model.evaluate(test_generator)



In [None]:
# Per-class loss weights to counter class imbalance in the training split.
# (`class_weight` comes from sklearn.utils, imported in the notebook's imports cell.)
train_labels = np.ravel(train_generator.classes)
present_classes = np.unique(train_labels)  # class indices that actually occur in the training data
class_weights = class_weight.compute_class_weight('balanced', classes=present_classes, y=train_labels)

# Key each weight by its real class index rather than by its position in the array:
# if a class has no training images, np.unique() skips it and positional keys
# (dict(enumerate(...))) would attach the weights to the wrong classes.
train_class_weights = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

In [None]:
# Train the model
# First training run: 50 epochs with the optimizer set in the compile() call above,
# evaluated on valid_generator after every epoch. class_weight passes the
# per-class weights derived from the training labels.

hist = model.fit(train_generator, epochs=50, validation_data=valid_generator, class_weight = train_class_weights)

def plot(hs):
    """Show the train/validation accuracy curves, then the train/validation loss curves.

    Args:
        hs: object whose ``history`` mapping holds the per-epoch series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (e.g. the value returned by ``model.fit``).
    """
    # One entry per figure: (history key, title, y-axis label, legend position)
    panels = (
        ('accuracy', 'Model accuracy', 'Accuracy', 'upper left'),
        ('loss', 'Model loss', 'Loss', 'upper right'),
    )
    for metric, title, y_label, legend_loc in panels:
        plt.plot(hs.history[metric])           # training curve
        plt.plot(hs.history['val_' + metric])  # validation curve
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc=legend_loc)
        plt.show()

# Show the accuracy and loss curves recorded by the training run above
plot(hist)

In [None]:
# Fine tuning the model
# Make every layer of the MobileNetV2 backbone trainable, including the ones
# frozen for the first training run.
base_model.trainable = True

# Recompile after changing `trainable`, this time with the much smaller learning
# rate lr2 so the pretrained weights are only nudged.
model.compile(optimizer=Adam(lr2), loss='categorical_crossentropy', metrics=['accuracy']) # slow learning rate

# Keep the same class weights as the first run; without them this phase would
# train on the raw, unbalanced class distribution.
hist = model.fit(train_generator, epochs=30, validation_data=valid_generator, class_weight=train_class_weights)
plot(hist)

In [None]:
# Save model with h5 format
# Writes the trained model to a single .h5 file under /content.
# NOTE(review): /content is presumably the Colab session's temporary disk -
# copy the file elsewhere (e.g. Drive) if it must outlive the session.
model.save('/content/model_balancingWeights.h5')  

In [None]:
# Test the model
# Evaluate the fine-tuned model on the test generator; `score` receives whatever
# evaluate() returns (presumably loss and accuracy, given the compile() settings).
score = model.evaluate(test_generator)



In [None]:
# Save model to gdrive
# from google.colab import drive
# drive.mount('/content/drive')
# !cp -r '/content/model_balancingWeights.h5' /content/drive/MyDrive/banana_model

In [None]:
from sklearn.metrics import classification_report

# Ground-truth class indices, in the order the unshuffled test generator serves the images
labels = test_generator.classes
# One row of per-class scores (softmax output) per test image
preds = model.predict(test_generator)
# Predicted class = index of the highest score in each row
pred_labels = list(np.argmax(preds, axis=1))

# Accuracy by hand: number of matching labels over number of test images
correct_preds = int(np.count_nonzero(labels == np.asarray(pred_labels)))
acc = correct_preds / labels.size
for value in (correct_preds, labels.size, acc):
    print(value)

# Per-class precision / recall / F1
print(classification_report(labels, pred_labels, target_names=CLASSES))

727
944
0.7701271186440678
              precision    recall  f1-score   support

      unripe       0.76      0.66      0.71        77
 freshunripe       0.92      0.99      0.95        99
   freshripe       0.81      0.88      0.84       130
        ripe       0.80      0.69      0.74       205
    overripe       0.63      0.88      0.73       145
      rotten       0.78      0.68      0.72       288

    accuracy                           0.77       944
   macro avg       0.78      0.80      0.78       944
weighted avg       0.78      0.77      0.77       944

