# Import Dependencies

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [2]:
import gdown
import shutil
from sklearn.model_selection import train_test_split

# Mount Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

# Image Rename (DON'T NEED TO RUN)

In [None]:
i = 1
for name in os.listdir('/content/gdrive/MyDrive/CSC340 AI/Datasets/Mature'):
  new_name = 'mature_'+str(i).zfill(4)+'.'+name.split('.')[len(name.split('.'))-1]
  i+=1
  shutil.move('/content/gdrive/MyDrive/CSC340 AI/Datasets/Mature/'+name,'/content/gdrive/MyDrive/CSC340 AI/Datasets/Mature/'+new_name)
i = 1
for name in os.listdir('/content/gdrive/MyDrive/CSC340 AI/Datasets/Partially Mature'):
  new_name = 'partiallymature_'+str(i).zfill(4)+'.'+name.split('.')[len(name.split('.'))-1]
  i+=1
  shutil.move('/content/gdrive/MyDrive/CSC340 AI/Datasets/Partially Mature/'+name,'/content/gdrive/MyDrive/CSC340 AI/Datasets/Partially Mature/'+new_name)
i = 1
for name in os.listdir('/content/gdrive/MyDrive/CSC340 AI/Datasets/Unmature'):
  new_name = 'unmature_'+str(i).zfill(4)+'.'+name.split('.')[len(name.split('.'))-1]
  i+=1
  shutil.move('/content/gdrive/MyDrive/CSC340 AI/Datasets/Unmature/'+name,'/content/gdrive/MyDrive/CSC340 AI/Datasets/Unmature/'+new_name)

## Image Augmentation (DON'T NEED TO RUN)

In [None]:
import tensorflow.keras.layers.experimental.preprocessing as prep
data_gen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=60,
      horizontal_flip=True,
      vertical_flip=True,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.1,
      zoom_range=0.2,
      fill_mode='nearest')

In [None]:
base='/content/gdrive/MyDrive/CSC340 AI/Datasets'
def augmented_img(path):
  count = 0
  filelist = os.listdir(base+'/'+path)
  max = len(filelist)
  print(max, filelist)
  for i in filelist:
    img = plt.imread(base+'/'+path+'/'+i,0)
    img = tf.expand_dims(img, 0)
    data_gen.fit(img)
    extension = i.split('.')
    extension = extension[len(extension)-1]
    for x, val in zip(data_gen.flow(img,save_to_dir=base+'/'+path,save_prefix='aug',save_format=extension),range(10)):pass
    count+=1
    print(path+': '+str(count)+'/'+str(max))

In [None]:
augmented_img('Mature')
augmented_img('Partially Mature')
augmented_img('Unmature')

# Import Datasets

## Download Dataset from Google Drive

In [None]:
url = "https://drive.google.com/uc?id=1bJaxLQIzgUIrMhoh1kJ0UAJqoVQ0K93F"
output = "papaya_image.zip"
gdown.download(url,output,quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1bJaxLQIzgUIrMhoh1kJ0UAJqoVQ0K93F
To: /content/papaya_image.zip
1.73GB [00:29, 57.8MB/s]


'papaya_image.zip'

In [None]:
!unzip papaya_image.zip

In [None]:
!rm -rf papaya_image.zip

# Train Validate Split

### Define directory path

In [None]:
root_dir = './'

In [None]:
base_dir = os.path.join(root_dir,'Datasets')
raw_mature_dir = os.path.join(base_dir,'Mature')
raw_partially_dir = os.path.join(base_dir,'Partially Mature')
raw_unmature_dir = os.path.join(base_dir,'Unmature')

In [None]:
train_dir = os.path.join(base_dir,'train')
train_mature_dir = os.path.join(train_dir,'Mature')
train_partially_mature_dir = os.path.join(train_dir,'Partially Mature')
train_unmature_dir = os.path.join(train_dir,'Unmature')

In [None]:
validate_dir = os.path.join(base_dir,'validate')
validate_mature_dir = os.path.join(validate_dir,'Mature')
validate_partially_mature_dir = os.path.join(validate_dir,'Partially Mature')
validate_unmature_dir = os.path.join(validate_dir,'Unmature')

### Create directory for train and validate

In [None]:
os.mkdir(train_dir)
os.mkdir(train_mature_dir)
os.mkdir(train_partially_mature_dir)
os.mkdir(train_unmature_dir)
os.mkdir(validate_dir)
os.mkdir(validate_mature_dir)
os.mkdir(validate_partially_mature_dir)
os.mkdir(validate_unmature_dir)

### Train and validate split and copy file to target directory

In [None]:
train_size = 0.75

In [None]:
mature_df = pd.DataFrame(data=os.listdir(raw_mature_dir),columns=["filename"])
partially_mature_df = pd.DataFrame(data=os.listdir(raw_partially_dir),columns=["filename"])
unmature_df = pd.DataFrame(data=os.listdir(raw_unmature_dir),columns=["filename"])

In [None]:
mature_train_df, mature_validate_df = train_test_split(mature_df,train_size=train_size,random_state=42)
partially_mature_train_df, partially_mature_validate_df = train_test_split(partially_mature_df,train_size=train_size,random_state=42)
unmature_train_df, unmature_validate_df = train_test_split(unmature_df,train_size=train_size,random_state=42)

In [None]:
mature_train_df

In [None]:
for file in mature_train_df.itertuples():
  shutil.copyfile(os.path.join(raw_mature_dir,file[1]),os.path.join(train_mature_dir,file[1]))
for file in partially_mature_train_df.itertuples():
  shutil.copyfile(os.path.join(raw_partially_dir,file[1]),os.path.join(train_partially_mature_dir,file[1]))
for file in unmature_train_df.itertuples():
  shutil.copyfile(os.path.join(raw_unmature_dir,file[1]),os.path.join(train_unmature_dir,file[1]))
for file in mature_validate_df.itertuples():
  shutil.copyfile(os.path.join(raw_mature_dir,file[1]),os.path.join(validate_mature_dir,file[1]))
for file in partially_mature_validate_df.itertuples():
  shutil.copyfile(os.path.join(raw_partially_dir,file[1]),os.path.join(validate_partially_mature_dir,file[1]))
for file in unmature_validate_df.itertuples():
  shutil.copyfile(os.path.join(raw_unmature_dir,file[1]),os.path.join(validate_unmature_dir,file[1]))

# Preprocessing

In [23]:
# For our CNN
IMAGE_SIZE = 300
BATCH_SIZE = 100
model_name = 'model_vgg.h5'

In [None]:
def showImage(img):
  plt.imshow(img)
  plt.show()

In [None]:
image_gen_train = ImageDataGenerator(
      rescale=1./255,
      rotation_range=60,
      horizontal_flip=True,
      vertical_flip=True,
      fill_mode='nearest')

In [None]:
train_data_gen = image_gen_train.flow_from_directory(batch_size=BATCH_SIZE,
                                                     directory=train_dir,
                                                     shuffle=True,
                                                     class_mode="categorical",
                                                     target_size=(IMAGE_SIZE,IMAGE_SIZE))

In [None]:
sample_image = train_data_gen[0][0][0]
showImage(sample_image)

In [None]:
train_data_gen.class_indices

In [None]:
image_gen_val = ImageDataGenerator(
      rescale=1./255,
      rotation_range=60,
      horizontal_flip=True,
      vertical_flip=True,
      fill_mode='nearest')

In [None]:
val_data_gen = image_gen_val.flow_from_directory(batch_size=BATCH_SIZE,
                                                 directory=validate_dir,
                                                 class_mode="categorical",
                                                 target_size=(IMAGE_SIZE, IMAGE_SIZE))

In [None]:
val_data_gen.class_indices

# Model Building

## Construct Model (CNN)

In [22]:
def getCNNModel():
  global IMAGE_SIZE
  IMAGE_SIZE = 300
  global model_name
  model_name = "model_cnn.h5"
  model = tf.keras.models.Sequential([
      tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
      tf.keras.layers.MaxPooling2D(2, 2),
      tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2,2),
      tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
      tf.keras.layers.MaxPooling2D(2,2),
      tf.keras.layers.Dropout(0.5),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(512, activation='relu'),
      tf.keras.layers.Dense(256, activation='relu'),
      tf.keras.layers.Dense(128, activation='relu'),
      tf.keras.layers.Dense(3)
  ])
  return model

## Construct Model with Pretrained VGG Model

In [24]:
def getVGGModel():
  global IMAGE_SIZE
  IMAGE_SIZE = 224
  global model_name
  model_name = "model_vgg.h5"
  from tensorflow.keras.applications import VGG16
  from tensorflow.keras.layers import Flatten,Dense
  from tensorflow.keras import Model
  vgg = VGG16(input_shape=[IMAGE_SIZE,IMAGE_SIZE,3], weights='imagenet', include_top=False)
  x = Flatten()(vgg.output)
  predict = Dense(3, activation='softmax')(x)
  model = Model(inputs=vgg.input, outputs=predict)
  return model

# Construct Model

In [16]:
def getModel(model_name):
  if model_name == 'CNN':
    return getCNNModel()
  elif model_name == 'VGG':
    return getVGGModel()

In [27]:
model_name = "CNN" #@param ["CNN", "VGG"]
model = getModel(model_name)

In [None]:
model_name

## Compile model & Summary

In [None]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
model.summary()

## Train Model

In [None]:
total_train = len(mature_train_df) + len(partially_mature_train_df) + len(unmature_train_df)
total_validate = len(mature_validate_df) + len(partially_mature_validate_df) + len(unmature_validate_df)
epochs = 35
patience = 5

In [None]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_name, save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=patience)

In [None]:
history = model.fit(
    train_data_gen,
    epochs=epochs,
    validation_data=val_data_gen,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping, model_checkpoint]
)

## Plot Model Accuracy and Loss

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(20)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
history.history

## Load Save Model

In [None]:
model = tf.keras.models.load_model(model_name)

In [None]:
class_labels = ["mature", "partially mature","unmature"]

In [None]:
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

In [None]:
for i in ["/content/health-benefits-of-papaya_copy.jpeg","/content/unknown.png","/content/unknown (1).png","/content/unknown (2).png","/content/ripe.jpeg","/content/Datasets/validate/Unmature/unmature_0103.JPG"]:
  img = tf.keras.preprocessing.image.load_img(
      i, target_size=(IMAGE_SIZE, IMAGE_SIZE)
  )
  img_array = tf.keras.preprocessing.image.img_to_array(img)
  img_array = img_array / 255
  img_array = img_array.reshape(1,IMAGE_SIZE,IMAGE_SIZE,3)
  predictions = model.predict(img_array)
  score = tf.nn.softmax(predictions[0])
  imgplot = plt.imshow(img)
  plt.show()
  print(
      "This image most likely belongs to {} "
      .format(class_labels[np.argmax(score)])
  )

# Save Model to Google Drive

In [None]:
shutil.copy('model_vgg.h5','/content/drive/MyDrive/CSC340 AI/')

'/content/drive/MyDrive/CSC340 AI/model_vgg.h5'