# Import Dependencies

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [None]:
import gdown
import shutil
from sklearn.model_selection import train_test_split

# Clean Workspace

In [None]:
# Remove leftovers from a previous run so the cells below start from a clean
# state: everything matching papaya_image* (the extracted folder and the
# downloaded zip) and the __MACOSX folder (presumably created when unzipping
# an archive built on macOS -- verify).
!rm -rf papaya_image*
!rm -rf __MACOSX

# Import Datasets

## Download from Google Drive

In [None]:
# Download the zipped papaya image dataset from Google Drive into the
# current working directory, showing gdown's progress output.
url = "https://drive.google.com/uc?id=1jxrBIk2DDHPV2xy0VblLal0C8ueqkZ3P"
output = "papaya_image.zip"
gdown.download(url=url, output=output, quiet=False)

## Unzip file

In [None]:
# Extract the downloaded archive into the current working directory.
!unzip papaya_image.zip

## Train/Validation Split

### Define directory path

In [None]:
# Directory that contains the extracted `papaya_image` folder; every other
# path below is built relative to it.
root_dir = './'

In [None]:
# Root of the extracted dataset and its three raw per-class image folders.
base_dir = os.path.join(root_dir, "papaya_image")
raw_mature_dir, raw_partially_dir, raw_unmature_dir = (
    os.path.join(base_dir, class_folder)
    for class_folder in ("mature", "partiallymature", "unmature")
)

In [None]:
# Training split: one sub-folder per class under <base_dir>/train.
train_dir = os.path.join(base_dir, "train")
train_mature_dir, train_partially_mature_dir, train_unmature_dir = (
    os.path.join(train_dir, class_folder)
    for class_folder in ("mature", "partiallymature", "unmature")
)

In [None]:
# Validation split: one sub-folder per class under <base_dir>/validate.
validate_dir = os.path.join(base_dir, "validate")
validate_mature_dir, validate_partially_mature_dir, validate_unmature_dir = (
    os.path.join(validate_dir, class_folder)
    for class_folder in ("mature", "partiallymature", "unmature")
)

### Create directories for the train and validation sets

In [None]:
# Create the train/validate folder tree (one sub-folder per class).
# os.makedirs(..., exist_ok=True) is used instead of os.mkdir so that
# re-running this cell does not fail with FileExistsError when the folders
# are already present.
for split_folder in (
    train_dir,
    train_mature_dir,
    train_partially_mature_dir,
    train_unmature_dir,
    validate_dir,
    validate_mature_dir,
    validate_partially_mature_dir,
    validate_unmature_dir,
):
    os.makedirs(split_folder, exist_ok=True)

### Split into train and validation sets and copy the files to their target directories

In [None]:
# Fraction of each class's files passed to train_test_split as the training
# share; the remainder becomes the validation split.
train_size = 0.8

In [None]:
def _list_image_files(directory):
    """Return the sorted names of the visible regular files in *directory*.

    os.listdir returns entries in arbitrary, filesystem-dependent order, so
    the names are sorted to make the seeded train/validate split
    reproducible. Hidden entries (e.g. ``.DS_Store``) and sub-directories are
    skipped so they are neither copied nor counted as samples.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if not entry.startswith(".")
        and os.path.isfile(os.path.join(directory, entry))
    )


# One single-column ("filename") DataFrame per class, listing its raw images.
mature_df = pd.DataFrame(data=_list_image_files(raw_mature_dir), columns=["filename"])
partially_mature_df = pd.DataFrame(data=_list_image_files(raw_partially_dir), columns=["filename"])
unmature_df = pd.DataFrame(data=_list_image_files(raw_unmature_dir), columns=["filename"])

In [None]:
# Split every class separately with the same ratio and seed, so each class
# keeps the same train/validate proportion.
split_options = {"train_size": train_size, "random_state": 42}
mature_train_df, mature_validate_df = train_test_split(mature_df, **split_options)
partially_mature_train_df, partially_mature_validate_df = train_test_split(
    partially_mature_df, **split_options
)
unmature_train_df, unmature_validate_df = train_test_split(unmature_df, **split_options)

In [None]:
# (file list, source folder, destination folder) for every class/split pair,
# processed in order: the three training sets first, then the three
# validation sets.
copy_plan = (
    (mature_train_df, raw_mature_dir, train_mature_dir),
    (partially_mature_train_df, raw_partially_dir, train_partially_mature_dir),
    (unmature_train_df, raw_unmature_dir, train_unmature_dir),
    (mature_validate_df, raw_mature_dir, validate_mature_dir),
    (partially_mature_validate_df, raw_partially_dir, validate_partially_mature_dir),
    (unmature_validate_df, raw_unmature_dir, validate_unmature_dir),
)
for split_df, source_dir, target_dir in copy_plan:
    # Copy each listed file, keeping its original file name.
    for filename in split_df["filename"]:
        shutil.copyfile(
            os.path.join(source_dir, filename),
            os.path.join(target_dir, filename),
        )

# Preprocessing

In [None]:
# Side length every image is resized to; used for the generators' target_size
# and for the model's input_shape.
IMAGE_SIZE = 300
# Number of images per generator batch; also used to derive the step counts
# passed to model.fit.
BATCH_SIZE = 100

In [None]:
def showImage(img):
    """Draw *img* with matplotlib's ``imshow`` and display the figure."""
    plt.imshow(img)
    plt.show()

In [None]:
# Training-time preprocessing: rescale pixel values by 1/255 and apply random
# rotations (up to 40 degrees) plus horizontal/vertical flips; pixels exposed
# by a transform are filled with the nearest value.
train_augmentation = {
    "rescale": 1.0 / 255,
    "rotation_range": 40,
    "horizontal_flip": True,
    "vertical_flip": True,
    "fill_mode": "nearest",
}
image_gen_train = ImageDataGenerator(**train_augmentation)

In [None]:
# Stream shuffled, one-hot-labelled batches of resized images from the
# per-class sub-folders of the training directory.
train_data_gen = image_gen_train.flow_from_directory(
    directory=train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Take the first batch from the training generator and preview its first
# image as a sanity check of the preprocessing.
first_batch_images, _first_batch_labels = train_data_gen[0]
sample_image = first_batch_images[0]
showImage(sample_image)

In [None]:
# Display the class-name -> label-index mapping of the training generator.
train_data_gen.class_indices

In [None]:
# Validation-time preprocessing: rescale only.
# Random rotations/flips are a training-time regulariser; applying them to the
# validation images makes the validation metrics vary randomly from epoch to
# epoch and no longer reflect performance on real, untransformed images.
# Those metrics are what the checkpoint and early-stopping callbacks rely on
# (val_loss is the Keras default monitor).
image_gen_val = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Stream one-hot-labelled batches of resized images from the per-class
# sub-folders of the validation directory.
val_data_gen = image_gen_val.flow_from_directory(
    directory=validate_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode="categorical",
    batch_size=BATCH_SIZE,
)

In [None]:
# Display the class-name -> label-index mapping of the validation generator
# (useful to confirm it matches the training generator's mapping).
val_data_gen.class_indices

# Model Building

## Construct Model

In [None]:
# Small CNN: three 3x3 ReLU convolution stages, each followed by 2x2 max
# pooling, then dropout, a 512-unit dense layer and a 3-unit output layer.
# The output layer has no activation, so the model emits raw logits.
conv_options = {"kernel_size": (3, 3), "activation": "relu"}
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), **conv_options),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(128, **conv_options),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(128, **conv_options),
    tf.keras.layers.MaxPooling2D(pool_size=2, strides=2),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dense(3),
])

In [None]:
# The model outputs logits (no softmax on the last layer), so the loss is
# told to apply the softmax itself via from_logits=True.
logits_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=logits_loss, metrics=["accuracy"])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

## Train Model

In [None]:
# Number of images in each split, summed over the three classes; used to
# derive the number of batches per epoch.
total_train = sum(
    len(split_df)
    for split_df in (mature_train_df, partially_mature_train_df, unmature_train_df)
)
total_validate = sum(
    len(split_df)
    for split_df in (mature_validate_df, partially_mature_validate_df, unmature_validate_df)
)
# Upper bound on the number of training epochs.
epochs = 100
# Early-stopping patience: 15% of the epoch budget. EarlyStopping documents
# `patience` as an integer number of epochs, so the product is rounded to an
# int instead of being passed as the float 15.0.
patience = round(0.15 * epochs)
# File the best model is checkpointed to.
model_name = 'model.h5'

In [None]:
# Keep only the best model seen so far on disk, and stop training once the
# monitored metric has not improved for `patience` epochs.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_name,
    save_best_only=True,
)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=patience)

In [None]:
# Batches needed to see every image once per epoch (the last batch may be
# partial, hence the ceiling).
train_steps = int(np.ceil(total_train / float(BATCH_SIZE)))
validate_steps = int(np.ceil(total_validate / float(BATCH_SIZE)))

# Train with early stopping and best-model checkpointing; the returned History
# object holds the per-epoch metrics.
history = model.fit(
    train_data_gen,
    epochs=epochs,
    steps_per_epoch=train_steps,
    validation_data=val_data_gen,
    validation_steps=validate_steps,
    callbacks=[early_stopping, model_checkpoint],
)

## Plot Model Accuracy and Loss

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs that actually ran: when early stopping ends
# training before `epochs` is reached, the history lists are shorter than
# `epochs`, and plotting them against range(epochs) raises a shape-mismatch
# ValueError.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Display the raw per-epoch metric values recorded during training.
history.history

## Load Saved Model

In [None]:
# Replace the in-memory model with the one saved by the ModelCheckpoint
# callback.
# NOTE(review): "model.h5" duplicates the value of `model_name`; keep the two
# in sync if the checkpoint file name ever changes.
model = tf.keras.models.load_model("model.h5")

In [None]:
# Human-readable label for each output index of the model.
# NOTE(review): presumably ordered to match train_data_gen.class_indices
# (mature, partiallymature, unmature) -- confirm against that cell's output.
class_labels = ["mature", "partially mature","unmature"]

In [None]:
# Wrap the logit-producing model with a softmax layer so that predictions are
# class probabilities.
softmax_head = tf.keras.layers.Softmax()
probability_model = tf.keras.Sequential([model, softmax_head])

In [None]:
# Classify three images from the validate folder (one per class folder), show
# each image and print the predicted label with its confidence.
sample_image_paths = [
    "/content/papaya_image/validate/unmature/unmature_097.jpg",
    "/content/papaya_image/validate/partiallymature/partiallymature_092.jpg",
    "/content/papaya_image/validate/mature/Mature_088.jpg",
]
for image_path in sample_image_paths:
    img = tf.keras.preprocessing.image.load_img(
        image_path, target_size=(IMAGE_SIZE, IMAGE_SIZE)
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    # Same 1/255 rescaling the data generators apply.
    img_array = img_array / 255
    # Add the leading batch dimension expected by predict().
    img_array = tf.expand_dims(img_array, 0)
    predictions = probability_model.predict(img_array)
    # probability_model already ends with a Softmax layer, so its output is
    # the class probabilities. Applying tf.nn.softmax again (as was done
    # before) flattens the distribution and reports a wrong confidence.
    score = predictions[0]
    plt.imshow(img)
    plt.show()
    print(
        "This image most likely belongs to {} with a {} percent confidence."
        .format(class_labels[np.argmax(score)], np.max(score) * 100)
    )

# Save Model to Google Drive

In [None]:
# Mount Google Drive at /content/gdrive (google.colab is only importable
# inside a Colab runtime).
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Copy the saved model file into the mounted Drive folder.
# NOTE(review): 'model.h5' duplicates the value of `model_name`; keep the two
# in sync if the checkpoint file name ever changes.
shutil.copy('model.h5','/content/gdrive/MyDrive/CSC340 AI/')