# Written by Jesse van Duijne on 04-06-2020

In [1]:
import os
from os import makedirs
from os import listdir
from shutil import copyfile
from random import seed
from random import random

In [2]:
# 1) Make directories
#
# Lay out the output tree <root>/{train,test}/{Healthy,Unhealthy}.

pwd = '../Dataset/'
root = os.path.join(pwd, 'Healthy_vs_Unhealthy_Tomatoes2')

subdirs = ['train', 'test']
labeldirs = ['Healthy', 'Unhealthy']

# One folder per (split, label) pair; exist_ok lets the cell be re-run safely.
for split_name, label_name in [(s, l) for s in subdirs for l in labeldirs]:
    makedirs(os.path.join(root, split_name, label_name), exist_ok=True)

In [4]:
# 2) Divide images among directories
#
# Each image of each class is copied into either the 'train' or the 'test'
# folder of that class; roughly `validation_ratio` of them end up in 'test'.

seed(1)                      # Fix the random state so the split can be reproduced
validation_ratio = 0.2       # Fraction of pictures held out for validation (typically 0.2 to 0.3)

healthy_directory = os.path.join(pwd, 'Good v2 (total)/')
unhealthy_directory = os.path.join(pwd, 'Bad v2 (total)/')
input_dirs = [healthy_directory, unhealthy_directory]

# Name of the label sub-folder that the images of each input directory go to.
label_by_input_dir = {
    healthy_directory: 'Healthy',
    unhealthy_directory: 'Unhealthy',
}

for input_dir in input_dirs:
    health_status_folder = label_by_input_dir[input_dir]

    # listdir() returns entries in arbitrary order; sorting them makes the
    # seeded random draws below assign the same files to 'test' on every run.
    for filename in sorted(listdir(input_dir)):
        src = os.path.join(input_dir, filename)
        if not os.path.isfile(src):
            # copyfile() only handles regular files; skip sub-directories etc.
            continue
        dst_dir = 'test' if random() < validation_ratio else 'train'
        dst = os.path.join(root, dst_dir, health_status_folder, filename)
        copyfile(src, dst)


In [7]:
# 3) Prepare dataset

# Imported in this cell because it runs before the cell that holds the other
# Keras imports; without it a fresh "Restart & Run All" stops here with a NameError.
from keras.preprocessing.image import ImageDataGenerator

# The original images are RGB with channel values in the 0-255 range; rescale
# them to 0-1, as the raw values would be too high for the model to process.
datagen = ImageDataGenerator(rescale=1.0/255.0)
size = 224  # images are resized to size x size pixels when loaded

# Prepare the train and validation datasets; the class labels are taken from
# the sub-folder names below each directory.
traindata = datagen.flow_from_directory(os.path.join(root, 'train/'), target_size=(size, size))
testdata = datagen.flow_from_directory(os.path.join(root, 'test/'), target_size=(size, size))

Found 855 images belonging to 2 classes.
Found 190 images belonging to 2 classes.


In [8]:
import keras,os
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

In [9]:
# Reset the Keras backend state before the model is defined in the next cell.
keras.backend.clear_session()

In [10]:
# 4) Create the model
#
# VGG16-style network: five convolutional stages (each a run of 3x3 "same"
# ReLU convolutions followed by a 2x2 max-pool), then a dense classifier head.

# (number of filters, number of stacked convolutions) for each stage
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
for stage_index, (n_filters, n_convs) in enumerate(conv_stages):
    for conv_index in range(n_convs):
        conv_kwargs = dict(filters=n_filters, kernel_size=(3,3), padding="same", activation="relu")
        if stage_index == 0 and conv_index == 0:
            # Only the very first layer declares the input image shape.
            conv_kwargs["input_shape"] = (224,224,3)
        model.add(Conv2D(**conv_kwargs))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))

# Classifier head: two 4096-unit hidden layers and a 2-way softmax output.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(units=4096, activation="relu"))
model.add(Dense(units=2, activation="softmax"))

In [11]:
# 4) Compile the model

from keras.optimizers import Adam

# The learning rate is passed positionally: the keyword was renamed from `lr`
# to `learning_rate` between Keras releases, but it is the first parameter in both.
opt = Adam(0.001)

# The metric is requested as 'acc' so that it is reported as 'acc' / 'val_acc'.
# The checkpoint and early-stopping callbacks monitor 'val_acc'; newer Keras
# releases report metrics under the exact name given here, so 'accuracy' would
# produce 'val_accuracy' and the callbacks would never see their metric.
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['acc'])

In [12]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 56, 56, 128)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 56, 56, 256)      

In [13]:
# 5) Start training with checkpoints & early stopping

from keras.callbacks import ModelCheckpoint, EarlyStopping
from PIL import Image

# Both callbacks watch the same logged quantity: the validation accuracy.
# NOTE(review): this key must match the name under which Keras reports the
# metric; some Keras versions log 'val_accuracy' instead - confirm.
monitored_metric = 'val_acc'

# ModelCheckpoint: writes the full model (not only the weights) to disk, and
# only when the monitored value has improved.
checkpoint = ModelCheckpoint(
    "vgg16_1.h5",
    monitor=monitored_metric,
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# EarlyStopping: ends training early once the monitored value has not improved
# for `patience` consecutive epochs.
early = EarlyStopping(
    monitor=monitored_metric,
    min_delta=0,
    patience=20,
    verbose=1,
    mode='auto',
)



In [14]:
# Train the model; `hist` keeps the per-epoch training history.
# validation_steps is taken from the generator's length so that every validation
# pass covers the test set exactly once. A fixed 10 steps is more than one pass
# over the 190 test images at the generator's default batch size, so the
# iterator would wrap around and count some images twice in val_acc.
hist = model.fit_generator(generator = traindata, steps_per_epoch = 100, epochs = 100,
                           validation_data = testdata, validation_steps = len(testdata),
                           callbacks = [checkpoint, early])

Epoch 1/100

KeyboardInterrupt: 