In [None]:
# from google.colab import drive
# drive.mount('/content/gdrive/')

In [None]:
# import os
# os.chdir("/content/gdrive/My Drive/Ece 9039/dataset")

In [None]:
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report,confusion_matrix

from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import image_dataset_from_directory
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, GlobalAveragePooling2D
from keras.wrappers.scikit_learn import KerasClassifier

from keras.utils import to_categorical, plot_model
from keras.applications import DenseNet121
from keras.applications import VGG16
from keras.applications.resnet import ResNet152
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [None]:
# Show one sample image per class side by side, each titled with its array shape.
fig, ax = plt.subplots(1, 4, figsize=(15, 8))

# (title template, file path) for each panel, in left-to-right order
sample_panels = (
    ('cataract, Resolution: {:}', './cataract/_1_5346540.jpg'),
    ('diabetic_retinopathy, Resolution: {:}', './diabetic_retinopathy/100_left.jpeg'),
    ('glaucoma, Resolution: {:}', './glaucoma/_0_4517448.jpg'),
    ('normal, Resolution: {:}', './normal/8_left.jpg'),
)

loaded_samples = []
for panel, (title_template, file_path) in zip(ax, sample_panels):
    picture = plt.imread(file_path)
    panel.imshow(picture)
    panel.set_title(title_template.format(picture.shape))
    loaded_samples.append(picture)

# Keep the per-class image arrays available under their original names.
cataract_img, diabetic_retinopathy_img, glaucoma_img, normal_img = loaded_samples

plt.tight_layout()
plt.show()

In [None]:
# remove noise image
def remove_corrupt(path):
    """Delete image files that carry no JFIF marker and report how many were removed.

    Scans the four class folders ('glaucoma', 'normal', 'cataract',
    'diabetic_retinopathy') directly under ``path``. A regular file is treated
    as corrupt when the bytes returned by ``peek(10)`` on it do not contain
    ``b"JFIF"``; such files are removed from disk.

    Args:
        path: directory that contains the four class folders.

    Returns:
        The number of files that were deleted.

    Raises:
        OSError: if a class folder is missing or a file cannot be read/removed.
    """
    corrupt = 0
    for folder_name in ('glaucoma', 'normal', 'cataract', 'diabetic_retinopathy'):
        folder_path = os.path.join(path, folder_name)
        for fname in os.listdir(folder_path):
            fpath = os.path.join(folder_path, fname)
            # Sub-directories cannot be opened as files; leave them untouched.
            if not os.path.isfile(fpath):
                continue
            # The context manager closes the handle even if peek() raises, and
            # never touches an unbound name when open() itself fails.
            with open(fpath, "rb") as fobj:
                is_jfif = b"JFIF" in fobj.peek(10)
            if not is_jfif:
                corrupt += 1
                # Drop corrupted image
                os.remove(fpath)
    print('Total %d corrupt images dataset: ' % corrupt)
    print('All corrupt images dropped.' + '\n')
    return corrupt

# NOTE: destructive step — every file under the four class folders of './'
# that fails the JFIF check inside remove_corrupt is deleted from disk.
remove_corrupt_info = remove_corrupt('./')

In [None]:
# List the class folders found in the dataset root.
# Only non-hidden sub-directories are counted as class labels: './' can also
# hold plain files (e.g. the model diagrams that plot_model saves with a
# relative file name) and hidden folders, none of which are image classes.
dataset_path = sorted(
    entry for entry in os.listdir('./')
    if os.path.isdir(os.path.join('./', entry)) and not entry.startswith('.')
)
print (dataset_path)
print("Types of classes labels found: ", len(dataset_path))

def dataset_info(path_dir):
  """Count the entries inside each directory listed in ``path_dir``.

  Args:
    path_dir: iterable of directory paths (one per class label).

  Returns:
    A list of ``(path, count)`` tuples in input order, where ``count`` is the
    number of directory entries formatted as a string. Items that are not
    existing directories are skipped instead of raising.
  """
  label_image_info = []

  for item in path_dir:
    # Plain files (or stale names) cannot be listed; ignore them.
    if not os.path.isdir(item):
      continue
    all_image = os.listdir(item)
    label_image_info.append((item, str(len(all_image))))
  return label_image_info

# Collect (label, entry count) pairs for the listed entries and tabulate them.
label_iamge_info = dataset_info(dataset_path)

summary_columns = ['Labels', 'image_number']
df = pd.DataFrame(label_iamge_info, columns=summary_columns)
df.head()

In [None]:
# Loader arguments shared by both subsets; only `subset` differs per call.
# The same seed and validation_split are passed to both calls (presumably
# what keeps the two subsets disjoint — see the Keras loader documentation).
split_options = dict(
    seed=2022,
    image_size=(224, 224),
    batch_size=64,
    shuffle=True,
    color_mode='rgb',
    label_mode='categorical',
    validation_split=0.3,
)

# 70% of the images: training batches
train_data = image_dataset_from_directory('./', subset='training', **split_options)

# remaining 30%: held-out batches
validate_data = image_dataset_from_directory('./', subset='validation', **split_options)

# Split the held-out batches again: the first two thirds become the test set,
# the remaining batches stay as validation data.
# NOTE(review): the loader was created with shuffle=True; if it reshuffles on
# every pass, take()/skip() would not select fixed, disjoint images — confirm.
val_batches = tf.data.experimental.cardinality(validate_data)
test_batch_count = (2*val_batches) // 3
test_data = validate_data.take(test_batch_count)
validate_data = validate_data.skip(test_batch_count)



In [None]:
# Rescale pixel values by 1/255 in all three datasets; labels pass through.
# NOTE: each dataset name is rebound to its scaled version, so running this
# cell a second time would divide by 255 again.
def _to_unit_range(images, targets):
    """Divide the image batch by 255 and return it with its unchanged labels."""
    return images/255, targets

train_data = train_data.map(_to_unit_range)
validate_data = validate_data.map(_to_unit_range)
test_data = test_data.map(_to_unit_range)

print(f"# train batchs = {len(train_data)}, # validate batchs = {len(validate_data)}, # test batch = {len(test_data)}")

In [None]:
# Settings used by the hyperparameter-tuning cells:
# input image size, batch size, and the generator that rescales pixels by
# 1/255 and reserves 30% of the images as the 'validation' subset.
batch_size = 64
img_width = img_height = 224

train_datagen = ImageDataGenerator(validation_split=0.3, rescale=1./255)

In [None]:
# DenseNet121 hyperparameter tuning function
def Grid_Search_TL_model(active = 'relu', learning_rate = 0.01, dropout = 0.5):
  """Build and compile a DenseNet121-based 4-class classifier for grid search.

  The ImageNet-pretrained DenseNet121 base (no top, global average pooling)
  is frozen, so only the Dense/Dropout head added here is trainable.

  Args:
    active: activation used by the two hidden Dense layers.
    learning_rate: learning rate passed to the Adam optimizer.
    dropout: rate used by both Dropout layers.

  Returns:
    The compiled Sequential model.
  """
  densenet = DenseNet121(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')

  model = Sequential([
      densenet,
      # add hidden layers after tuning
      Dense(512, activation=active),
      Dropout(dropout),
      # NOTE(review): the final DenseNet121 model cell uses Dense(128) for this
      # layer — confirm which width is intended.
      Dense(256, activation=active),
      Dropout(dropout),
      # softmax: the targets are one-hot (exactly one class per image) and the
      # loss is categorical_crossentropy, which expects a class distribution.
      Dense(4, activation='softmax')
  ])
  # Freeze the layers of the pre-trained model
  densenet.trainable = False

  adam = Adam(learning_rate=learning_rate)
  model.compile(optimizer = adam, loss='categorical_crossentropy', metrics=['accuracy'])
  return model

# gridsearch on 2 activation function, 2 dropout rate and 3 learning_rate
param_grid = dict(active=['relu', 'tanh'],learning_rate = [0.01,0.001,0.0001], dropout = [0.5,0.8])

hp_model = KerasClassifier(build_fn=Grid_Search_TL_model, verbose=0)
# instantiate gridsearch object using 3 fold cross-validation
grid = GridSearchCV(estimator=hp_model, param_grid=param_grid, cv=3,error_score='raise')

# Draw a single batch (at most batch_size images) from each subset; the search
# therefore runs on a small sample rather than the full dataset.
# The builtin next() is used instead of the iterator's .next() method.
X_train, y_train = next(train_datagen.flow_from_directory('./', subset='training', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))
X_validate, y_validate = next(train_datagen.flow_from_directory('./', subset='validation', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))

# NOTE: with epochs = 1 the EarlyStopping(patience=3) callback can never stop a run early.
grid_result = grid.fit(X_train, y_train, validation_data=(X_validate, y_validate), epochs = 1, callbacks=[EarlyStopping(patience=3)])

#determine the best parameter
print(grid_result.best_params_)
print(grid_result.best_score_)

In [None]:
# ImageNet-pretrained DenseNet121 without its classification top; pooling='avg'
# makes the base output one feature vector per image.
densenet = DenseNet121(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')

# Freeze the layers of the pre-trained model
for layer in densenet.layers:
      layer.trainable = False

# densenet.summary()

# Create the model: frozen base + trainable Dense/Dropout head
model = Sequential([
    densenet,
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # softmax: labels are one-hot over 4 mutually exclusive classes and the loss
    # is categorical_crossentropy, which expects a class distribution.
    Dense(4, activation='softmax')
])
adam = Adam(learning_rate=0.001)
model.compile(optimizer= adam, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# plot of DenseNet121 model (written to the working directory)
plot_model(model, to_file='DenseNet121-plant.png', show_shapes=True)


In [None]:
# Train the model and keep the per-epoch loss/accuracy for both splits.
# train_data / validate_data are already batched datasets, so batch_size is
# not passed to fit().
hist = model.fit(train_data, validation_data = validate_data, epochs = 20, verbose = 1)
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_accuracy = hist.history['accuracy']
val_accuracy = hist.history['val_accuracy']

# print loss, val_loss, accuracy and val_accuracy
print(train_loss)
print(train_accuracy)
print(val_loss)
print(val_accuracy)

# x-axis is derived from the recorded history so it always matches the number
# of epochs that actually ran
epochs = list(range(1, len(train_loss) + 1))

# one figure per metric: epochs vs loss, then epochs vs accuracy
for metric, train_curve, val_curve, title in (
    ('loss', train_loss, val_loss, "Loss vs Epoch (DenseNet121)"),
    ('accuracy', train_accuracy, val_accuracy, 'Accuracy vs Epoch (DenseNet121)'),
):
    plt.figure(figsize=(9,3))
    plt.plot(epochs, train_curve, color='blue', label='train ' + metric)
    plt.plot(epochs, val_curve, color='orange', label='validation ' + metric)
    plt.xlabel("epochs")
    plt.ylabel(metric)
    plt.title(title)
    plt.legend(loc='best')
    plt.show()



In [None]:
# Evaluate on the test batches and report accuracy as a percentage.
loss, accuracy = model.evaluate(x=test_data, verbose=0)
accuracy_percent = accuracy * 100
print('Test accuracy: %.2f%%' % accuracy_percent)

In [None]:
real_value = []
pred_value = []

# not used in this cell; kept in case another cell reads it
labels = dict()

# Predict every test batch exactly once and record the true / predicted class
# index of each image (argmax over the 4 class columns).
for images_batch, labels_batch in test_data:
    batch_prediction = model.predict(images_batch, verbose=0)
    real_value.extend(np.argmax(labels_batch.numpy(), axis=1))
    pred_value.extend(np.argmax(batch_prediction, axis=1))

# One-hot encode the class indices (row k of the 4x4 identity is class k) so
# the scores below are computed in the same indicator format as before.
one_hot_rows = np.eye(4, dtype=int)
real_value1 = one_hot_rows[np.asarray(real_value, dtype=int)].tolist()
pred_value1 = one_hot_rows[np.asarray(pred_value, dtype=int)].tolist()


# calculate precision, recall and f1 scores and print
precision = precision_score(real_value1, pred_value1, average='macro')
recall = recall_score(real_value1, pred_value1, average='macro')
f1 = f1_score(real_value1, pred_value1, average='macro')

print('Precision: {:.3f}'.format(precision))
print('Recall: {:.3f}'.format(recall))
print('F1 score: {:.3f}'.format(f1))

In [None]:
# VGG16 hyperparameter tuning function
def Grid_Search_TL_model(active = 'relu', learning_rate = 0.01, dropout = 0.5):
  """Build and compile a VGG16-based 4-class classifier for grid search.

  The ImageNet-pretrained VGG16 base (no top, global average pooling) has all
  of its layers frozen, so only the Dense/Dropout head added here is trainable.

  Args:
    active: activation used by the two hidden Dense layers.
    learning_rate: learning rate passed to the Adam optimizer.
    dropout: rate used by both Dropout layers.

  Returns:
    The compiled Sequential model.
  """
  vgg16 = VGG16(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')

  model = Sequential([
      vgg16,
      # add hidden layers after tuning
      Dense(512, activation=active),
      Dropout(dropout),
      Dense(128, activation=active),
      Dropout(dropout),
      # softmax: the targets are one-hot (exactly one class per image) and the
      # loss is categorical_crossentropy, which expects a class distribution.
      Dense(4, activation='softmax')
  ])
  # Freeze the layers of the pre-trained model
  for layer in vgg16.layers:
      layer.trainable = False

  adam = Adam(learning_rate=learning_rate)
  model.compile(optimizer = adam, loss='categorical_crossentropy', metrics=['accuracy'])
  return model

# gridsearch on 2 activation function, 2 dropout rate and 3 learning_rate
param_grid = dict(active=['relu', 'tanh'],learning_rate = [0.01,0.001,0.0001], dropout = [0.5,0.8])

hp_model = KerasClassifier(build_fn=Grid_Search_TL_model, verbose=0)
# instantiate gridsearch object using 3 fold cross-validation
grid = GridSearchCV(estimator=hp_model, param_grid=param_grid, cv=3,error_score='raise')

# Draw a single batch (at most batch_size images) from each subset; the search
# therefore runs on a small sample rather than the full dataset.
# The builtin next() is used instead of the iterator's .next() method.
X_train, y_train = next(train_datagen.flow_from_directory('./', subset='training', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))
X_validate, y_validate = next(train_datagen.flow_from_directory('./', subset='validation', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))

# NOTE: with epochs = 1 the EarlyStopping(patience=3) callback can never stop a run early.
grid_result = grid.fit(X_train, y_train, validation_data=(X_validate, y_validate), epochs = 1, callbacks=[EarlyStopping(patience=3)])

#determine the best parameter
print(grid_result.best_params_)
print(grid_result.best_score_)

In [None]:
# ImageNet-pretrained VGG16 without its classification top; pooling='avg'
# makes the base output one feature vector per image.
vgg16 = VGG16(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')

# Freeze the layers of the pre-trained model
for layer in vgg16.layers:
    layer.trainable = False

# vgg16.summary()

# Create the model: frozen base + trainable Dense/Dropout head
model = Sequential([
    vgg16,
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # softmax: labels are one-hot over 4 mutually exclusive classes and the loss
    # is categorical_crossentropy, which expects a class distribution.
    Dense(4, activation='softmax')
])
adam = Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# plot of VGG16 model (written to the working directory)
plot_model(model, to_file='VGG16-plant.png', show_shapes=True)

In [None]:
# Train the model and keep the per-epoch loss/accuracy for comparison.
# train_data / validate_data are already batched datasets, so batch_size is
# not passed to fit().
hist = model.fit(train_data, validation_data = validate_data, epochs = 20, verbose = 1)
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_accuracy = hist.history['accuracy']
val_accuracy = hist.history['val_accuracy']

# print loss, val_loss, accuracy and val_accuracy
print(train_loss)
print(train_accuracy)
print(val_loss)
print(val_accuracy)

# x-axis is derived from the recorded history so it always matches the number
# of epochs that actually ran
epochs = list(range(1, len(train_loss) + 1))

# one figure per metric: epochs vs loss, then epochs vs accuracy
for metric, train_curve, val_curve, title in (
    ('loss', train_loss, val_loss, "Loss vs Epoch (VGG16)"),
    ('accuracy', train_accuracy, val_accuracy, 'Accuracy vs Epoch (VGG16)'),
):
    plt.figure(figsize=(9,3))
    plt.plot(epochs, train_curve, color='blue', label='train ' + metric)
    plt.plot(epochs, val_curve, color='orange', label='validation ' + metric)
    plt.xlabel("epochs")
    plt.ylabel(metric)
    plt.title(title)
    plt.legend(loc='best')
    plt.show()


In [None]:
# Evaluate on the test batches and report accuracy as a percentage.
loss, accuracy = model.evaluate(x=test_data, verbose=0)
accuracy_percent = accuracy * 100
print('Test accuracy: %.2f%%' % accuracy_percent)

In [None]:
real_value = []
pred_value = []

# not used in this cell; kept in case another cell reads it
labels = dict()

# Predict every test batch exactly once and record the true / predicted class
# index of each image (argmax over the 4 class columns).
for images_batch, labels_batch in test_data:
    batch_prediction = model.predict(images_batch, verbose=0)
    real_value.extend(np.argmax(labels_batch.numpy(), axis=1))
    pred_value.extend(np.argmax(batch_prediction, axis=1))

# One-hot encode the class indices (row k of the 4x4 identity is class k) so
# the scores below are computed in the same indicator format as before.
one_hot_rows = np.eye(4, dtype=int)
real_value1 = one_hot_rows[np.asarray(real_value, dtype=int)].tolist()
pred_value1 = one_hot_rows[np.asarray(pred_value, dtype=int)].tolist()


# calculate precision, recall and f1 scores and print
precision = precision_score(real_value1, pred_value1, average='macro')
recall = recall_score(real_value1, pred_value1, average='macro')
f1 = f1_score(real_value1, pred_value1, average='macro')

print('Precision: {:.3f}'.format(precision))
print('Recall: {:.3f}'.format(recall))
print('F1 score: {:.3f}'.format(f1))

In [None]:
# ResNet152 hyperparameter tuning function
def Grid_Search_TL_model(active = 'relu', learning_rate = 0.01, dropout = 0.5):
  """Build and compile a ResNet152-based 4-class classifier for grid search.

  The ImageNet-pretrained ResNet152 base (no top, global average pooling) has
  all of its layers frozen, so only the Dense/Dropout head added here is
  trainable.

  Args:
    active: activation used by the two hidden Dense layers.
    learning_rate: learning rate passed to the Adam optimizer.
    dropout: rate used by both Dropout layers.

  Returns:
    The compiled Sequential model.
  """
  resnet152 = ResNet152(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')

  model = Sequential([
      resnet152,
      # add hidden layers after tuning
      Dense(512, activation=active),
      Dropout(dropout),
      # NOTE(review): the final ResNet152 model cell uses Dense(128) for this
      # layer — confirm which width is intended.
      Dense(256, activation=active),
      Dropout(dropout),
      # softmax: the targets are one-hot (exactly one class per image) and the
      # loss is categorical_crossentropy, which expects a class distribution.
      Dense(4, activation='softmax')
  ])
  # Freeze the layers of the pre-trained model
  for layer in resnet152.layers:
      layer.trainable = False

  adam = Adam(learning_rate=learning_rate)
  model.compile(optimizer = adam, loss='categorical_crossentropy', metrics=['accuracy'])
  return model

# gridsearch on 2 activation function, 2 dropout rate and 3 learning_rate
param_grid = dict(active=['relu', 'tanh'],learning_rate = [0.01,0.001,0.0001], dropout = [0.5,0.8])

hp_model = KerasClassifier(build_fn=Grid_Search_TL_model, verbose=0)
# instantiate gridsearch object using 3 fold cross-validation
grid = GridSearchCV(estimator=hp_model, param_grid=param_grid, cv=3,error_score='raise')

# Draw a single batch (at most batch_size images) from each subset; the search
# therefore runs on a small sample rather than the full dataset.
# The builtin next() is used instead of the iterator's .next() method.
X_train, y_train = next(train_datagen.flow_from_directory('./', subset='training', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))
X_validate, y_validate = next(train_datagen.flow_from_directory('./', subset='validation', target_size=(img_width, img_height), batch_size=batch_size, class_mode='categorical'))

# NOTE: with epochs = 1 the EarlyStopping(patience=3) callback can never stop a run early.
grid_result = grid.fit(X_train, y_train, validation_data=(X_validate, y_validate), epochs = 1, callbacks=[EarlyStopping(patience=3)])

#determine the best parameter
print(grid_result.best_params_)
print(grid_result.best_score_)

In [None]:
# ImageNet-pretrained ResNet152 without its classification top; pooling='avg'
# makes the base output one feature vector per image.
resnet152 = ResNet152(weights = "imagenet", include_top = False, input_shape=(224,224,3), pooling='avg')


# Freeze the layers of the pre-trained model
for layer in resnet152.layers:
    layer.trainable = False

# resnet152.summary()

# Create the model: frozen base + trainable Dense/Dropout head
model = Sequential([
    resnet152,
    Dense(512, activation='tanh'),
    Dropout(0.5),
    Dense(128, activation='tanh'),
    Dropout(0.5),
    # softmax: labels are one-hot over 4 mutually exclusive classes and the loss
    # is categorical_crossentropy, which expects a class distribution.
    Dense(4, activation='softmax')
])
adam = Adam(learning_rate=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# plot of ResNet152 model (written to the working directory)
plot_model(model, to_file='ResNet152-plant.png', show_shapes=True)

In [None]:
# Train the model and keep the per-epoch loss/accuracy for comparison.
# train_data / validate_data are already batched datasets, so batch_size is
# not passed to fit().
hist = model.fit(train_data, validation_data = validate_data, epochs = 20, verbose = 1)
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_accuracy = hist.history['accuracy']
val_accuracy = hist.history['val_accuracy']

# print loss, val_loss, accuracy and val_accuracy
print(train_loss)
print(train_accuracy)
print(val_loss)
print(val_accuracy)

# x-axis is derived from the recorded history so it always matches the number
# of epochs that actually ran
epochs = list(range(1, len(train_loss) + 1))

# one figure per metric: epochs vs loss, then epochs vs accuracy
for metric, train_curve, val_curve, title in (
    ('loss', train_loss, val_loss, "Loss vs Epoch (ResNet152)"),
    ('accuracy', train_accuracy, val_accuracy, 'Accuracy vs Epoch (ResNet152)'),
):
    plt.figure(figsize=(9,3))
    plt.plot(epochs, train_curve, color='blue', label='train ' + metric)
    plt.plot(epochs, val_curve, color='orange', label='validation ' + metric)
    plt.xlabel("epochs")
    plt.ylabel(metric)
    plt.title(title)
    plt.legend(loc='best')
    plt.show()

In [None]:
# Evaluate on the test batches and report accuracy as a percentage.
loss, accuracy = model.evaluate(x=test_data, verbose=0)
accuracy_percent = accuracy * 100
print('Test accuracy: %.2f%%' % accuracy_percent)

In [None]:
real_value = []
pred_value = []

# not used in this cell; kept in case another cell reads it
labels = dict()

# Predict every test batch exactly once and record the true / predicted class
# index of each image (argmax over the 4 class columns).
for images_batch, labels_batch in test_data:
    batch_prediction = model.predict(images_batch, verbose=0)
    real_value.extend(np.argmax(labels_batch.numpy(), axis=1))
    pred_value.extend(np.argmax(batch_prediction, axis=1))

# One-hot encode the class indices (row k of the 4x4 identity is class k) so
# the scores below are computed in the same indicator format as before.
one_hot_rows = np.eye(4, dtype=int)
real_value1 = one_hot_rows[np.asarray(real_value, dtype=int)].tolist()
pred_value1 = one_hot_rows[np.asarray(pred_value, dtype=int)].tolist()

# calculate precision, recall and f1 scores and print
precision = precision_score(real_value1, pred_value1, average='macro')
recall = recall_score(real_value1, pred_value1, average='macro')
f1 = f1_score(real_value1, pred_value1, average='macro')

print('Precision: {:.3f}'.format(precision))
print('Recall: {:.3f}'.format(recall))
print('F1 score: {:.3f}'.format(f1))