# Source

[GitHub](https://github.com/JordiCorbilla/ocular-disease-intelligent-recognition-deep-learning)



# Pre-Processing

In [None]:
!ls ODIR-5K_Testing_Images_treated_224/| wc -l

## 1. Image Treatment Process

### Image cropper

In [None]:
# odir_image_crop.py 
import cv2
import numpy as np
import logging
import os


class ImageCrop:
    """Crop the black border away from a single image file.

    The image is read from ``source_folder``/``file_name``, trimmed to the
    smallest rectangle containing every non-black pixel, and written under
    the same file name into ``destination_folder``.
    """

    def __init__(self, source_folder, destination_folder, file_name):
        """Store the folders and file name used by :meth:`remove_black_pixels`."""
        self.logger = logging.getLogger('odir')
        self.source_folder = source_folder
        self.destination_folder = destination_folder
        self.file_name = file_name

    @staticmethod
    def _bounding_box(image):
        """Return ``(row0, row1, col0, col1)`` enclosing all non-zero pixels.

        The upper bounds are exclusive so they can be used directly in slices.
        Returns ``None`` when the image contains no non-zero pixel at all.
        """
        # Coordinates of coloured pixels (one row per non-zero array element).
        coordinates = np.argwhere(image > 0)
        if coordinates.size == 0:
            return None

        # Only the first two axes (rows, columns) define the crop; a trailing
        # channel axis, when present, is ignored.
        row0, col0 = coordinates.min(axis=0)[:2]
        row1, col1 = coordinates.max(axis=0)[:2] + 1  # slices are exclusive at the top
        return int(row0), int(row1), int(col0), int(col1)

    def remove_black_pixels(self):
        """Write a copy of the image cropped to its non-black bounding box.

        Files that OpenCV cannot decode and images that are entirely black are
        logged and skipped, so one bad file does not abort a whole batch.
        """
        file = os.path.join(self.source_folder, self.file_name)
        # cv2.imread reports failure by returning None rather than raising.
        image = cv2.imread(file)
        if image is None:
            self.logger.warning('Skipping unreadable image: ' + file)
            return

        box = self._bounding_box(image)
        if box is None:
            self.logger.warning('Skipping image without coloured pixels: ' + file)
            return
        row0, row1, col0, col1 = box

        # Get the contents of the bounding box.
        cropped = image[row0:row1, col0:col1]

        # cv2.imwrite does not create missing folders, it just returns False.
        if self.destination_folder:
            os.makedirs(self.destination_folder, exist_ok=True)
        file_cropped = os.path.join(self.destination_folder, self.file_name)
        if not cv2.imwrite(file_cropped, cropped):
            self.logger.error('Could not write cropped image: ' + file_cropped)

### Removing black pixels

In [None]:
import logging
import logging.config
from os import listdir
from os.path import isfile, join

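# Note: the cropped copies are written to destination_folder; the source images are only
# overwritten if destination_folder points at the same folder as source_folder.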

def process_all_images():
    """Crop every regular file found directly inside ``source_folder``.

    Uses the module-level ``source_folder``, ``destination_folder`` and
    ``logger`` defined below.
    """
    entries = listdir(source_folder)
    files = list(filter(lambda name: isfile(join(source_folder, name)), entries))
    for name in files:
        logger.debug('Processing image: ' + name)
        cropper = ImageCrop(source_folder, destination_folder, name)
        cropper.remove_black_pixels()

# Input folder with the original training images and output folder for the crops.
source_folder = r'ODIR-5K_Training_Dataset'
destination_folder = r'ODIR-5K_Training_Dataset_cropped'
# Configure logging from logging.conf and fetch the shared 'odir' logger.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
process_all_images()

In [None]:
import logging
import logging.config
from os import listdir
from os.path import isfile, join

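# Note: the cropped copies are written to destination_folder; the source testing images are only
# overwritten if destination_folder points at the same folder as source_folder.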

def process_all_images():
    """Crop every regular file found directly inside ``source_folder``.

    Uses the module-level ``source_folder``, ``destination_folder`` and
    ``logger`` defined below.
    """
    entries = listdir(source_folder)
    files = list(filter(lambda name: isfile(join(source_folder, name)), entries))
    for name in files:
        logger.debug('Processing image: ' + name)
        cropper = ImageCrop(source_folder, destination_folder, name)
        cropper.remove_black_pixels()


# Input folder with the original testing images and output folder for the crops.
source_folder = r'ODIR-5K_Testing_Images'
destination_folder = r'ODIR-5K_Testing_Images_cropped'
# Configure logging from logging.conf and fetch the shared 'odir' logger.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
process_all_images()

### Image resizer

In [None]:
# odir_image_resizer.py
import logging
import PIL
import os
from PIL import Image

# This class allows you to resize and mirror an image of the dataset according to specific rules


class ImageResizer:
    """Resize one dataset image and mirror it when it is a right-eye image.

    Attributes:
      image_width: target width in pixels (also the target height when the
        aspect ratio is not kept).
      quality: value passed as ``quality`` when saving the result.
      source_folder / destination_folder / file_name: where the image is read
        from and written to; the file name is the same in both folders.
      keep_aspect_ration: flag selecting proportional scaling. The attribute
        name (including its spelling) is kept as-is for backward compatibility.
    """

    def __init__(self, image_width, quality, source_folder, destination_folder, file_name, keep_aspect_ratio):
        self.logger = logging.getLogger('odir')
        self.image_width = image_width
        self.quality = quality
        self.source_folder = source_folder
        self.destination_folder = destination_folder
        self.file_name = file_name
        self.keep_aspect_ration = keep_aspect_ratio

    def _target_size(self, original_size):
        """Return the ``(width, height)`` the image should be resized to.

        Args:
          original_size: ``(width, height)`` of the source image.
        """
        if self.keep_aspect_ration:
            # Same width-to-height ratio as the original photo.
            width_percentage = self.image_width / float(original_size[0])
            height_size = int(float(original_size[1]) * width_percentage)
            # Truncation can yield 0 for extremely wide images; keep at least 1px.
            return self.image_width, max(1, height_size)
        # Otherwise force the image to be square.
        return self.image_width, self.image_width

    def run(self):
        """ Runs the image library using the constructor arguments.
        Args:
          No arguments are required.
        Returns:
          Saves the treated image into a separate folder.
        """
        # We load the original file, resize it, and mirror the image when the
        # file name contains "right" so every stored image looks like a left eye.
        file = os.path.join(self.source_folder, self.file_name)
        target = os.path.join(self.destination_folder, self.file_name)

        # The context manager closes the source file handle once the resized
        # copy has been produced.
        with Image.open(file) as original:
            # Image.LANCZOS is the same filter the removed Image.ANTIALIAS alias
            # used to select (the alias was dropped in Pillow 10).
            img = original.resize(self._target_size(original.size), Image.LANCZOS)

        if "right" in self.file_name:
            self.logger.debug("Right eye image found. Flipping it")
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
        img.save(target, optimize=True, quality=self.quality)
        self.logger.debug("Image saved")

### Resizing the images to 224 pixels

In [None]:
import logging
import logging.config
from os import listdir
from os.path import isfile, join

# This default job resizes the images to 224px, shrinking the dataset from 1,439,776,768 bytes
# to 116,813,824 bytes (a 91.8% size reduction)


def process_all_images():
    """Resize every regular file found directly inside ``source_folder``.

    Uses the module-level ``image_width``, ``quality``, ``keep_aspect_ratio``,
    ``source_folder``, ``destination_folder`` and ``logger`` defined below.
    """
    entries = listdir(source_folder)
    files = list(filter(lambda name: isfile(join(source_folder, name)), entries))
    for name in files:
        logger.debug('Processing image: ' + name)
        resizer = ImageResizer(image_width, quality, source_folder, destination_folder, name, keep_aspect_ratio)
        resizer.run()


# Target width of the resized images, in pixels
image_width = 224
# False: images are forced to image_width x image_width instead of keeping their ratio
keep_aspect_ratio = False
# JPEG quality setting handed to the resizer
quality = 100
source_folder = r'ODIR-5K_Training_Dataset_cropped'
destination_folder = r'ODIR-5K_Training_Dataset_treated' + '_' + str(image_width)
# Configure logging from logging.conf and fetch the shared 'odir' logger.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
process_all_images()

In [None]:
import logging
import logging.config
from os import listdir
from os.path import isfile, join

# This default job resizes the images to 224px, shrinking the dataset from 1,439,776,768 bytes
# to 116,813,824 bytes (a 91.8% size reduction)

def process_all_images():
    """Resize every regular file found directly inside ``source_folder``.

    Uses the module-level ``image_width``, ``quality``, ``keep_aspect_ratio``,
    ``source_folder``, ``destination_folder`` and ``logger`` defined below.
    """
    entries = listdir(source_folder)
    files = list(filter(lambda name: isfile(join(source_folder, name)), entries))
    for name in files:
        logger.debug('Processing image: ' + name)
        resizer = ImageResizer(image_width, quality, source_folder, destination_folder, name, keep_aspect_ratio)
        resizer.run()


# Target width of the resized images, in pixels
image_width = 224
# False: images are forced to image_width x image_width instead of keeping their ratio
keep_aspect_ratio = False
# JPEG quality setting handed to the resizer
quality = 100
source_folder = r'ODIR-5K_Testing_Images_cropped'
destination_folder = r'ODIR-5K_Testing_Images_treated' + '_' + str(image_width)
# Configure logging from logging.conf and fetch the shared 'odir' logger.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
process_all_images()

## 2. Data Augmentation

### Image treatment

In [None]:
import numpy as np
import tensorflow as tf
from skimage import exposure


class ImageTreatment:
    """Image transformations used for augmentation.

    Most methods delegate to a ``tf.image`` function and return the result as
    a ``uint8`` NumPy array; the last three delegate to ``skimage.exposure``
    and return that library's result unchanged.
    """

    def __init__(self, image_size):
        # Edge length (pixels) of the square images this instance works with.
        self.image_size = image_size

    @staticmethod
    def _as_uint8(values):
        """Convert a transformation result into a uint8 NumPy array."""
        return np.array(values, dtype=np.uint8)

    def scaling(self, image, scale_vector):
        """Centre-crop ``image`` once per scale and resize each crop to full size.

        Returns one output image per entry of ``scale_vector``.
        """
        side = self.image_size
        # crop_and_resize expects a batch, so reshape to a 4-D array of one image.
        batch = np.reshape(image, (1, side, side, 3))

        boxes = np.zeros((len(scale_vector), 4), dtype=np.float32)
        for position, scale in enumerate(scale_vector):
            low = 0.5 - 0.5 * scale
            high = 0.5 + 0.5 * scale
            # Box layout is [y1, x1, y2, x2]; the box is centred and square.
            boxes[position] = np.array([low, low, high, high], dtype=np.float32)

        # Every box refers to image 0 of the batch.
        box_ind = np.zeros((len(scale_vector)), dtype=np.int32)
        crop_size = np.array([side, side], dtype=np.int32)

        return self._as_uint8(tf.image.crop_and_resize(batch, boxes, box_ind, crop_size))

    def brightness(self, image, delta):
        """Apply ``tf.image.adjust_brightness`` with ``delta``."""
        return self._as_uint8(tf.image.adjust_brightness(image, delta))

    def contrast(self, image, contrast_factor):
        """Apply ``tf.image.adjust_contrast`` with ``contrast_factor``."""
        return self._as_uint8(tf.image.adjust_contrast(image, contrast_factor))

    def saturation(self, image, saturation_factor):
        """Apply ``tf.image.adjust_saturation`` with ``saturation_factor``."""
        return self._as_uint8(tf.image.adjust_saturation(image, saturation_factor))

    def hue(self, image, delta):
        """Apply ``tf.image.adjust_hue`` with ``delta``."""
        return self._as_uint8(tf.image.adjust_hue(image, delta))

    def central_crop(self, image, central_fraction):
        """Apply ``tf.image.central_crop`` with ``central_fraction``."""
        return self._as_uint8(tf.image.central_crop(image, central_fraction))

    def crop_to_bounding_box(self, image, offset_height, offset_width, target_height, target_width):
        """Crop the given box and resize it back to ``image_size`` x ``image_size``."""
        cropped = tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width)
        resized = tf.image.resize(cropped, (self.image_size, self.image_size))
        return self._as_uint8(resized)

    def gamma(self, image, gamma):
        """Apply ``tf.image.adjust_gamma`` with ``gamma``."""
        return self._as_uint8(tf.image.adjust_gamma(image, gamma))

    def rot90(self, image, k):
        """Apply ``tf.image.rot90`` with ``k``."""
        return self._as_uint8(tf.image.rot90(image, k))

    def rescale_intensity(self, image):
        """Stretch intensities using the 2nd and 98th percentiles as input range."""
        low, high = np.percentile(image, (2, 98))
        return exposure.rescale_intensity(image, in_range=(low, high))

    def equalize_histogram(self, image):
        """Return ``exposure.equalize_hist(image)``."""
        return exposure.equalize_hist(image)

    def equalize_adapthist(self, image):
        """Return ``exposure.equalize_adapthist`` of the image with clip limit 0.03."""
        return exposure.equalize_adapthist(image, clip_limit=0.03)

### Data augmentation strategy

In [None]:
import csv
import os
import cv2

class DataAugmentationStrategy:
    """Generate augmented variants of one treated training image.

    Every variant is written to the augmented image folder and registered, with
    the label columns of the original image, in the augmented ground-truth CSV.
    """

    # Augmented ground-truth CSV. Built with os.path.join so the rows are
    # appended to the same file that write_header() creates on every platform
    # (a literal backslash is part of the file name outside Windows).
    GROUND_TRUTH_CSV = os.path.join('ground_truth', 'odir_augmented.csv')

    def __init__(self, image_size, file_name):
        """
        Args:
          image_size: edge length of the square treated images; also selects
            the treated/augmented folder names.
          file_name: file name of the base image inside the treated folder.
        """
        self.base_image = file_name
        self.treatment = ImageTreatment(image_size)
        self.file_path = r'ODIR-5K_Training_Dataset_treated_' + str(image_size)
        self.saving_path = r'ODIR-5K_Training_Dataset_augmented_' + str(image_size)
        self.file_id = file_name.replace('.jpg', '')

    def save_image(self, original_vector, image, sample):
        """Write one augmented image and append its ground-truth row.

        Args:
          original_vector: ground-truth row of the base image; items 1..8 are
            copied as the label columns of the new row.
          image: image to store (channel order is swapped before writing).
          sample: number appended to the base file id to form the new name.
        Returns:
          True when the image was written and registered, False when OpenCV
          could not write it (no CSV row is added in that case, so the CSV
          never points at a missing file).
        """
        # generate_images works on RGB data; swap back to OpenCV's BGR order.
        central = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        file = self.file_id + '_' + str(sample) + '.jpg'
        file_name = os.path.join(self.saving_path, file)
        if os.path.isfile(file_name):
            print("duplicate file found: " + file_name)

        # cv2.imwrite signals failure through its return value only.
        if not cv2.imwrite(file_name, central):
            print("could not write image: " + file_name)
            return False

        with open(self.GROUND_TRUTH_CSV, 'a', newline='') as csv_file:
            file_writer = csv.writer(csv_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            file_writer.writerow([file] + [original_vector[column] for column in range(1, 9)])
        return True

    def generate_images(self, number_samples, original_vector, weights):
        """Create up to 13 augmented variants (or a single rotated one).

        Args:
          number_samples: how many variants to create, taken in a fixed order
            (four quadrant crops, four centre scalings, intensity rescale,
            contrast, saturation, gamma, hue). The value 14 is special: only
            one image, rotated by 90 degrees, is generated.
          original_vector: ground-truth row of the base image.
          weights: 1, 20 or 400 depending on how often the image was already
            processed; used as an offset for the sample number, and 20/400
            additionally rotate the base image first.
        Returns:
          The number of images that were actually saved.
        Raises:
          FileNotFoundError: the base image could not be read.
          ValueError: ``weights`` is larger than 401.
        """
        eye_image = os.path.join(self.file_path, self.base_image)
        image = cv2.imread(eye_image)
        if image is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file.
            raise FileNotFoundError('Could not read image: ' + eye_image)
        original_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # For any repeating elements, just give the other output
        # We are only expecting up to 3 repetitions
        if weights == 20:
            original_image = self.treatment.rot90(original_image, 2)
        if weights == 400:
            original_image = self.treatment.rot90(original_image, 3)
        if weights > 401:
            print(str(self.file_id) + ' samples:' + str(number_samples))
            raise ValueError('this cannot happen')

        # for the sample type 14, just generate 1 image and leave the method
        if number_samples == 14:
            central = self.treatment.rot90(original_image, 1)
            return 1 if self.save_image(original_vector, central, weights + 14) else 0

        treatment = self.treatment
        # Quadrant size: half the image edge (112 for the default 224px images).
        half = treatment.image_size // 2

        # Ordered table of variants; entry i is produced when number_samples > i
        # and is saved with sample number weights + i.
        variants = [
            lambda img: treatment.crop_to_bounding_box(img, 0, 0, half, half),
            lambda img: treatment.crop_to_bounding_box(img, half, 0, half, half),
            lambda img: treatment.crop_to_bounding_box(img, 0, half, half, half),
            lambda img: treatment.crop_to_bounding_box(img, half, half, half, half),
            lambda img: treatment.scaling(img, [0.50])[0],
            lambda img: treatment.scaling(img, [0.70])[0],
            lambda img: treatment.scaling(img, [0.80])[0],
            lambda img: treatment.scaling(img, [0.90])[0],
            lambda img: treatment.rescale_intensity(img),
            lambda img: treatment.contrast(img, 2),
            lambda img: treatment.saturation(img, 0.5),
            lambda img: treatment.gamma(img, 0.5),
            lambda img: treatment.hue(img, 0.2),
        ]

        saved = 0
        for offset, variant in enumerate(variants[:max(number_samples, 0)]):
            if self.save_image(original_vector, variant(original_image), weights + offset):
                saved = saved + 1
        return saved

### Ground truth files

In [None]:
import csv

class GroundTruthFiles:
    """Group the rows of a ground-truth CSV by the diseases they are flagged with.

    Each attribute is a list of rows
    ``[id, normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others]``
    (all values are strings, exactly as read from the CSV).
    """

    def __init__(self):
        self.amd = []
        self.cataract = []
        self.diabetes = []
        self.glaucoma = []
        self.hypertension = []
        self.myopia = []
        self.others = []

    def populate_vectors(self, ground_truth_file):
        """Read ``ground_truth_file`` and append every row to its disease lists.

        A row is appended to a disease list when that disease's column is the
        string ``'1'``; a row flagged with several diseases lands in several
        lists. The header row (first column ``"ID"``) and blank lines are
        skipped. Rows flagged only as normal are not stored anywhere.

        Raises:
          IndexError: a non-blank row has fewer than 9 columns.
        """
        # (column index, destination list), in the CSV column order.
        buckets = (
            (2, self.diabetes),
            (3, self.glaucoma),
            (4, self.cataract),
            (5, self.amd),
            (6, self.hypertension),
            (7, self.myopia),
            (8, self.others),
        )
        # newline='' is what the csv module asks for when opening files.
        with open(ground_truth_file, newline='') as csv_data_file:
            for row in csv.reader(csv_data_file):
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                if not row:
                    continue
                if len(row) < 9:
                    raise IndexError('expected 9 columns, got ' + str(len(row)) + ': ' + str(row))
                column_id = row[0]
                # just discard the header row
                if column_id == "ID":
                    continue
                print("Processing image: " + column_id + "_left.jpg")
                for column, bucket in buckets:
                    if row[column] == '1':
                        # Each list gets its own copy of the first nine columns.
                        bucket.append(list(row[:9]))

In [None]:
import csv
import logging.config
import os
from absl import app


def write_header():
    """(Re)create the augmented ground-truth CSV containing only the header row.

    Returns the ``csv.writer`` that wrote the header; its file is already
    closed by the time the caller receives it.
    """
    header = ['ID', 'Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension',
              'Myopia', 'Others']
    with open(r'ground_truth/odir_augmented.csv', 'w', newline='') as csv_file:
        file_writer = csv.writer(csv_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        file_writer.writerow(header)
    return file_writer


def process_files(images, cache, files):
    """Run every augmentation strategy in ``images`` over the rows in ``files``.

    Args:
      images: list of ``[images_to_process, samples_per_image]`` pairs; each
        pair is applied to the first ``images_to_process`` rows of ``files``.
      cache: dict shared between calls, mapping file name to its weight. It is
        updated only while the first pair is processed: a new file gets 1, a
        file seen before has its weight multiplied by 20.
      files: ground-truth rows whose first item is the image file name.
    Returns:
      The sum of the counts returned by ``generate_images``.
    """
    total = 0
    for strategy, plan in enumerate(images):
        images_to_process = plan[0]
        samples_per_image = plan[1]
        for image_vector in (files[position] for position in range(images_to_process)):
            file_name = image_vector[0]

            # Weights are only assigned/raised during the first strategy.
            if strategy == 0:
                cache[file_name] = cache[file_name] * 20 if file_name in cache else 1

            augment = DataAugmentationStrategy(image_size, file_name)
            total += augment.generate_images(samples_per_image, image_vector, cache[file_name])
    return total


# def main(argv):
def main():
    """Build the augmented dataset for every disease class.

    Loads the ground truth from ``csv_path``, prints the number of rows per
    class, recreates the augmented CSV with just its header, runs the
    augmentation plan of each class and finally prints how many images were
    generated per class.
    """
    # load the ground truth file
    files = GroundTruthFiles()
    files.populate_vectors(csv_path)

    # Classes in the order they are reported and processed.
    class_order = ('hypertension', 'myopia', 'cataract', 'amd', 'glaucoma', 'others', 'diabetes')

    print('files record count order by size ASC')
    for label in class_order:
        print(label + ' ' + str(len(getattr(files, label))))

    # Per class: list of [number of images to process, samples per image].
    plans = {
        'hypertension': [[len(files.hypertension), 13], [128, 14]],
        'myopia': [[len(files.myopia), 9], [196, 14]],
        'cataract': [[len(files.cataract), 9], [66, 14]],
        'amd': [[len(files.amd), 9], [16, 14]],
        'glaucoma': [[len(files.glaucoma), 7], [312, 14]],
        'others': [[len(files.others), 1], [568, 14]],
        'diabetes': [[1038, 1]],
    }

    # Delete previous file
    if os.path.isfile(r'ground_truth/odir_augmented.csv'):
        os.remove(r'ground_truth/odir_augmented.csv')

    write_header()

    # Shared between classes so images that appear in several of them get a
    # different weight each time.
    images_processed = {}

    totals = {}
    for label in class_order:
        totals[label] = process_files(plans[label], images_processed, getattr(files, label))

    for label in class_order:
        print("total generated " + label + ": " + str(totals[label]))

# Configure logging from 'logging.conf' and fetch the shared 'odir' logger
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
# Image edge length handed to DataAugmentationStrategy by process_files()
image_size = 224
# Ground-truth CSV that main() loads through GroundTruthFiles.populate_vectors()
csv_path = 'ground_truth/odir.csv'

In [None]:
# main() is called directly here; the absl entry point is kept for reference:
# app.run(main)
main()

## 3. Image to tf.Data conversion and .npy storage

In [None]:
from absl import app
import logging
import logging.config
import time
import csv
import cv2
import os
import numpy as np
import glob


class NumpyDataGenerator:
    def __init__(self, training_path, testing_path, csv_path, csv_testing_path, augmented_path, csv_augmented_file):
        self.training_path = training_path
        self.testing_path = testing_path
        self.csv_path = csv_path
        self.csv_testing_path = csv_testing_path
        self.logger = logging.getLogger('odir')
        self.total_records_training = 0
        self.total_records_testing = 0
        self.csv_augmented_path = csv_augmented_file
        self.augmented_path = augmented_path

    def npy_training_files(self, file_name_training, file_name_training_labels):
        training = []
        training_labels = []

        self.logger.debug("Opening CSV file")
        with open(self.csv_path) as csvDataFile:
            csv_reader = csv.reader(csvDataFile)
            self.total_records_training = 0
            for row in csv_reader:
                column_id = row[0]
                normal = row[1]
                diabetes = row[2]
                glaucoma = row[3]
                cataract = row[4]
                amd = row[5]
                hypertension = row[6]
                myopia = row[7]
                others = row[8]
                # just discard the first row
                if column_id != "ID":
                    self.logger.debug("Processing image: " + column_id)
                    # load first the image from the folder
                    eye_image = os.path.join(self.training_path, column_id)
                    image = cv2.imread(eye_image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    training.append(image)
                    training_labels.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others])
                    self.total_records_training = self.total_records_training + 1

        training = np.array(training, dtype='uint8')
        training_labels = np.array(training_labels, dtype='uint8')
        # convert (number of images x height x width x number of channels) to (number of images x (height * width *3))
        # for example (6069 * 28 * 28 * 3)-> (6069 x 2352) (14,274,288)
        training = np.reshape(training, [training.shape[0], training.shape[1], training.shape[2], training.shape[3]])

        # save numpy array as .npy formats
        np.save(file_name_training, training)
        self.logger.debug("Saving NPY File: " + file_name_training)
        np.save(file_name_training_labels, training_labels)
        self.logger.debug("Saving NPY File: " + file_name_training_labels)
        self.logger.debug("Closing CSV file")

    def npy_testing_files(self, file_name_testing, file_name_testing_labels):
        testing = []
        testing_labels = []

        self.logger.debug("Opening CSV file")
        with open(self.csv_testing_path) as csvDataFile:
            csv_reader = csv.reader(csvDataFile)
            self.total_records_testing = 0
            for row in csv_reader:
                column_id = row[0]
                normal = row[1]
                diabetes = row[2]
                glaucoma = row[3]
                cataract = row[4]
                amd = row[5]
                hypertension = row[6]
                myopia = row[7]
                others = row[8]
                # just discard the first row
                if column_id != "ID":
                    self.logger.debug("Processing image: " + column_id + "_left.jpg")
                    # load first the image from the folder
                    eye_image = os.path.join(self.testing_path, column_id + "_left.jpg")
                    image = cv2.imread(eye_image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    testing.append(image)
                    testing_labels.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others])
                    self.total_records_testing = self.total_records_testing + 1

                    self.logger.debug("Processing image: " + column_id + "_right.jpg")
                    eye_image = os.path.join(self.testing_path, column_id + "_right.jpg")
                    image = cv2.imread(eye_image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    testing.append(image)
                    testing_labels.append([normal, diabetes, glaucoma, cataract, amd, hypertension, myopia, others])
                    self.total_records_testing = self.total_records_testing + 1

        testing = np.array(testing, dtype='uint8')
        training_labels = np.array(testing_labels, dtype='uint8')
        # convert (number of images x height x width x number of channels) to (number of images x (height * width *3))
        # for example (6069 * 28 * 28 * 3)-> (6069 x 2352) (14,274,288)
        testing = np.reshape(testing, [testing.shape[0], testing.shape[1], testing.shape[2], testing.shape[3]])

        # save numpy array as .npy formats
        np.save(file_name_testing, testing)
        self.logger.debug("Saving NPY File: " + file_name_testing)
        np.save(file_name_testing_labels, training_labels)
        self.logger.debug("Saving NPY File: " + file_name_testing_labels)
        self.logger.debug("Closing CSV file")

    def npy_training_files_split(self, split_number, file_name_training, file_name_training_labels, file_name_testing,
                                 file_name_testing_labels):
        training = []
        training_labels = []
        testing = []
        testing_labels = []

        self.logger.debug("Opening CSV file")
        count = 0
        with open(self.csv_path) as csvDataFile:
            csv_reader = csv.reader(csvDataFile)
            self.total_records_training = 0
            self.total_records_testing = 0
            for row in csv_reader:
                column_id = row[0]
                label = row[1]
                # just discard the first row
                if column_id != "ID":
                    self.logger.debug("Processing image: " + column_id)
                    # load first the image from the folder
                    eye_image = os.path.join(self.training_path, column_id)
                    image = cv2.imread(eye_image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    if count < split_number:
                        testing.append(image)
                        testing_labels.append(label)
                        self.total_records_testing = self.total_records_testing + 1
                    else:
                        training.append(image)
                        training_labels.append(label)
                        self.total_records_training = self.total_records_training + 1
                    count = count + 1

        testing = np.array(testing, dtype='uint8')
        testing_labels = np.array(testing_labels, dtype='uint8')
        testing = np.reshape(testing, [testing.shape[0], testing.shape[1], testing.shape[2], testing.shape[3]])

        # save numpy array as .npy formats
        np.save(file_name_testing, testing)
        np.save(file_name_testing_labels, testing_labels)

        training = np.array(training, dtype='uint8')
        training_labels = np.array(training_labels, dtype='uint8')
        # convert (number of images x height x width x number of channels) to (number of images x (height * width *3))
        # for example (6069 * 28 * 28 * 3)-> (6069 x 2352) (14,274,288)
        training = np.reshape(training, [training.shape[0], training.shape[1], training.shape[2], training.shape[3]])

        # save numpy array as .npy formats
        np.save(file_name_training, training)
        self.logger.debug("Saving NPY File: " + file_name_training)
        np.save(file_name_training_labels, training_labels)
        self.logger.debug("Saving NPY File: " + file_name_training_labels)
        self.logger.debug("Closing CSV file")

    def is_sickness(self, row, sickness):
        """Tell whether ``row`` is labelled with exactly the class ``sickness``.

        The result is True only when the label column of ``sickness`` is
        ``'1'`` and the other seven label columns (``row[1]`` to ``row[8]``)
        are all ``'0'``. An unknown class name gives False.
        """
        # Label column of every known class.
        class_columns = {
            "normal": 1,
            "diabetes": 2,
            "glaucoma": 3,
            "cataract": 4,
            "amd": 5,
            "hypertension": 6,
            "myopia": 7,
            "others": 8,
        }
        flagged_column = class_columns.get(sickness)
        if flagged_column is None:
            return False

        for column in range(1, 9):
            wanted = '1' if column == flagged_column else '0'
            if row[column] != wanted:
                return False
        return True

    def npy_training_files_split_all(self, split_number, file_name_training, file_name_training_labels,
                                     file_name_testing,
                                     file_name_testing_labels, include_augmented):
        """Split the labelled images into a hold-out set and training sets, saved as ``.npy``.

        Every data row of ``self.csv_path`` is read, its image is loaded from
        ``self.training_path`` and converted from BGR to RGB. For each class for
        which ``self.is_sickness(row, class)`` is true and whose quota
        (``split_number / 8``) is not yet reached, the image goes to the
        hold-out ("testing") set; images that reach no quota go to training.

        When ``include_augmented`` is true, the rows of
        ``self.csv_augmented_path`` (images in ``self.augmented_path``) are
        added to training; once 10820 training images have been collected the
        remaining augmented images go to a second training chunk.

        Args:
            split_number: requested hold-out size; each of the 8 classes
                contributes at most ``split_number / 8`` images.
            file_name_training: base name for the training images; saved with
                suffix ``_1`` (and ``_2`` for the second chunk).
            file_name_training_labels: base name for the training labels; same
                suffixes as above.
            file_name_testing: name for the hold-out images file.
            file_name_testing_labels: name for the hold-out labels file.
            include_augmented: whether to add the augmented images.

        Side effects:
            Sets ``self.total_records_training`` / ``self.total_records_testing``
            and writes ``files_used.csv`` (image id, class, running class count)
            in the current directory.
        """
        # Augmented images are diverted to the second chunk once this many
        # training images (original + augmented) have been collected.
        split_factor = 10820
        training = []
        training_labels = []
        training_2 = []
        training_labels_2 = []
        testing = []
        testing_labels = []
        images_used = []
        count_images = 0

        class_names = ['normal', 'diabetes', 'glaucoma', 'cataract', 'amd',
                       'hypertension', 'myopia', 'others']

        self.logger.debug("Opening CSV file")

        class_count = {sickness: 0 for sickness in class_names}
        # Per-class quota for the hold-out set.
        split_pocket = split_number / 8
        with open(self.csv_path) as csvDataFile:
            csv_reader = csv.reader(csvDataFile)
            self.total_records_training = 0
            self.total_records_testing = 0
            for row in csv_reader:
                column_id = row[0]
                # Columns 1..8: normal, diabetes, glaucoma, cataract, amd,
                # hypertension, myopia, others.
                labels = [row[column] for column in range(1, 9)]
                # just discard the header row
                if column_id == "ID":
                    continue
                self.logger.debug("Processing image: " + column_id)
                # load first the image from the folder
                # NOTE(review): cv2.imread presumably returns None for an
                # unreadable file, in which case cvtColor fails - confirm.
                eye_image = os.path.join(self.training_path, column_id)
                image = cv2.imread(eye_image)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                found = False
                # NOTE(review): there is no break, so an image matching more
                # than one class would be added once per match - confirm that
                # is_sickness patterns are mutually exclusive.
                for sickness in class_names:
                    if self.is_sickness(row, sickness) and class_count[sickness] < split_pocket:
                        testing.append(image)
                        # Recorded count is the value before the increment below.
                        images_used.append((column_id, sickness, str(class_count[sickness])))
                        testing_labels.append(labels)
                        self.total_records_testing = self.total_records_testing + 1
                        class_count[sickness] = class_count[sickness] + 1
                        found = True
                        self.logger.debug('found ' + sickness + ' ' + str(class_count[sickness]))

                if not found:
                    training.append(image)
                    training_labels.append(labels)
                    self.total_records_training = self.total_records_training + 1
                    count_images = count_images + 1

        if include_augmented:
            with open(self.csv_augmented_path) as csvDataFile:
                csv_reader = csv.reader(csvDataFile)
                for row in csv_reader:
                    column_id = row[0]
                    labels = [row[column] for column in range(1, 9)]
                    # just discard the header row
                    if column_id == "ID":
                        continue
                    self.logger.debug("Processing image: " + column_id)
                    # load first the image from the folder
                    eye_image = os.path.join(self.augmented_path, column_id)
                    image = cv2.imread(eye_image)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    if count_images >= split_factor:
                        training_2.append(image)
                        training_labels_2.append(labels)
                    else:
                        training.append(image)
                        training_labels.append(labels)
                    self.total_records_training = self.total_records_training + 1
                    count_images = count_images + 1

        testing = np.array(testing, dtype='uint8')
        testing_labels = np.array(testing_labels, dtype='uint8')
        # Reshaping to the array's own 4-D shape changes nothing; it only fails
        # early if the stacked images did not form a 4-D array.
        testing = np.reshape(testing, [testing.shape[0], testing.shape[1], testing.shape[2], testing.shape[3]])

        # save numpy array as .npy formats
        np.save(file_name_testing, testing)
        np.save(file_name_testing_labels, testing_labels)

        training = np.array(training, dtype='uint8')
        training_labels = np.array(training_labels, dtype='uint8')
        # Same-shape reshape: keeps (images, height, width, channels) as is.
        training = np.reshape(training, [training.shape[0], training.shape[1], training.shape[2], training.shape[3]])

        if include_augmented:
            training_2 = np.array(training_2, dtype='uint8')
            training_labels_2 = np.array(training_labels_2, dtype='uint8')
            training_2 = np.reshape(training_2, [training_2.shape[0], training_2.shape[1], training_2.shape[2], training_2.shape[3]])

        self.logger.debug(testing.shape)
        self.logger.debug(testing_labels.shape)
        self.logger.debug(training.shape)
        self.logger.debug(training_labels.shape)
        if include_augmented:
            self.logger.debug(training_2.shape)
            self.logger.debug(training_labels_2.shape)

        # save numpy array as .npy formats
        np.save(file_name_training + '_1', training)
        np.save(file_name_training_labels + '_1', training_labels)
        if include_augmented:
            np.save(file_name_training + '_2', training_2)
            np.save(file_name_training_labels + '_2', training_labels_2)
        self.logger.debug("Closing CSV file")
        for sickness in class_names:
            self.logger.debug('found ' + sickness + ' ' + str(class_count[sickness]))
        # Record which images went to the hold-out set; the context manager
        # guarantees the file is flushed and closed.
        with open("files_used.csv", 'w', newline='') as used_file:
            csv_writer = csv.writer(used_file)
            for entry in images_used:
                self.logger.debug(','.join(entry))
                csv_writer.writerow(entry)


# def main(argv):
def main():
    """Generate every ``.npy`` dataset file for the 224-pixel treated images.

    Builds a ``NumpyDataGenerator`` over the treated/augmented image folders
    and ground-truth CSVs, then writes the challenge test set, the plain
    training set, a 1000-image split and the class-balanced 400-image split
    (with augmented data). Record counts and elapsed time are logged.
    """
    start = time.time()
    image_width = 224
    size_tag = '_' + str(image_width)

    training_path = 'ODIR-5K_Training_Dataset_treated' + size_tag
    testing_path = 'ODIR-5K_Testing_Images_treated' + size_tag
    augmented_path = 'ODIR-5K_Training_Dataset_augmented' + size_tag
    csv_file = 'ground_truth/odir.csv'
    csv_augmented_file = 'ground_truth/odir_augmented.csv'
    training_file = 'ground_truth/testing_default_value.csv'

    logger.debug('Generating npy files')
    generator = NumpyDataGenerator(
        training_path,
        testing_path,
        csv_file,
        training_file,
        augmented_path,
        csv_augmented_file,
    )

    # Challenge test set
    generator.npy_testing_files(
        'odir_testing_challenge' + size_tag,
        'odir_testing_labels_challenge' + size_tag,
    )

    # Training sets: full, fixed split, and class-balanced split with augmentation
    generator.npy_training_files('odir_training', 'odir_training_labels')
    generator.npy_training_files_split(
        1000,
        'odir_training',
        'odir_training_labels',
        'odir_testing',
        'odir_testing_labels',
    )
    generator.npy_training_files_split_all(
        400,
        'odir_training' + size_tag,
        'odir_training_labels' + size_tag,
        'odir_testing' + size_tag,
        'odir_testing_labels' + size_tag,
        True,
    )

    end = time.time()
    logger.debug(f'Training Records {generator.total_records_training!s}')
    logger.debug(f'Testing Records {generator.total_records_testing!s}')
    logger.debug(f'All Done in {end - start!s} seconds')

In [None]:
# Configure logging from 'logging.conf' and bind the module-level 'odir'
# logger; main() logs through this global name.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')

In [None]:
# Run the generation pipeline directly; the alternative app.run(main) entry
# point is left disabled.
# app.run(main)
main()

# Inception v3 model

## Required Functions

In [None]:
# odir_advance_plotting.py

from __future__ import absolute_import, division, print_function, unicode_literals

import sys

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib as mpl


class Plotter:
    """Plotting helpers for an eight-class, multi-label classifier.

    ``class_names`` is the ordered list of display names; entry ``k`` labels
    column ``k`` of every label/prediction vector. Most methods hard-code
    eight columns and a 0.5 decision threshold.
    """

    def __init__(self, class_names):
        self.class_names = class_names

    def plot_metrics(self, history, test_run, index):
        """Plot train/validation curves for loss, AUC, precision and recall.

        Args:
            history: object exposing ``epoch`` and a ``history`` dict that
                holds each metric and its ``val_`` counterpart (presumably a
                Keras ``History`` - confirm with the caller).
            test_run: path the figure is saved to.
            index: unused; kept so existing callers keep working.
        """
        metrics2 = ['loss', 'auc', 'precision', 'recall']
        for n, metric in enumerate(metrics2):
            name = metric.replace("_", " ").capitalize()
            plt.subplot(2, 2, n + 1)
            plt.plot(history.epoch, history.history[metric], color='green', label='Train')
            plt.plot(history.epoch, history.history['val_' + metric], color='green', linestyle="--", label='Val')
            plt.xlabel('Epoch')
            plt.ylabel(name)
            if metric == 'loss':
                # Loss is unbounded above: keep the automatic upper limit.
                plt.ylim([0, plt.ylim()[1]])
            else:
                plt.ylim([0, 1])

            plt.legend()

        plt.subplots_adjust(top=0.97, bottom=0.09, left=0.10, right=0.96, hspace=0.25, wspace=0.26)
        plt.savefig(test_run)
        plt.show()
        plt.close()

    def plot_input_images(self, x_train, y_train):
        """Show up to the first 100 images in a 10x10 grid with their true class names."""
        plt.figure(figsize=(9, 9))
        # Bounded by the data so a set smaller than 100 images does not raise.
        for i in range(min(100, len(x_train))):
            plt.subplot(10, 10, i + 1)
            plt.xticks([])
            plt.yticks([])
            plt.grid(False)
            plt.imshow(x_train[i])
            classes = ""
            for j in range(8):
                if y_train[i][j] >= 0.5:
                    classes = classes + self.class_names[j] + "\n"
            plt.xlabel(classes, fontsize=7, color='black', labelpad=1)

        plt.subplots_adjust(bottom=0.04, right=0.95, top=0.94, left=0.06, wspace=0.56, hspace=0.17)
        plt.show()

    def plot_image(self, i, predictions_array, true_label, img):
        """Draw image ``i`` on the current axes with predicted and true labels.

        The caption is green when the thresholded prediction matches the
        ground truth on all eight classes, red otherwise. The top prediction
        is always listed; the second and third only when they reach 0.5.
        """
        predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])

        plt.imshow(img)

        truth_flags = [bool(true_label[index] >= 0.5) for index in range(8)]
        predicted_flags = [bool(predictions_array[index] >= 0.5) for index in range(8)]
        ground = "".join(self.class_names[index] + "\n" for index in range(8) if truth_flags[index])

        # Element-wise agreement implies equal counts, so one comparison suffices.
        color = 'green' if truth_flags == predicted_flags else 'red'

        first, second, third, i, j, k = self.calculate_3_largest(predictions_array, 8)
        prediction = "{} {:2.0f}% \n".format(self.class_names[i], 100 * first)
        if second >= 0.5:
            prediction = prediction + "{} {:2.0f}% \n".format(self.class_names[j], 100 * second)
        if third >= 0.5:
            prediction = prediction + "{} {:2.0f}% \n".format(self.class_names[k], 100 * third)
        plt.xlabel("Predicted: {} Ground Truth: {}".format(prediction, ground), color=color)

    def plot_accuracy(self, history, new_folder):
        """Plot training vs validation accuracy and save the figure to ``new_folder`` (a file path)."""
        plt.plot(history.history['accuracy'], label='accuracy')
        plt.plot(history.history['val_accuracy'], label='val_accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend(loc='lower right')
        plt.savefig(new_folder)
        plt.show()

    def calculate_3_largest(self, arr, arr_size):
        """Return the three largest values of ``arr[:arr_size]`` and an index for each.

        Returns:
            ``(first, second, third, index_1, index_2, index_3)``. When values
            tie, the last matching position not already taken is reported.
            If ``arr_size < 3`` a message is printed and ``None`` is returned.
        """
        if arr_size < 3:
            print(" Invalid Input ")
            return

        third = first = second = -sys.maxsize
        index_1 = 0
        index_2 = 0
        index_3 = 0

        # Single pass keeping the running top three values.
        for i in range(0, arr_size):
            if arr[i] > first:
                third = second
                second = first
                first = arr[i]
            elif arr[i] > second:
                third = second
                second = arr[i]
            elif arr[i] > third:
                third = arr[i]

        # Recover a distinct position for each of the three values.
        for i in range(0, arr_size):
            if arr[i] == first:
                index_1 = i
        for i in range(0, arr_size):
            if arr[i] == second and i != index_1:
                index_2 = i
        for i in range(0, arr_size):
            if arr[i] == third and i != index_1 and i != index_2:
                index_3 = i
        return first, second, third, index_1, index_2, index_3

    def plot_value_array(self, i, predictions_array, true_label):
        """Draw a bar chart of the eight class scores for sample ``i`` on the current axes.

        Bars of true classes are green, bars of classes predicted (>= 0.5) but
        not true are red, the rest grey. Bars above 1% carry a percentage label.
        """
        predictions_array, true_label = predictions_array[i], true_label[i]
        plt.grid(False)
        plt.xticks([])
        plt.yticks([])
        bar_plot = plt.bar(range(8), predictions_array, color="#777777")
        plt.xticks(range(8), ('N', 'D', 'G', 'C', 'A', 'H', 'M', 'O'))
        plt.ylim([0, 1])

        for j in range(8):
            if true_label[j] >= 0.5:
                bar_plot[j].set_color('green')
            elif predictions_array[j] >= 0.5:
                # False positive: predicted but not in the ground truth.
                bar_plot[j].set_color('red')

        for rect in bar_plot:
            height = rect.get_height()
            value = height * 100
            if value > 1:
                plt.annotate('{:2.0f}%'.format(value),
                             xy=(rect.get_x() + rect.get_width() / 2, height),
                             xytext=(0, 3),  # 3 points vertical offset
                             textcoords="offset points",
                             ha='center', va='bottom')

    def ensure_test_prediction_exists(self, predictions):
        """Return True when at least one of the eight scores reaches 0.5."""
        return any(predictions[j] >= 0.5 for j in range(8))

    def plot_output(self, test_predictions_baseline, y_test, x_test_drawing, test_run):
        """Plot up to 25 samples that have at least one positive prediction.

        Each selected sample gets its image (``plot_image``) and score bars
        (``plot_value_array``) side by side. Samples are scanned in order and
        scanning stops at the end of ``test_predictions_baseline``, so a short
        test set no longer raises an index error. The figure is saved to
        ``test_run``. Sets the global matplotlib font size to 7.
        """
        mpl.rcParams["font.size"] = 7
        num_rows = 5
        num_cols = 5
        num_images = num_rows * num_cols
        plt.figure(figsize=(2 * 2 * num_cols, 2 * num_rows))
        shown = 0
        candidate = 0
        total = len(test_predictions_baseline)
        while shown < num_images and candidate < total:
            if self.ensure_test_prediction_exists(test_predictions_baseline[candidate]):
                plt.subplot(num_rows, 2 * num_cols, 2 * shown + 1)
                self.plot_image(candidate, test_predictions_baseline, y_test, x_test_drawing)
                plt.subplot(num_rows, 2 * num_cols, 2 * shown + 2)
                self.plot_value_array(candidate, test_predictions_baseline, y_test)
                shown = shown + 1
            candidate = candidate + 1

        plt.subplots_adjust(bottom=0.08, right=0.95, top=0.94, left=0.05, wspace=0.11, hspace=0.56)
        plt.savefig(test_run)
        plt.show()

    def plot_output_single(self, i, test_predictions_baseline, y_test, x_test_drawing):
        """Show the image and score bars for the single sample ``i``."""
        plt.figure(figsize=(6, 3))
        plt.subplot(1, 2, 1)
        self.plot_image(i, test_predictions_baseline, y_test, x_test_drawing)
        plt.subplot(1, 2, 2)
        self.plot_value_array(i, test_predictions_baseline, y_test)
        plt.show()

    def plot_confusion_matrix(self, y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues):
        """Print and plot the confusion matrix of the arg-max classes.

        Both inputs are reduced with ``argmax(axis=1)``, so only the
        highest-scoring class per sample is compared. With ``normalize=True``
        each row is divided by its sum.

        Args:
            y_true: 2-D array of true labels.
            y_pred: 2-D array of predicted scores.
            classes: currently unused (tick labels are disabled).
            normalize: whether to row-normalize the matrix.
            title: figure title; a default is chosen when falsy.
            cmap: matplotlib colour map.

        Returns:
            The matplotlib axes holding the plot.
        """
        if not title:
            if normalize:
                title = 'Normalized confusion matrix'
            else:
                title = 'Confusion matrix, without normalization'

        # Compute confusion matrix
        cm = confusion_matrix(y_true.argmax(axis=1), y_pred.argmax(axis=1))
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print("Normalized confusion matrix")
        else:
            print('Confusion matrix, without normalization')

        print(cm)

        fig, ax = plt.subplots()
        im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        ax.figure.colorbar(im, ax=ax)
        # We want to show all ticks...
        ax.set(xticks=np.arange(cm.shape[1]),
               yticks=np.arange(cm.shape[0]),
               # ... and label them with the respective list entries
               # xticklabels=classes, yticklabels=classes,
               title=title,
               ylabel='True label',
               xlabel='Predicted label')
        # NOTE(review): fixed limits assume eight classes; presumably a
        # workaround for clipped first/last rows in some matplotlib versions.
        ax.set_ylim(8.0, -1.0)
        # Rotate the tick labels and set their alignment.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")

        # Loop over data dimensions and create text annotations.
        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, format(cm[i, j], fmt),
                        ha="center", va="center",
                        color="white" if cm[i, j] > thresh else "black")
        fig.tight_layout()
        return ax

    def print_normalized_confusion_matrix(self, y_test, test_predictions_baseline):
        """Plot the raw and the row-normalized confusion matrices, then show them."""
        np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        self.plot_confusion_matrix(y_test, test_predictions_baseline, classes=self.class_names,
                                   title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        self.plot_confusion_matrix(y_test, test_predictions_baseline, classes=self.class_names, normalize=True,
                                   title='Normalized confusion matrix')

        plt.show()

    def plot_confusion_matrix_generic(self, labels2, predictions, test_run, p=0.5):
        """Plot the arg-max confusion matrix as a seaborn heatmap and save it to ``test_run``.

        ``p`` is unused; it is kept for backward compatibility.
        """
        cm = confusion_matrix(labels2.argmax(axis=1), predictions.argmax(axis=1))
        plt.figure(figsize=(6, 6))
        ax = sns.heatmap(cm, annot=True, fmt="d")
        ax.set_ylim(8.0, -1.0)
        plt.title('Confusion matrix')
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        # Adjust the layout before saving so the saved file matches what is shown.
        plt.subplots_adjust(top=0.94, bottom=0.11, left=0.12, right=1.00, hspace=0.20, wspace=0.18)
        plt.savefig(test_run)
        plt.show()
        plt.close()

In [None]:
# odir_kappa_score.py
import csv
import os

import numpy as np
from sklearn import metrics


class FinalScore:
    """Compute the kappa / F1 / AUC summary from the CSV files saved in ``new_folder``."""

    def __init__(self, new_folder):
        self.new_folder = new_folder

    def odir_metrics(self, gt_data, pr_data):
        """Return ``(kappa, f1, auc, final_score)`` for flattened labels and scores.

        Scores are binarised at 0.5 for kappa and micro-F1; AUC uses the raw
        scores. ``final_score`` is the mean of the three.
        """
        threshold = 0.5
        truth = gt_data.flatten()
        scores = pr_data.flatten()
        decisions = scores > threshold

        kappa = metrics.cohen_kappa_score(truth, decisions)
        f1 = metrics.f1_score(truth, decisions, average='micro')
        auc = metrics.roc_auc_score(truth, scores)
        return kappa, f1, auc, (kappa + f1 + auc) / 3.0

    def import_data(self, filepath):
        """Load a CSV with a header row into an array: integer id first, floats after."""
        parsed = []
        with open(filepath, 'r') as handle:
            rows = csv.reader(handle)
            next(rows)  # skip the header line
            for record in rows:
                parsed.append([int(record[0]), *map(float, record[1:])])
        return np.array(parsed)

    def output(self):
        """Read ground truth and predictions from ``new_folder`` and print the four scores."""
        gt_data, pr_data = (
            self.import_data(os.path.join(self.new_folder, file_name))
            for file_name in ('odir_ground_truth.csv', 'odir_predictions.csv')
        )
        # The first column is the row id; only the class columns are scored.
        results = self.odir_metrics(gt_data[:, 1:], pr_data[:, 1:])
        captions = ("Kappa score:", "F-1 score:", "AUC value:", "Final Score:")
        for caption, value in zip(captions, results):
            print(caption, value)

In [None]:
# odir_predictions_writer.py
import csv
import os


class Prediction:
    """Write model predictions (and optionally ground truth) to CSV files.

    Files are created inside ``folder`` when it is non-empty, otherwise in the
    current directory. Every data row is ``[row number, 8 class values]``.
    """

    def __init__(self, prediction, num_images_test, folder = ""):
        self.prediction = prediction
        self.num_images_test = num_images_test
        self.folder = folder

    def _target_path(self, file_name):
        # Resolve file_name against the configured folder, if any.
        if self.folder != "":
            return os.path.join(self.folder, file_name)
        return file_name

    @staticmethod
    def _write_table(path, header, rows):
        # Write the header, then one numbered line per row with its first 8 values.
        with open(path, 'w', newline='') as csv_file:
            file_writer = csv.writer(csv_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            file_writer.writerow(header)
            for count, values in enumerate(rows):
                file_writer.writerow([count] + [values[k] for k in range(8)])

    def save(self):
        """Write every prediction row to ``predictions.csv`` with full class names as header."""
        self._write_table(
            self._target_path('predictions.csv'),
            ['ID', 'Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others'],
            self.prediction,
        )

    def save_all(self, y_test):
        """Write the first ``num_images_test`` predictions and labels to two CSV files.

        Args:
          y_test: ground-truth rows, indexable like ``self.prediction``.
        Returns:
          Nothing; creates ``odir_predictions.csv`` and ``odir_ground_truth.csv``.
        """
        short_header = ['ID', 'N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']
        sources = (
            ('odir_predictions.csv', self.prediction),
            ('odir_ground_truth.csv', y_test),
        )
        for file_name, table in sources:
            # Lazy row lookup: rows are fetched one by one while writing.
            selected = (table[i] for i in range(self.num_images_test))
            self._write_table(self._target_path(file_name), short_header, selected)

In [None]:
# odir.py
import numpy as np


def load_data(image_size, index = 0, challenge = 0):
    """Load the ODIR ``.npy`` arrays from the current directory.

    Args:
      image_size: size tag embedded in the file names (e.g. ``224``).
      index: ``0`` loads ``odir_training_<size>.npy``; any other value loads
        the chunk ``odir_training_<size>_<index>.npy``.
      challenge: ``0`` loads the ``odir_testing`` files; any other value loads
        the ``odir_testing_challenge`` files.
    Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    size_tag = '_' + str(image_size)

    # Training chunks carry an extra "_<index>" suffix.
    train_tag = size_tag if index == 0 else size_tag + '_' + str(index)
    x_train = np.load('odir_training' + train_tag + '.npy')
    y_train = np.load('odir_training_labels' + train_tag + '.npy')

    if challenge == 0:
        images_stem, labels_stem = 'odir_testing', 'odir_testing_labels'
    else:
        images_stem, labels_stem = 'odir_testing_challenge', 'odir_testing_labels_challenge'
    x_test = np.load(images_stem + size_tag + '.npy')
    y_test = np.load(labels_stem + size_tag + '.npy')

    return (x_train, y_train), (x_test, y_test)

In [None]:
def normalize_vgg16(training, testing):
    """Return channel-reversed, mean-centred float versions of both arrays.

    Each input is divided by 1.0 (which yields a new floating-point array, so
    the arguments are left untouched), its last axis is reversed, and the
    constants 103.939, 116.779 and 123.68 are subtracted from channels 0, 1
    and 2 of the reversed result.
    """
    channel_means = (103.939, 116.779, 123.68)

    training_float = training / 1.0
    testing_float = testing / 1.0

    # Reversed views over the fresh float copies; edited in place below.
    training_flipped = training_float[..., ::-1]
    testing_flipped = testing_float[..., ::-1]

    for batch in (training_flipped, testing_flipped):
        for channel, offset in enumerate(channel_means):
            batch[..., channel] -= offset

    return training_flipped, testing_flipped

## Basic Model

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import secrets
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import SGD
from sklearn.utils import class_weight
import numpy as np

# Training hyper-parameters for the basic InceptionV3 run. The commented
# values are alternative settings that were left disabled.
# batch_size = 32
batch_size = 16
num_classes = 8
# epochs = 100
epochs = 2
# Epochs without val_loss improvement tolerated by the early-stopping callback.
patience = 5

# Disabled per-class weights (not passed to model.fit below).
# class_weight = {0: 1.,
#                 1: 1.583802025,
#                 2: 8.996805112,
#                 3: 10.24,
#                 4: 10.05714286,
#                 5: 1.,
#                 6: 1.,
#                 7: 2.505338078}

# Every run writes its artefacts to a fresh, randomly named sub-folder.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/EyeDisease_Bahaloo/OcularDisease/Inception_basic_outputs'

new_folder = os.path.join(folder, token)

if not os.path.exists(new_folder):
    os.makedirs(new_folder)

# InceptionV3 backbone with ImageNet weights and without its classification head.
base_model = inception_v3.InceptionV3

base_model = base_model(weights='imagenet', include_top=False)

# Uncomment to freeze the pre-trained layers; as written, no layer is frozen here.
#for layer in base_model.layers:
#    layer.trainable = False

# New head: global average pooling -> 1024-unit ReLU -> one sigmoid output
# per class (multi-label).
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

tf.keras.utils.plot_model(model, to_file=os.path.join(new_folder, 'model_inception_v3.png'), show_shapes=True, show_layer_names=True)

# Metric names matter: the plotting helpers read 'accuracy', 'precision',
# 'recall' and 'auc' (plus their 'val_' variants) from the training history.
defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# Adam Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer=Adam(lr=0.001),
#               metrics=defined_metrics)

# RMSProp Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer='rmsprop',
#               metrics=defined_metrics)

# NOTE(review): the `lr` and `decay` keyword names are presumably rejected or
# deprecated by newer Keras releases - confirm against the installed version.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
print('Configuration Start -------------------------')
print(sgd.get_config())
print('Configuration End -------------------------')
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=defined_metrics)

# Loads training chunk 1 ('odir_training_224_1.npy') and the regular test set.
(x_train, y_train), (x_test, y_test) = load_data(224, index=1)

# Keep a reference to the test images as loaded, for drawing later.
# NOTE(review): this relies on preprocess_input returning a new array rather
# than modifying x_test in place - confirm for the dtype of the loaded data.
x_test_drawing = x_test

x_train = inception_v3.preprocess_input(x_train)
x_test = inception_v3.preprocess_input(x_test)

class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# Plotting helper used by the evaluation cell.
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for `patience` epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)

#class_weight = class_weight.compute_class_weight('balanced', np.unique(x_train), x_train)

# The test set doubles as the validation set during training.
history = model.fit(x_train, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    shuffle=True, #class_weight= class_weight,
                    validation_data=(x_test, y_test), callbacks=[callback])


In [None]:
# Persist the trained basic model, then produce every evaluation artefact
# (plots, CSV files, final score) inside the run folder.
print("saving weights")
# NOTE(review): model.save presumably stores the whole model, not only the
# weights, despite the file name - confirm.
model.save(os.path.join(new_folder, 'model_weights.h5'))

print("plotting metrics")
# The third argument (2) is not used by Plotter.plot_metrics.
plotter.plot_metrics(history, os.path.join(new_folder, 'plot1.png'), 2)

print("plotting accuracy")
plotter.plot_accuracy(history, os.path.join(new_folder, 'plot2.png'))

print("display the content of the model")
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for name, value in zip(model.metrics_names, baseline_results):
    print(name, ': ', value)
print()

# test a prediction
test_predictions_baseline = model.predict(x_test)
print("plotting confusion matrix")
# The last argument (0) is not used by Plotter.plot_confusion_matrix_generic.
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, os.path.join(new_folder, 'plot3.png'), 0)

# save the predictions
# 400 is the number of rows save_all() writes.
# NOTE(review): presumably the test-set size - confirm it matches len(x_test).
prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# show the final score (reads the two CSV files written by save_all above)
score = FinalScore(new_folder)
score.output()

# plot output results using the test images kept for drawing
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, os.path.join(new_folder, 'plot4.png'))

## Enhanced Model

In [None]:
import os
from collections import Sequence

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import resnet50, inception_v3, vgg16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import numpy as np
import secrets
import matplotlib.pyplot as plt

# Hyper-parameters for the enhanced InceptionV3 run. The commented values are
# alternative settings that were left disabled.
# batch_size = 32
batch_size = 16
num_classes = 8
# epochs = 30
epochs = 1


class Generator(Sequence):
    """Batch-wise wrapper around an in-memory dataset.

    ``len(gen)`` is the number of batches and ``gen[k]`` returns the k-th
    ``(batch_x, batch_y)`` pair; ``on_epoch_end`` reshuffles the sample order.

    NOTE(review): ``Sequence`` is imported from ``collections`` in this file;
    Keras presumably expects ``tf.keras.utils.Sequence`` - confirm before
    passing an instance to ``model.fit``.
    """

    def __init__(self, x_set, y_set, batch_size=256):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        # Sample order; shuffled in place by on_epoch_end().
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        # Number of batches, counting a final partial batch. np.ceil replaces
        # np.math.ceil, which depends on an alias that recent NumPy releases
        # no longer provide; int() keeps the integer return type.
        return int(np.ceil(self.x.shape[0] / self.batch_size))

    def __getitem__(self, idx):
        # The last batch may hold fewer than batch_size samples.
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = self.x[inds]
        batch_y = self.y[inds]
        return batch_x, batch_y

    def on_epoch_end(self):
        np.random.shuffle(self.indices)

def generator(train_a, labels_a, train_b, labels_b):
    """Endlessly yield single-sample batches, first from set ``a`` then from set ``b``.

    Every item is ``(image reshaped to (1, 224, 224, 3), label reshaped to (1, 8))``.
    After the last sample of ``b`` the stream restarts at the first sample of ``a``.
    """
    image_shape = (1, 224, 224, 3)
    label_shape = (1, 8)
    while True:
        for images, targets in ((train_a, labels_a), (train_b, labels_b)):
            for position in range(len(images)):
                yield images[position].reshape(image_shape), targets[position].reshape(label_shape)

def generator_validation(test, labels):
    """Endlessly yield single-sample validation batches, cycling over ``test``.

    Every item is ``(image reshaped to (1, 224, 224, 3), label reshaped to (1, 8))``.
    """
    image_shape = (1, 224, 224, 3)
    label_shape = (1, 8)
    while True:
        for position in range(len(test)):
            sample = test[position].reshape(image_shape)
            target = labels[position].reshape(label_shape)
            yield sample, target

# Every run writes its artefacts to a fresh, randomly named sub-folder.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/EyeDisease_Bahaloo/OcularDisease/Inception_enhanced_outputs'

newfolder = os.path.join(folder, token)
if not os.path.exists(newfolder):
    os.makedirs(newfolder)

# InceptionV3 backbone with ImageNet weights and without its classification head.
base_model = inception_v3.InceptionV3

base_model = base_model(weights='imagenet', include_top=False)
# New head: global average pooling -> 1024-unit ReLU -> one sigmoid output
# per class (multi-label).
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
# model.summary()

tf.keras.utils.plot_model(model, to_file=os.path.join(newfolder, 'model_inception_v3.png'), show_shapes=True, show_layer_names=True)


# Uncomment to freeze the pre-trained layers; as written, no layer is frozen here.
#for layer in base_model.layers:
#    layer.trainable = False

# Metric names matter: the plotting code reads 'accuracy', 'precision',
# 'recall' and 'auc' (plus their 'val_' variants) from the training history.
defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# NOTE(review): the `lr` keyword name is presumably deprecated or rejected by
# newer Keras releases - confirm against the installed version.
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=defined_metrics)

# Load both training chunks. Each call also loads the same regular test set,
# so x_test / y_test are simply assigned twice.
(x_train, y_train), (x_test, y_test) = load_data(224, 1)
(x_train2, y_train2), (x_test, y_test) = load_data(224, 2)

# Reference to the test images as loaded, kept under a separate name for drawing.
x_test_drawing = x_test

In [None]:
# The three preprocessing lines below are disabled because running them
# exhausted the available RAM.
# NOTE(review): with them disabled, no inception_v3 preprocessing is applied
# in this cell, unlike the basic model above.
# x_train = inception_v3.preprocess_input(x_train)
# x_train2 = inception_v3.preprocess_input(x_train2)
# x_test = inception_v3.preprocess_input(x_test)

#print(model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0))
class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# Plotting helper used by the evaluation cell.
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='min', verbose=1)

# class_weight = {0: 1.,
#                     1: 1.583802025,
#                     2: 8.996805112,
#                     3: 10.24,
#                     4: 10.05714286,
#                     5: 14.66666667,
#                     6: 10.7480916,
#                     7: 2.505338078} , class_weight=class_weight

In [None]:
# Train for a single epoch on the first training set; the test split is used
# as validation data and drives the early-stopping callback.
history = model.fit(
    x_train,
    y_train,
    epochs=1,
    batch_size=batch_size,
    shuffle=True,
    validation_data=(x_test, y_test),
    callbacks=[callback],
)

In [None]:
# train_datagen = Generator(x_train, y_train, batch_size)
# # With Data Augmentation
# history = model.fit_generator(generator=generator(x_train, y_train, x_train2, y_train2), steps_per_epoch=len(x_train),
#                                epochs=epochs, verbose=1, callbacks=[callback], validation_data=generator_validation(x_test, y_test),
#                               validation_steps=len(x_test), shuffle=False )

In [None]:
# Persist the trained model inside this run's output folder.
print("saving")
weights_file = os.path.join(newfolder, 'model_weights.h5')
model.save(weights_file)

print("plotting")
plotter.plot_metrics(history, os.path.join(newfolder, 'plot1.png'), 2)

# Training vs. validation accuracy per epoch, saved as plot2.png.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.savefig(os.path.join(newfolder, 'plot2.png'))
plt.show()


# Print every compiled metric evaluated on the test split.
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for metric_name, metric_value in zip(model.metrics_names, baseline_results):
    print(metric_name, ': ', metric_value)
print()

# Predict on the test split and draw the confusion matrix.
test_predictions_baseline = model.predict(x_test)
confusion_file = os.path.join(newfolder, 'plot3.png')
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, confusion_file, 0)

# Save the predictions and the ground truth into the run folder.
prediction_writer = Prediction(test_predictions_baseline, 400, newfolder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# Show the final score computed from the run folder.
score = FinalScore(newfolder)
score.output()

# Plot output results using the image array kept for drawing.
output_file = os.path.join(newfolder, 'plot4.png')
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, output_file)

## Inference

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import logging.config
import os
import tensorflow as tf
from absl import app
# from keras.applications.inception_v3 import keras_utils
from tensorflow.keras.applications import inception_v3

# def main(argv):
def main():
    """Evaluate a previously saved InceptionV3 run on the test split.

    Loads the data, restores ``model_weights.h5`` from the hard-coded run
    folder, prints the evaluation metrics, then produces the confusion
    matrix, the saved predictions, the final score and the output plot.
    """
    print(tf.version.VERSION)
    image_size = 224
    test_run = 'zCSA'
    new_folder = r'/content/drive/MyDrive/EyeDisease_Bahaloo/OcularDisease/Inception_basic_outputs/682e72d08d5e4829d4d4250b545e29d2'
    # load the data; only the test split is used here
    _, (x_test, y_test) = load_data(image_size, 1)

    class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

    # plot data input
    plotter = Plotter(class_names)
    plotter.plot_input_images(x_test, y_test)

    # Second name for the same test images, used for the output plot.
    x_test_drawing = x_test

    # Input normalisation is disabled here, as it is in the training cells.
    # x_test = inception_v3.preprocess_input(x_test)

    # load one of the test runs
    model = tf.keras.models.load_model(os.path.join(new_folder, 'model_weights.h5'))
    model.summary()

    # display the content of the model
    baseline_results = model.evaluate(x_test, y_test, verbose=2)
    for name, value in zip(model.metrics_names, baseline_results):
        print(name, ': ', value)
    print()

    # test a prediction
    test_predictions_baseline = model.predict(x_test)
    plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, test_run, 0)

    # Save the predictions into the run folder, as the training cells do, so
    # that FinalScore(new_folder) below scores the files written here.
    # NOTE(review): assumes Prediction's third argument is the output folder,
    # as in the training cells - confirm against the Prediction class.
    prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
    prediction_writer.save()
    prediction_writer.save_all(y_test)

    # show the final score
    score = FinalScore(new_folder)
    score.output()

    # plot output results
    plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, test_run)

In [None]:
# Configure logging from 'logging.conf' and fetch the 'odir' logger before
# running the inference entry point.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
# main() takes no argv, so it is called directly instead of via absl:
# app.run(main)
main()

# VGG16

## Required Functions

In [None]:
from tensorflow.keras import models, layers
from abc import abstractmethod
import tensorflow as tf


class ModelBase:
    """Common base for the model builders used by ``Factory``.

    Stores the input shape and the metric list that subclasses pass to
    ``model.compile`` and offers helpers to print and plot a model.
    """

    def __init__(self, input_shape, metrics):
        """Remember the input shape and the metrics for later compilation."""
        self.input_shape = input_shape
        self.metrics = metrics

    def show_summary(self, model):
        """Print the layer summary of ``model``."""
        model.summary()

    def plot_summary(self, model, file_name):
        """Write a diagram of ``model`` (with shapes and layer names) to ``file_name``."""
        tf.keras.utils.plot_model(model, to_file=file_name, show_shapes=True, show_layer_names=True)

    @abstractmethod
    def compile(self):
        """Build, compile and return the model; subclasses must override.

        Raises:
            NotImplementedError: always.  ``@abstractmethod`` is not enforced
                because this class does not use an ABC metaclass, so the base
                implementation fails loudly instead of returning ``None``.
        """
        raise NotImplementedError('subclasses of ModelBase must implement compile()')


class Advanced(ModelBase):
    """Small convolutional network with an 8-unit sigmoid output."""

    def compile(self):
        """Build the network, compile it with Adam / binary cross-entropy and return it."""
        stack = [
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=self.input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.2),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.Dropout(0.2),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(8, activation='sigmoid'),
        ]
        network = models.Sequential()
        for piece in stack:
            network.add(piece)
        network.compile(optimizer='adam', loss='binary_crossentropy', metrics=self.metrics)

        # Print the summary and write the architecture diagram.
        self.show_summary(network)
        self.plot_summary(network, 'model_advanced.png')
        return network

In [None]:
import tensorflow
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import Model
from tensorflow.python.keras import Input

class InceptionV1(ModelBase):
    """Single inception-style module followed by a dense classifier."""

    def compile(self):
        """Build the three-branch model, compile it and return it."""
        input_img = Input(shape=self.input_shape)

        # Branch 1: 1x1 convolution followed by a 3x3 convolution.
        branch_3x3 = Conv2D(10, (1, 1), padding='same', activation='relu')(input_img)
        branch_3x3 = Conv2D(10, (3, 3), padding='same', activation='relu')(branch_3x3)

        # Branch 2: 1x1 convolution followed by a 5x5 convolution.
        branch_5x5 = Conv2D(10, (1, 1), padding='same', activation='relu')(input_img)
        branch_5x5 = Conv2D(10, (5, 5), padding='same', activation='relu')(branch_5x5)

        # Branch 3: 3x3 max pooling (stride 1) followed by a 1x1 convolution.
        branch_pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img)
        branch_pool = Conv2D(10, (1, 1), padding='same', activation='relu')(branch_pool)

        # Join the branches along axis 3 and flatten.
        merged = tensorflow.keras.layers.concatenate([branch_3x3, branch_5x5, branch_pool], axis=3)
        features = Flatten()(merged)

        # Dense classifier: 1200 -> 600 -> 150 -> 8 sigmoid outputs.
        hidden = Dense(1200, activation='relu')(features)
        hidden = Dense(600, activation='relu')(hidden)
        hidden = Dense(150, activation='relu')(hidden)
        output = Dense(8, activation='sigmoid')(hidden)
        net = Model([input_img], output)

        net.compile(optimizer='adam', loss='binary_crossentropy', metrics=self.metrics)

        self.show_summary(net)
        self.plot_summary(net, 'model_inception_v1.png')

        return net

In [None]:
import tensorflow
from tensorflow.keras import models, layers
from tensorflow.keras.optimizers import SGD

class Vgg16(ModelBase):
    """VGG16 layer stack with frozen loaded weights and a new 8-unit sigmoid output."""

    # (filters, number of 3x3 convolutions) for each of the five blocks;
    # every block ends with a 2x2 max pooling layer.
    _CONV_BLOCKS = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    # Weight file loaded into the full 1000-way network before its last
    # layer is replaced.
    _WEIGHTS_PATH = r'/content/drive/MyDrive/AIMedic/OcularDisease/vgg16_weights_tf_dim_ordering_tf_kernels.h5'

    def compile(self):
        """Build the network, load the weights, swap the output layer and compile.

        Returns:
            The compiled ``Sequential`` model.  Every layer up to the second
            4096-unit dense layer is marked non-trainable; only the new
            8-unit sigmoid layer keeps the default trainable state.
        """
        x = models.Sequential()

        def add_frozen(layer):
            # Mark the layer non-trainable before appending it to the model.
            layer.trainable = False
            x.add(layer)

        is_first_layer = True
        for filters, conv_count in self._CONV_BLOCKS:
            for _ in range(conv_count):
                conv_kwargs = dict(filters=filters, kernel_size=(3, 3), padding="same", activation="relu")
                if is_first_layer:
                    # Only the very first layer declares the input shape.
                    conv_kwargs['input_shape'] = self.input_shape
                    is_first_layer = False
                add_frozen(layers.Conv2D(**conv_kwargs))
            add_frozen(layers.MaxPooling2D((2, 2), strides=(2, 2)))

        add_frozen(layers.Flatten())
        add_frozen(layers.Dense(4096, activation='relu'))
        add_frozen(layers.Dense(4096, activation='relu'))
        add_frozen(layers.Dense(1000, activation='softmax'))

        # Transfer learning, load previous weights
        x.load_weights(self._WEIGHTS_PATH)

        # Remove last layer
        x.pop()

        # Add new dense layer
        x.add(layers.Dense(8, activation='sigmoid'))
        # `learning_rate` is the supported spelling; `lr` is a deprecated alias.
        # NOTE(review): `decay` may be rejected by newer Keras optimizers -
        # confirm against the installed TensorFlow version.
        sgd = SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        print('Configuration Start -------------------------')
        print(sgd.get_config())
        print('Configuration End -------------------------')
        x.compile(optimizer=sgd, loss='binary_crossentropy', metrics=self.metrics)

        self.show_summary(x)
        self.plot_summary(x, 'model_vgg16net.png')
        return x

In [None]:
import enum

class ModelTypes(enum.Enum):
    """Identifiers of the model builders that ``Factory`` can compile."""
    vgg16 = 1  # built by Vgg16
    inception_v1 = 2  # built by InceptionV1
    advanced_testing = 3  # built by Advanced
    vgg19 = 4  # built by Vgg19


class Factory:
    """Create one builder per ``ModelTypes`` member and compile the requested one."""

    def __init__(self, input_shape, metrics):
        """Instantiate every builder with the same input shape and metrics."""
        builder_classes = {
            ModelTypes.vgg16: Vgg16,
            ModelTypes.inception_v1: InceptionV1,
            ModelTypes.advanced_testing: Advanced,
            ModelTypes.vgg19: Vgg19,
        }
        self.Makers = {
            kind: builder(input_shape, metrics)
            for kind, builder in builder_classes.items()
        }

    def compile(self, model_type):
        """Return the compiled model of the builder registered for ``model_type``."""
        maker = self.Makers[model_type]
        return maker.compile()

In [None]:
import tensorflow
from tensorflow.keras import models, layers
from tensorflow.keras.optimizers import SGD

class Vgg19(ModelBase):
    """VGG19 layer stack with frozen loaded weights and a new 8-unit sigmoid output."""

    # (filters, number of 3x3 convolutions) for each of the five blocks;
    # every block ends with a 2x2 max pooling layer.
    _CONV_BLOCKS = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))
    # Weight file loaded into the full 1000-way network before its last
    # layer is replaced.
    _WEIGHTS_PATH = r'/content/drive/MyDrive/AIMedic/OcularDisease/vgg19_weights_tf_dim_ordering_tf_kernels.h5'

    def compile(self):
        """Build the network, load the weights, swap the output layer and compile.

        Returns:
            The compiled ``Sequential`` model.  Every layer up to the second
            4096-unit dense layer is marked non-trainable; only the new
            8-unit sigmoid layer keeps the default trainable state.
        """
        x = models.Sequential()

        def add_frozen(layer):
            # Mark the layer non-trainable before appending it to the model.
            layer.trainable = False
            x.add(layer)

        is_first_layer = True
        for filters, conv_count in self._CONV_BLOCKS:
            for _ in range(conv_count):
                conv_kwargs = dict(filters=filters, kernel_size=(3, 3), padding="same", activation="relu")
                if is_first_layer:
                    # Only the very first layer declares the input shape.
                    conv_kwargs['input_shape'] = self.input_shape
                    is_first_layer = False
                add_frozen(layers.Conv2D(**conv_kwargs))
            add_frozen(layers.MaxPooling2D((2, 2), strides=(2, 2)))

        add_frozen(layers.Flatten())
        add_frozen(layers.Dense(4096, activation='relu'))
        #layer = layers.Dropout(0.5)
        #layer.trainable = True
        #x.add(layer)
        add_frozen(layers.Dense(4096, activation='relu'))
        #layer = layers.Dropout(0.5)
        #layer.trainable = True
        #x.add(layer)
        add_frozen(layers.Dense(1000, activation='softmax'))

        # Transfer learning, load previous weights
        x.load_weights(self._WEIGHTS_PATH)

        # Remove last layer
        x.pop()

        # Add new dense layer
        #x.add(layers.Dropout(0.1))
        x.add(layers.Dense(8, activation='sigmoid'))
        # `learning_rate` is the supported spelling; `lr` is a deprecated alias.
        # NOTE(review): `decay` may be rejected by newer Keras optimizers -
        # confirm against the installed TensorFlow version.
        sgd = SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=False)
        print('Configuration Start -------------------------')
        print(sgd.get_config())
        print('Configuration End -------------------------')
        x.compile(optimizer=sgd, loss='binary_crossentropy', metrics=self.metrics)

        self.show_summary(x)
        self.plot_summary(x, 'model_vgg19net.png')
        return x

## Basic Model

In [None]:
import os
import tensorflow as tf
# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import secrets
import matplotlib.pyplot as plt
from tensorflow.keras.applications import vgg16

# Training hyper-parameters (earlier settings kept as comments).
# batch_size = 32
batch_size = 16
num_classes = 8
# epochs = 50
epochs = 1
patience = 5

# Each run stores its artefacts in a freshly named sub-folder of `folder`.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/AIMedic/OcularDisease/VGG16_basic_outputs'

new_folder = os.path.join(folder, token)

# exist_ok=True replaces the racy "check, then create" sequence.
os.makedirs(new_folder, exist_ok=True)

defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# Build and compile the VGG16 variant for 224x224 RGB input.
factory = Factory((224, 224, 3), defined_metrics)
model = factory.compile(ModelTypes.vgg16)

(x_train, y_train), (x_test, y_test) = load_data(224, 1)

# Second name for the same test images, used later for plotting.
x_test_drawing = x_test

# Input normalisation is currently disabled:
# x_train = vgg16.preprocess_input(x_train)
# x_test = vgg16.preprocess_input(x_test)

class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# plot data input
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for `patience` epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)

# Per-class weights, keyed by class index; the fit cell currently has the
# class_weight argument commented out.
class_weight = {0: 1.,
                1: 1.583802025,
                2: 8.996805112,
                3: 10.24,
                4: 10.05714286,
                5: 1.,
                6: 1.,
                7: 2.505338078}

In [None]:
# Train the VGG16 model; the test split is used as validation data and
# drives the early-stopping callback.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,  #class_weight = class_weight,
    validation_data=(x_test, y_test),
    callbacks=[callback],
)

In [None]:
# Persist the trained model inside this run's output folder.
print("saving")
weights_file = os.path.join(new_folder, 'model_weights.h5')
model.save(weights_file)

print("plotting")
plotter.plot_metrics(history, os.path.join(new_folder, 'plot1.png'), 2)

# Training vs. validation accuracy per epoch, saved as plot2.png.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.savefig(os.path.join(new_folder, 'plot2.png'))
plt.show()

# Print every compiled metric evaluated on the test split.
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for metric_name, metric_value in zip(model.metrics_names, baseline_results):
    print(metric_name, ': ', metric_value)
print()

# Predict on the test split and draw the confusion matrix.
test_predictions_baseline = model.predict(x_test)
confusion_file = os.path.join(new_folder, 'plot3.png')
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, confusion_file, 0)

# Save the predictions and the ground truth into the run folder.
prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# Show the final score computed from the run folder.
score = FinalScore(new_folder)
score.output()

# Plot output results using the image array kept for drawing.
output_file = os.path.join(new_folder, 'plot4.png')
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, output_file)

## Inference

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import logging.config
import os
import tensorflow as tf
from absl import app
from tensorflow.keras.applications import vgg16


# def main(argv):
def main():
    """Evaluate a previously saved VGG16 run on the test split.

    Loads the data, restores ``model_weights.h5`` from the hard-coded run
    folder, prints the evaluation metrics, then produces the confusion
    matrix, the saved predictions, the final score and the output plot.
    """
    print(tf.version.VERSION)
    image_size = 224
    test_run = 'zCSA2'
    new_folder = r'/content/drive/MyDrive/AIMedic/OcularDisease/VGG16_basic_outputs/e1e12c704633d1af537b61ece82c2860/'

    # load the data; only the test split is used here
    _, (x_test, y_test) = load_data(image_size, 1)

    class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

    # plot data input
    plotter = Plotter(class_names)
    plotter.plot_input_images(x_test, y_test)

    # Keep the untouched images for the output plot.
    x_test_drawing = x_test

    # normalize input based on model
    # preprocess_input may overwrite a float array in place, which would also
    # change x_test_drawing, so it is handed a copy.
    # NOTE(review): assumes x_test is a numpy array - confirm in load_data.
    # NOTE(review): the VGG16 training cell in this notebook has its
    # preprocess_input calls commented out; confirm the loaded model was
    # trained on preprocessed input, otherwise drop this call.
    x_test = vgg16.preprocess_input(x_test.copy())

    # load one of the test runs
    model = tf.keras.models.load_model(os.path.join(new_folder, 'model_weights.h5'))
    model.summary()

    # display the content of the model
    baseline_results = model.evaluate(x_test, y_test, verbose=2)
    for name, value in zip(model.metrics_names, baseline_results):
        print(name, ': ', value)
    print()

    # test a prediction
    test_predictions_baseline = model.predict(x_test)
    plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, test_run, 0)

    # Save the predictions into the run folder, as the training cells do, so
    # that FinalScore(new_folder) below scores the files written here.
    # NOTE(review): assumes Prediction's third argument is the output folder,
    # as in the training cells - confirm against the Prediction class.
    prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
    prediction_writer.save()
    prediction_writer.save_all(y_test)

    # show the final score
    score = FinalScore(new_folder)
    score.output()

    # plot output results
    plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, test_run)

In [None]:
# Configure logging from 'logging.conf' and fetch the 'odir' logger before
# running the inference entry point.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('odir')
# main() takes no argv, so it is called directly instead of via absl:
# app.run(main)
main()

# VGG19

## Basic Model

In [None]:
import os
import tensorflow as tf
# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import secrets
import matplotlib.pyplot as plt
from tensorflow.keras.applications import vgg19

# Training hyper-parameters (earlier settings kept as comments).
# batch_size = 32
batch_size = 16
num_classes = 8
# epochs = 50
epochs = 1
patience = 5

# Per-class weights, keyed by class index; the fit cell currently has the
# class_weight argument commented out.
class_weight = {0: 1.,
                1: 1.583802025,
                2: 8.996805112,
                3: 10.24,
                4: 10.05714286,
                5: 1.,
                6: 1.,
                7: 2.505338078}

# Each run stores its artefacts in a freshly named sub-folder of `folder`.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/AIMedic/OcularDisease/VGG19_basic_outputs'

new_folder = os.path.join(folder, token)

# exist_ok=True replaces the racy "check, then create" sequence.
os.makedirs(new_folder, exist_ok=True)

defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# Build and compile the VGG19 variant for 224x224 RGB input.
factory = Factory((224, 224, 3), defined_metrics)
model = factory.compile(ModelTypes.vgg19)

(x_train, y_train), (x_test, y_test) = load_data(224, 1)

# Second name for the same test images, used later for plotting.
x_test_drawing = x_test

# Input normalisation is currently disabled:
# x_train = vgg19.preprocess_input(x_train)
# x_test = vgg19.preprocess_input(x_test)

class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# plot data input
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for `patience` epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)


In [None]:
# Train the VGG19 model; the test split is used as validation data and
# drives the early-stopping callback.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,  #class_weight=class_weight,
    validation_data=(x_test, y_test),
    callbacks=[callback],
)

In [None]:
# Persist the trained model inside this run's output folder.
print("saving weights")
weights_file = os.path.join(new_folder, 'model_weights.h5')
model.save(weights_file)

print("plotting metrics")
metrics_file = os.path.join(new_folder, 'plot1.png')
plotter.plot_metrics(history, metrics_file, 2)

print("plotting accuracy")
accuracy_file = os.path.join(new_folder, 'plot2.png')
plotter.plot_accuracy(history, accuracy_file)

# Print every compiled metric evaluated on the test split.
print("display the content of the model")
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for metric_name, metric_value in zip(model.metrics_names, baseline_results):
    print(metric_name, ': ', metric_value)
print()

# Predict on the test split and draw the confusion matrix.
test_predictions_baseline = model.predict(x_test)
print("plotting confusion matrix")
confusion_file = os.path.join(new_folder, 'plot3.png')
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, confusion_file, 0)

# Save the predictions and the ground truth into the run folder.
prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# Show the final score computed from the run folder.
score = FinalScore(new_folder)
score.output()

# Plot output results using the image array kept for drawing.
output_file = os.path.join(new_folder, 'plot4.png')
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, output_file)

# ResNet50

## Basic Model

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.applications import resnet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import secrets
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import SGD

# NOTE(review): batch_size and epochs are not set in this cell (only the
# commented values below); the fit cell uses whatever an earlier cell left
# in the notebook namespace.
# batch_size = 32
num_classes = 8
# epochs = 100
patience = 8

# Per-class weights, keyed by class index; the fit cell currently has the
# class_weight argument commented out.
class_weight = {0: 1.,
                1: 1.583802025,
                2: 8.996805112,
                3: 10.24,
                4: 10.05714286,
                5: 1.,
                6: 1.,
                7: 2.505338078}

# Each run stores its artefacts in a freshly named sub-folder of `folder`.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/AIMedic/OcularDisease/ResNet50_basic_outputs'

new_folder = os.path.join(folder, token)

# exist_ok=True replaces the racy "check, then create" sequence.
os.makedirs(new_folder, exist_ok=True)

# ResNet50 backbone with ImageNet weights and include_top=False.
base_model = resnet50.ResNet50(weights='imagenet', include_top=False)

# Uncomment to freeze every backbone layer; as written, all layers train.
#for layer in base_model.layers:
#    layer.trainable = False

# New head: global average pooling -> 1024-unit ReLU -> one sigmoid per class.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.summary()

tf.keras.utils.plot_model(model, to_file=os.path.join(new_folder, 'model_resnet50.png'), show_shapes=True, show_layer_names=True)

defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# Adam Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer=Adam(learning_rate=0.001),
#               metrics=defined_metrics)

# RMSProp Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer='rmsprop',
#               metrics=defined_metrics)

# `learning_rate` is the supported spelling; `lr` is a deprecated alias.
# NOTE(review): `decay` may be rejected by newer Keras optimizers - confirm
# against the installed TensorFlow version.
sgd = SGD(learning_rate=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
print('Configuration Start -------------------------')
print(sgd.get_config())
print('Configuration End -------------------------')
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=defined_metrics)

# NOTE(review): the data is not reloaded here; x_train / y_train / x_test /
# y_test must already exist from an earlier cell.
# (x_train, y_train), (x_test, y_test) = load_data(224)

# Second name for the same test images, used later for plotting.
x_test_drawing = x_test

# Input normalisation is currently disabled:
# x_train = resnet50.preprocess_input(x_train)
# x_test = resnet50.preprocess_input(x_test)

class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# plot data input
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for `patience` epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)

In [None]:
# Train the ResNet50 model; the test split is used as validation data and
# drives the early-stopping callback.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,  #class_weight=class_weight,
    validation_data=(x_test, y_test),
    callbacks=[callback],
)

In [None]:
# Persist the trained model inside this run's output folder.
print("saving weights")
weights_file = os.path.join(new_folder, 'model_weights.h5')
model.save(weights_file)

print("plotting metrics")
metrics_file = os.path.join(new_folder, 'plot1.png')
plotter.plot_metrics(history, metrics_file, 2)

print("plotting accuracy")
accuracy_file = os.path.join(new_folder, 'plot2.png')
plotter.plot_accuracy(history, accuracy_file)

# Print every compiled metric evaluated on the test split.
print("display the content of the model")
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for metric_name, metric_value in zip(model.metrics_names, baseline_results):
    print(metric_name, ': ', metric_value)
print()

# Predict on the test split and draw the confusion matrix.
test_predictions_baseline = model.predict(x_test)
print("plotting confusion matrix")
confusion_file = os.path.join(new_folder, 'plot3.png')
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, confusion_file, 0)

# Save the predictions and the ground truth into the run folder.
prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# Show the final score computed from the run folder.
score = FinalScore(new_folder)
score.output()

# Plot output results using the image array kept for drawing.
output_file = os.path.join(new_folder, 'plot4.png')
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, output_file)

# InceptionResNetV2

## Basic Model

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.applications import inception_resnet_v2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from tensorflow.keras.models import Model
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from tensorflow.keras.optimizers import SGD, Adam

# os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
import secrets
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import SGD
from sklearn.utils import class_weight
import numpy as np

# Training hyper-parameters (earlier settings kept as comments).
# batch_size = 32
batch_size = 16
num_classes = 8
# epochs = 100
epochs = 3
patience = 3
# Only referenced by the commented-out freezing loops further down.
freeze_layers = 2

# Disabled per-class weights:
# class_weight = {0: 1.,
#                 1: 1.583802025,
#                 2: 8.996805112,
#                 3: 10.24,
#                 4: 10.05714286,
#                 5: 1.,
#                 6: 1.,
#                 7: 2.505338078}

# Each run stores its artefacts in a freshly named sub-folder of `folder`.
token = secrets.token_hex(16)
folder = r'/content/drive/MyDrive/AIMedic/OcularDisease/InceptionResNetV2_basic_outputs'

new_folder = os.path.join(folder, token)

# exist_ok=True replaces the racy "check, then create" sequence.
os.makedirs(new_folder, exist_ok=True)

# InceptionResNetV2 backbone with ImageNet weights and include_top=False.
base_model = inception_resnet_v2.InceptionResNetV2(weights='imagenet', include_top=False)

# Uncomment to set the trainable flag of every backbone layer explicitly:
# for layer in base_model.layers:
#     layer.trainable = True

# New head: global average pooling -> 1024-unit ReLU -> one sigmoid per class.
x = base_model.output
#x = Flatten()(x)
#x = Dropout(0.5)(x)
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Disabled experiment: train only the first `freeze_layers` layers.
#for layer in model.layers[:freeze_layers]:
#    layer.trainable = True
#for layer in model.layers[freeze_layers:]:
#    layer.trainable = False

model.summary()

tf.keras.utils.plot_model(model, to_file=os.path.join(new_folder, 'model_inception_resnet_v2.png'), show_shapes=True, show_layer_names=True)

defined_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

# Adam Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer=Adam(learning_rate=0.001),
#               metrics=defined_metrics)

# RMSProp Optimizer Example
# model.compile(loss='binary_crossentropy',
#               optimizer='rmsprop',
#               metrics=defined_metrics)

# `learning_rate` is the supported spelling; `lr` is a deprecated alias.
# NOTE(review): `decay` may be rejected by newer Keras optimizers - confirm
# against the installed TensorFlow version.
sgd = SGD(learning_rate=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
print('Configuration Start -------------------------')
print(sgd.get_config())
print('Configuration End -------------------------')
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=defined_metrics)

(x_train, y_train), (x_test, y_test) = load_data(224, 1)

# Second name for the same test images, used later for plotting.
x_test_drawing = x_test

# Input normalisation is currently disabled:
# x_train = inception_resnet_v2.preprocess_input(x_train)
# x_test = inception_resnet_v2.preprocess_input(x_test)

class_names = ['Normal', 'Diabetes', 'Glaucoma', 'Cataract', 'AMD', 'Hypertension', 'Myopia', 'Others']

# plot data input
plotter = Plotter(class_names)

# Stop training once val_loss has not improved for `patience` epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)

#class_weight = class_weight.compute_class_weight('balanced', np.unique(x_train), x_train)

In [None]:
# Train the InceptionResNetV2 model; the test split is used as validation
# data and drives the early-stopping callback.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,  #class_weight= class_weight,
    validation_data=(x_test, y_test),
    callbacks=[callback],
)

In [None]:
# Persist the trained model inside this run's output folder.
print("saving weights")
weights_file = os.path.join(new_folder, 'model_weights.h5')
model.save(weights_file)

print("plotting metrics")
metrics_file = os.path.join(new_folder, 'plot1.png')
plotter.plot_metrics(history, metrics_file, 2)

print("plotting accuracy")
accuracy_file = os.path.join(new_folder, 'plot2.png')
plotter.plot_accuracy(history, accuracy_file)

# Print every compiled metric evaluated on the test split.
print("display the content of the model")
baseline_results = model.evaluate(x_test, y_test, verbose=2)
for metric_name, metric_value in zip(model.metrics_names, baseline_results):
    print(metric_name, ': ', metric_value)
print()

# Predict on the test split and draw the confusion matrix.
test_predictions_baseline = model.predict(x_test)
print("plotting confusion matrix")
confusion_file = os.path.join(new_folder, 'plot3.png')
plotter.plot_confusion_matrix_generic(y_test, test_predictions_baseline, confusion_file, 0)

# Save the predictions and the ground truth into the run folder.
prediction_writer = Prediction(test_predictions_baseline, 400, new_folder)
prediction_writer.save()
prediction_writer.save_all(y_test)

# Show the final score computed from the run folder.
score = FinalScore(new_folder)
score.output()

In [None]:
# Plot output results into plot4.png in the run folder; the predictions,
# labels and x_test_drawing come from the previous cells.
plotter.plot_output(test_predictions_baseline, y_test, x_test_drawing, os.path.join(new_folder, 'plot4.png'))