In [12]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model, load_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.metrics import confusion_matrix, classification_report

In [13]:
# Root folder of the dataset; every sub-directory found here is treated as
# one class, except the "Test" and ".git" entries, which are skipped.
sdir = r'G:\Dataset-image-Eye_Data_New'

slist = os.listdir(sdir)
classes = [
    d for d in slist
    if d not in ("Test", ".git") and os.path.isdir(os.path.join(sdir, d))
]
class_count = len(classes)

# Pair every entry of a class folder with that folder's name as its label.
filepaths = []
labels = []
for klass in classes:
    classpath = os.path.join(sdir, klass)
    for f in os.listdir(classpath):
        filepaths.append(os.path.join(classpath, f))
        labels.append(klass)

print('number of files: ', len(filepaths), '   number of labels: ', len(labels))

# Two-column frame (filepaths, labels) consumed by the split and the generators.
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
print(df.head())


number of files:  16016    number of labels:  16016
                                           filepaths       labels
0  G:\Dataset-image-Eye_Data_New\Keratoconus\KCN_...  Keratoconus
1  G:\Dataset-image-Eye_Data_New\Keratoconus\KCN_...  Keratoconus
2  G:\Dataset-image-Eye_Data_New\Keratoconus\KCN_...  Keratoconus
3  G:\Dataset-image-Eye_Data_New\Keratoconus\KCN_...  Keratoconus
4  G:\Dataset-image-Eye_Data_New\Keratoconus\KCN_...  Keratoconus


In [14]:
# Number of samples per class in the full dataset.
balance = df['labels'].value_counts()
print(balance)

# Fraction of all rows used for training and for testing; whatever is left
# after both are taken becomes the validation set.
train_split = .8
test_split = .1
# Share of the non-training remainder that has to go to the test set.
dummy_split = test_split / (1 - train_split)

# Both splits use the same shuffling options so the partition is repeatable.
split_options = dict(shuffle=True, random_state=125)
train_df, dummy_df = train_test_split(df, train_size=train_split, **split_options)
test_df, valid_df = train_test_split(dummy_df, train_size=dummy_split, **split_options)

# Size of the test set; later used to pick a batch size that divides it evenly.
length = len(test_df)
print('train size: ', len(train_df), '  test size: ', length, '   valid size: ', len(valid_df))


labels
Normal         5600
Keratoconus    5572
Suspect        4844
Name: count, dtype: int64
train size:  12812   test size:  1602    valid size:  1602


In [15]:
batch_size = 16


def scalar(x):
    """Linearly rescale pixel values so that 0 maps to -1 and 255 maps to +1.

    Args:
        x: a number or array of pixel intensities.

    Returns:
        ``x / 127.5 - 1``, with the same shape as ``x``.
    """
    scaled = x / 127.5
    return scaled - 1

# Options shared by all three generators so every split is read the same way.
flow_options = dict(
    x_col='filepaths',
    y_col='labels',
    target_size=(448, 448),
    class_mode='categorical',
)

# Training images are rescaled by `scalar` and randomly flipped horizontally.
trgen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=scalar,
    horizontal_flip=True,
)
train_gen = trgen.flow_from_dataframe(
    train_df,
    batch_size=batch_size,
    shuffle=True,
    seed=123,
    **flow_options,
)

# Validation and test images are only rescaled, and are read in a fixed order.
tvgen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=scalar,
)
valid_gen = tvgen.flow_from_dataframe(
    valid_df,
    batch_size=batch_size,
    shuffle=False,
    **flow_options,
)

# Largest divisor of the test-set size that does not exceed `batch_size`, so
# that `test_steps` full batches cover the test set exactly once.
test_batch_size = sorted(
    (d for d in range(1, length + 1) if length % d == 0 and d <= batch_size),
    reverse=True,
)[0]
test_steps = length // test_batch_size

test_gen = tvgen.flow_from_dataframe(
    test_df,
    batch_size=test_batch_size,
    shuffle=False,
    **flow_options,
)

# Integer class index of every test sample, in generator order.
test_labels = test_gen.labels


Found 12812 validated image filenames belonging to 3 classes.
Found 1602 validated image filenames belonging to 3 classes.
Found 1602 validated image filenames belonging to 3 classes.


In [16]:
def show_training_samples(gen):
    """Plot up to 25 images from the next batch of ``gen`` on a 5x5 grid.

    Args:
        gen: a batch iterator exposing ``class_indices`` (class name -> index)
            and yielding ``(images, one_hot_labels)`` from ``next(gen)``.
            Images are assumed to be scaled to [-1, 1], as ``scalar`` does.
    """
    index_to_class = {index: name for name, index in gen.class_indices.items()}

    images, labels = next(gen)
    plt.figure(figsize=(15, 15))
    shown = min(len(labels), 25)

    for position, (picture, one_hot) in enumerate(zip(images[:shown], labels[:shown]), start=1):
        plt.subplot(5, 5, position)
        # Map [-1, 1] back to [0, 1], the float range imshow expects.
        plt.imshow((picture + 1) / 2)
        plt.title(index_to_class[np.argmax(one_hot)], color='blue', fontsize=16)
        plt.axis('off')
    plt.show()


In [17]:
# Hyper-parameters of the classifier.
img_shape = (448, 448, 3)
neurons = 512
dropout = .3
lr = .001
freeze = True

# ImageNet-pretrained VGG19 without its classification top; pooling='max'
# makes the base model output a flat feature vector per image.
base_model = tf.keras.applications.VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=img_shape,
    pooling='max',
)

if freeze:
    # Keep the pretrained weights fixed; only the new head is trained.
    base_model.trainable = False

# Head applied to the base-model features: batch norm -> regularised dense -> dropout.
head_layers = [
    tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    tf.keras.layers.Dense(
        neurons,
        activation='relu',
        kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
        kernel_regularizer=regularizers.l2(0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
    ),
    tf.keras.layers.Dropout(rate=dropout, seed=123),
]

x = base_model.output
for layer in head_layers:
    x = layer(x)

# One softmax unit per class discovered in the dataset folder.
output = tf.keras.layers.Dense(
    class_count,
    activation='softmax',
    kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
)(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(Adamax(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])


In [18]:
def print_in_color(txt_msg, fore_tupple, back_tupple):
    """Print ``txt_msg`` in 24-bit ANSI colours, then reset the terminal colours.

    Args:
        txt_msg: the text to print. It is printed literally; curly braces in
            the text are not treated as format placeholders.
        fore_tupple: ``(r, g, b)`` foreground colour.
        back_tupple: ``(r, g, b)`` background colour.

    Two lines are written to stdout: the coloured message, and a line holding
    only the ANSI reset sequence.
    """
    rf, gf, bf = fore_tupple
    rb, gb, bb = back_tupple
    # SGR sequence: "38;2;r;g;b" selects the foreground, "48;2;r;g;b" the background.
    mat = f'\33[38;2;{rf};{gf};{bf};48;2;{rb};{gb};{bb}m'
    # Plain concatenation instead of str.format: a message containing "{" or
    # "}" (for example a file name) must not raise or be substituted.
    print(mat + txt_msg, flush=True)
    print('\33[0m', flush=True)


In [19]:
class LRA(keras.callbacks.Callback):
    """Learning-rate adjustment callback with early stopping.

    While training accuracy is below ``threshold`` the monitored metric is the
    training accuracy; once it reaches ``threshold`` the monitored metric is
    the validation loss. After ``patience`` consecutive epochs without
    improvement the learning rate is multiplied by ``factor``. After
    ``stop_patience`` consecutive reductions without an improvement in
    between, training is halted.

    Args:
        patience: non-improving epochs tolerated before the rate is reduced.
        stop_patience: consecutive rate reductions tolerated before stopping.
        threshold: training accuracy at which monitoring switches to val loss.
        factor: multiplier applied to the learning rate on each reduction.
        dwell: if true, restore the best weights seen so far whenever the
            learning rate is reduced.
        model_name: name shown in the start-up message.
        freeze: whether the base model is frozen (start-up message only).
        end_epoch: index of the last epoch; the status message is printed
            after it because no further ``on_epoch_begin`` will run.

    Requires ``accuracy`` and ``val_loss`` to be present in the epoch logs.
    """

    def __init__(self, patience, stop_patience, threshold, factor, dwell, model_name, freeze, end_epoch):
        super(LRA, self).__init__()
        self.patience = patience
        self.stop_patience = stop_patience
        self.threshold = threshold
        self.factor = factor
        self.dwell = dwell
        self.lr = 0
        self.highest_tracc = 0.0
        self.lowest_vloss = np.inf
        self.count = 0        # consecutive epochs without improvement
        self.stop_count = 0   # consecutive learning-rate reductions
        self.end_epoch = end_epoch
        self.best_weights = None
        # Status of the previous epoch, printed at the start of the next one.
        # Kept on the instance so it always exists and is not shared between
        # callback objects.
        self.msg = ' '

        if freeze:
            msgs = f' Starting training using base model {model_name} with weights frozen...'
        else:
            msgs = f' Starting training using base model {model_name} training all layers '
        print_in_color(msgs, (244, 252, 3), (55, 65, 80))

    def set_model(self, model):
        """Attach the model and record its initial learning rate and weights."""
        # Let the base class register the model too, so ``self.model`` is valid.
        super().set_model(model)
        self.model_ = model
        self.lr = float(tf.keras.backend.get_value(model.optimizer.learning_rate))
        self.best_weights = self.model_.get_weights()

    def on_epoch_begin(self, epoch, logs=None):
        """Print the status message produced at the end of the previous epoch."""
        if epoch != 0:
            msgs = f'for epoch {epoch} ' + self.msg
            print_in_color(msgs, (255, 255, 0), (55, 65, 80))

    def _record_improvement(self):
        """Snapshot the current weights as the best so far and reset both counters."""
        self.best_weights = self.model_.get_weights()
        self.count = 0
        self.stop_count = 0

    def _handle_no_improvement(self, current_lr, metric_name):
        """Count a non-improving epoch; reduce the learning rate once patience runs out."""
        if self.count >= self.patience - 1:
            self.lr = current_lr * self.factor
            self.model_.optimizer.learning_rate.assign(self.lr)
            self.count = 0
            self.stop_count = self.stop_count + 1
            self.msg = f' {metric_name} did not improve, learning rate reduced to {self.lr}...'
            if self.dwell:
                self.model_.set_weights(self.best_weights)
        else:
            self.count = self.count + 1
            self.msg = f' {metric_name} did not improve for {self.count} epoch(s)...'

    def on_epoch_end(self, epoch, logs=None):
        """Update the best metrics, adjust the learning rate, and stop training if due."""
        lr = float(tf.keras.backend.get_value(self.model_.optimizer.learning_rate))
        v_loss = logs.get('val_loss')
        acc = logs.get('accuracy')

        if acc < self.threshold:
            # Early phase: progress is judged by training accuracy.
            if acc > self.highest_tracc:
                self.msg = f' training accuracy improved from {self.highest_tracc} to {acc}...'
                self.highest_tracc = acc
                self._record_improvement()
                if v_loss < self.lowest_vloss:
                    self.lowest_vloss = v_loss
            else:
                self._handle_no_improvement(lr, 'training accuracy')
        else:
            # Late phase: progress is judged by validation loss.
            if v_loss < self.lowest_vloss:
                self.msg = f' validation loss improved from {self.lowest_vloss} to {v_loss}...'
                self.lowest_vloss = v_loss
                self._record_improvement()
            else:
                self._handle_no_improvement(lr, 'validation loss')

        if epoch == self.end_epoch:
            print_in_color(self.msg, (255, 255, 0), (55, 65, 80))

        if self.stop_count > self.stop_patience - 1:
            self.msg = f' training halted at epoch {epoch + 1}...'
            print_in_color(self.msg, (0, 255, 0), (55, 65, 80))
            self.model_.stop_training = True


In [None]:
# Settings for the LRA callback and the training run.
patience = 1         # non-improving epochs before the learning rate is reduced
stop_patience = 4    # consecutive reductions before training is halted
threshold = .9       # training accuracy at which LRA switches to monitoring val loss
factor = .5          # multiplier applied to the learning rate on each reduction
dwell = False        # if True, LRA restores the best weights when it reduces the rate
# Display label for the base network. The model built above is VGG19, so the
# label says so (it previously read 'ResNet50').
model_type = 'VGG19'
epochs = 15
callbacks = [
    LRA(
        patience=patience,
        stop_patience=stop_patience,
        threshold=threshold,
        factor=factor,
        dwell=dwell,
        model_name=model_type,
        freeze=freeze,
        end_epoch=epochs - 1,
    )
]

history = model.fit(
    x=train_gen,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=valid_gen,
    validation_steps=None,
    shuffle=False,
    initial_epoch=0,
)

[38;2;244;252;3;48;2;55;65;80m Starting training using base model ResNet50 with weights frozen...
[0m
Epoch 1/15
[1m  7/801[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:21:19[0m 6s/step - accuracy: 0.4087 - loss: 27.7365

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report

# Raw string: the Windows path contains backslashes (\D, \T, \C) that would
# otherwise be parsed as invalid escape sequences and trigger a warning.
model_path = r'G:\Dataset-image-Eye_Data_New\Test\Copy of vgg19_model.h5'
print(f"Loading model from: {model_path}")
model = load_model(model_path)

# Label embedded in the names of the files written during evaluation. The
# checkpoint file name indicates a VGG19 model, so the label says so (it
# previously read 'ResNet50').
model_type = 'VGG19'

def display_eval_metrics(e_data):
    """Print the evaluation metrics as a two-column table.

    Args:
        e_data: mapping of metric name to numeric value; must contain the
            key ``'accuracy'``.

    Returns:
        The ``'accuracy'`` entry multiplied by 100.
    """
    header_fg = (255,255,0)
    header_bg = (55,65,80)
    print_in_color('Model Metrics after Training', header_fg, header_bg)
    print_in_color('{0:^24s}{1:^24s}'.format('Metric', 'Value'), header_fg, header_bg)
    for metric_name in e_data:
        metric_value = e_data[metric_name]
        print (f'{metric_name:^24s}{metric_value:^24.5f}')
    return e_data['accuracy']* 100

# Prefix of the class-dictionary file name and the folder it is written to.
subject='Disease 2'
save_dir = r'./'

print("Evaluating model...")
# test_gen already yields batches of test_batch_size images, so batch_size is
# not passed again (Keras documents it as not to be specified for generator
# input). test_steps batches cover the test set exactly once.
e_dict=model.evaluate(test_gen, verbose=1, steps=test_steps, return_dict=True)
acc=display_eval_metrics(e_dict)
msg=f'Accuracy on the test set is {acc:5.2f} %'
print_in_color(msg, (0,255,0),(55,65,80))

print("Generating predictions...")
# Same number of batches as the evaluation, so there is one prediction row per
# test sample, in generator order.
preds=model.predict(test_gen, verbose=0, steps=test_steps)

def _plot_errors_by_class(error_indices, index_to_class, save_dir):
    """Draw and save a horizontal bar chart of misclassified samples per true class."""
    plot_bar=[]
    plot_class=[]
    for index, name in index_to_class.items():
        count=error_indices.count(index)
        if count!=0:
            plot_bar.append(count)
            plot_class.append(name)
    fig=plt.figure()
    fig.set_figheight(len(plot_class)/3)
    fig.set_figwidth(10)
    plt.style.use('fivethirtyeight')
    # One barh call per class so each bar takes the next colour of the cycle.
    for name, count in zip(plot_class, plot_bar):
        plt.barh(name, count)
    plt.title(' Errors by Class on Test Set')
    plt.savefig(os.path.join(save_dir, f"Errors_by_Class_{model_type}.png"), bbox_inches='tight')


def _plot_confusion_matrix(y_true, y_pred, label_ids, classes, save_dir):
    """Draw, save and show the confusion matrix, then print the classification report."""
    # Passing `labels` fixes the matrix/report to one row per known class even
    # when a class is absent from both y_true and y_pred; without it the
    # tick labels and target_names would not line up.
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    clr = classification_report(y_true, y_pred, labels=label_ids, target_names=classes)
    length=len(classes)
    side = 8 if length<8 else length
    plt.figure(figsize=(side, side))
    sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
    plt.xticks(np.arange(length)+.5, classes, rotation= 90)
    plt.yticks(np.arange(length)+.5, classes, rotation=0)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.savefig(os.path.join(save_dir, f"confusion_matrix_{model_type}.png"), bbox_inches='tight')
    plt.show()
    print("Classification Report:\n----------------------\n", clr)


def print_info(test_gen, preds, print_code, save_dir, subject):
    """Report the misclassifications of ``preds`` against the labels of ``test_gen``.

    The function:
      * writes the index -> class-name dictionary to
        ``<save_dir>/<subject>-<model_type>-<n_classes>.txt``;
      * prints up to ``print_code`` misclassified files (none if it is 0);
      * if there are errors, saves a bar chart of errors per true class;
      * if there are at most 20 classes, saves and shows the confusion matrix
        and prints the classification report.

    Both figures are saved inside ``save_dir``. The module-level name
    ``model_type`` is used in the file names.

    Args:
        test_gen: generator exposing ``class_indices``, ``labels`` and
            ``filenames``; must not shuffle, so rows of ``preds`` line up
            with ``labels``.
        preds: per-sample class scores, one row per test sample.
        print_code: maximum number of misclassified files to list.
        save_dir: folder that receives the dictionary file and the figures.
        subject: prefix of the dictionary file name.
    """
    labels= test_gen.labels
    file_names= test_gen.filenames
    new_dict={index: name for name, index in test_gen.class_indices.items()}

    # Order class names by class index explicitly instead of relying on the
    # insertion order of the generator's dictionary.
    label_ids=sorted(new_dict)
    classes=[new_dict[index] for index in label_ids]

    dict_name= subject + '-' + model_type + '-' + str(len(classes)) +'.txt'
    dict_path=os.path.join(save_dir,dict_name)
    with open(dict_path, 'w') as x_file:
        x_file.write(str(new_dict))

    error_list=[]
    true_class=[]
    pred_class=[]
    prob_list=[]
    error_indices=[]
    y_pred=[]
    for i, p in enumerate(preds):
        pred_index=np.argmax(p)
        true_index=labels[i]
        if pred_index != true_index:
            error_list.append(file_names[i])
            true_class.append(new_dict[true_index])
            pred_class.append(new_dict[pred_index])
            prob_list.append(p[pred_index])
            error_indices.append(true_index)
        y_pred.append(pred_index)
    errors=len(error_list)

    if print_code !=0:
        if errors>0:
            msg='{0:^28s}{1:^28s}{2:^28s}{3:^16s}'.format('Filename', 'Predicted Class' , 'True Class', 'Probability')
            print_in_color(msg, (0,255,0),(55,65,80))
            for i in range(min(print_code, errors)):
                msg='{0:^28s}{1:^28s}{2:^28s}{3:4s}{4:^6.4f}'.format(error_list[i], pred_class[i],true_class[i], ' ', prob_list[i])
                print_in_color(msg, (255,255,255), (55,65,60))
        else:
            msg='With accuracy of 100 % there are no errors to print'
            print_in_color(msg, (0,255,0),(55,65,80))

    if errors>0:
        _plot_errors_by_class(error_indices, new_dict, save_dir)

    if len(classes)<= 20:
        _plot_confusion_matrix(np.array(labels), np.array(y_pred), label_ids, classes, save_dir)

print_code=20
print_info(test_gen, preds, print_code, save_dir, subject)
