# Case II: Diabetic Retinopathy Analysis<br>
MatiUllah Khan<br>
Last edited: 04.03.2018 <br>
Cognitive Systems for Health Technology Applications <br>
Helsinki Metropolia University of Applied Sciences <br>

# 1. Objectives
Our objective is to use a convolutional neural network to diagnose whether a patient has diabetic retinopathy. The network is trained on images of eyes with and without the disease.

# 2. Required libraries and global variables

We include the necessary libraries for the pre-processing and plotting of the given data.

In [1]:
# Required libraries

import numpy as np
import matplotlib.pyplot as plt
import time
import pickle
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import accuracy_score, precision_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report, recall_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

import keras
import keras.layers as layers
import keras.models as models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications import VGG19
% matplotlib inline

# Training parameters

# Every image is resized to image_height x image_width when it is loaded.
image_height, image_width = 200, 150

# Mini-batch size handed to the data generators.
batch_size = 32

# Length of a training run: number of epochs, training batches drawn per
# epoch, and validation batches drawn at the end of each epoch.
epochs = 20
steps_per_epoch = 32
validation_steps = 16

Using TensorFlow backend.


# 3. Data description and preprocessing

1. Data augmentation is used to prevent overfitting.
2. The validation dataset is not augmented, because validation should measure accuracy on unmodified images.

In [2]:
# Dataset locations, one sub-directory tree per split.
train_dir = "case2_dataset/train"
validation_dir = "case2_dataset/validation"
test_dir = "case2_dataset/test"

# Options common to every directory reader: resize target, batch size and
# binary (0/1) labels.
flow_options = dict(
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='binary',
)

# Training images are rescaled to [0, 1] and randomly zoomed / flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation and test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Generator for train dataset
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Generator for validation dataset
validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)

Found 1237 images belonging to 2 classes.
Found 413 images belonging to 2 classes.


# 4. Modeling and compilation

My plan was to use a pretrained convolutional network as the base of my model, freeze it, and train only the dense layers I added on top.

I tried the VGG16 and VGG19 models with ImageNet weights, and there was not much difference in the outcome between the two (VGG16 has 16 weight layers and VGG19 has 19). I also tried different kinds of dense networks, but the accuracy was pretty much the same.


In [3]:
# Pretrained VGG19 convolutional base: ImageNet weights, without the original
# classifier layers, sized for our resized RGB images.
conv_base = VGG19(include_top=False,
                  weights='imagenet',
                  input_shape=(image_height, image_width, 3))

# Small dense classifier stacked on the flattened convolutional features,
# ending in a single sigmoid unit.
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Freeze the base so that only the dense layers are trained.
conv_base.trainable = False

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 6, 4, 512)         20024384  
_________________________________________________________________
flatten_1 (Flatten)          (None, 12288)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1572992   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 21,597,505
Trainable params: 1,573,121
Non-trainable params: 20,024,384
_________________________________________________________________


# Binary cross-entropy loss for the single sigmoid output; accuracy is
# recorded under the key 'acc'.
rmsprop = optimizers.RMSprop(lr=2e-5)
model.compile(optimizer=rmsprop,
              loss='binary_crossentropy',
              metrics=['acc'])

In [None]:
# Configure training: RMSprop with a small learning rate, binary
# cross-entropy loss, and accuracy as the monitored metric.
rmsprop = optimizers.RMSprop(lr=2e-5)
model.compile(optimizer=rmsprop,
              loss='binary_crossentropy',
              metrics=['acc'])


# 5. Training 

When training the model I noticed that my computer doesn't use my GPU. Instead it uses the CPU, which was very time-consuming. I'm not sure why this is happening at this point, but I guess that I might have installed the CPU version of TensorFlow instead of the GPU version.

In [None]:
# Options for the training run; the generators supply the batches.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=validation_steps,
)

# Time the whole fit with wall-clock timestamps taken around it.
t1 = time.time()
h = model.fit_generator(train_generator, **fit_options)
t2 = time.time()

# Keep the duration (in seconds) next to the per-epoch metrics.
elapsed_seconds = t2 - t1
h.history['time_elapsed'] = elapsed_seconds
print('Total elapsed time for training: {:.3f} minutes'.format(elapsed_seconds / 60))

Epoch 1/20
Epoch 2/20
Epoch 3/20
 1/32 [..............................] - ETA: 22:38 - loss: 0.5387 - acc: 0.7812

# 6. Validation

Around 10 epochs my model started to overfit a little bit. The model reached its best accuracy very quickly. It was also handy to save the model and the history for later use.


In [None]:
# Save model and history

model.save('case2run3.h5')
# A context manager guarantees the pickle file is flushed and closed.
with open('history_run3.p', 'wb') as history_file:
    pickle.dump(h.history, history_file)

# How to load a saved history (this yields the plain dict that was pickled,
# not an object with a .history attribute):
# with open('history_run2.p', 'rb') as history_file:
#     history = pickle.load(history_file)

acc = h.history['acc']
val_acc = h.history['val_acc']
loss = h.history['loss']
val_loss = h.history['val_loss']

# Deliberately NOT named `epochs`: that name holds the integer number of
# training epochs passed to fit_generator, and overwriting it with a range
# would break re-running the training cell.
epoch_axis = range(len(acc))

# Accuracy curves: dots for training, line for validation.
plt.plot(epoch_axis, acc, 'bo', label='Training acc')
plt.plot(epoch_axis, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.ylim([0, 1])
plt.legend()

plt.figure()

# Loss curves, drawn in a second figure with the same conventions.
plt.plot(epoch_axis, loss, 'bo', label='Training loss')
plt.plot(epoch_axis, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.ylim([0, 1])
plt.legend()

plt.show()

# 7. Results and Discussion

By choosing 20 epochs, testing with an overfitted model can be avoided.

In [None]:
# Generator for the test dataset: rescaled only, same size and batch settings
# as the other splits.
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size = (image_height,image_width),
        batch_size = batch_size,
        class_mode = 'binary')

# Evaluate on exactly one pass over the test set. A fixed step count would
# draw a fixed number of batches regardless of how many test images exist,
# so images could be counted more than once (or left out) in the averages.
test_steps = int(np.ceil(test_generator.samples / float(batch_size)))
r = model.evaluate_generator(test_generator, steps = test_steps)

# Loss and accuracy
r

#  Statistics

In [None]:
# Collect the true label and the model's output score for every test image.
# The number of images is read from the generator rather than hard-coded, so
# the arrays always match the actual size of the test set.
sample_count = test_generator.samples
y_true = np.zeros(sample_count)
y_score = np.zeros(sample_count)

# Restart the generator at its first batch; an earlier evaluation may have
# left it in the middle of a pass.
test_generator.reset()

i = 0
while i < sample_count:
    inputs_batch, labels_batch = next(test_generator)
    predicts_batch = model.predict(inputs_batch)
    # Clamp the batch to the remaining space so a final batch can never be
    # written past the end of the arrays.
    L = min(labels_batch.shape[0], sample_count - i)
    y_true[i:i + L] = labels_batch.ravel()[:L]
    y_score[i:i + L] = predicts_batch.ravel()[:L]
    i = i + L



In [None]:
# ROC analysis of the collected test scores.
fpr, tpr, thresholds = roc_curve(y_true, y_score)
auc = roc_auc_score(y_true, y_score)
roc_title = 'ROC curve   AUC = {:.3f}'.format(auc)

# Figure 1: ROC curve with the chance diagonal for reference.
plt.figure()
plt.plot(fpr, tpr)
plt.plot([0, 1], [0, 1], '--')
plt.title(roc_title)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid()
plt.show()

# Figure 2: specificity (1 - FPR) and sensitivity (TPR) against the
# decision threshold.
specificity = 1 - fpr
sensitivity = tpr

plt.figure()
for curve, curve_name in ((specificity, 'specificity'), (sensitivity, 'sensitivity')):
    plt.plot(thresholds, curve, label = curve_name)
plt.xlabel('Threshold value')
plt.grid()
plt.legend()
plt.show()

In [None]:
# Metrics

# Decision threshold applied to the sigmoid scores.
th = 0.3

# Threshold once and reuse the boolean predictions for every metric.
y_pred = y_score > th

acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# One (format string, value) pair per printed line, in display order.
metric_lines = (
    ('Accuracy:  {:.3f}', acc),
    ('Precision: {:.3f}', prec),
    ('Recall:    {:.3f}', recall),
    ('F1:        {:.3f}', f1),
)
for template, value in metric_lines:
    print(template.format(value))

print('Classification report')
report = classification_report(y_true, y_pred,
                               labels = [1.0, 0.0],
                               target_names = ['Disease', 'Healthy'])
print(report)

In [None]:
# Confusion matrix

# Passing both class labels explicitly pins the matrix to 2x2. Without them,
# a run in which only one class occurs yields a single cell and the four-way
# unpacking below raises ValueError.
tn, fp, fn, tp = confusion_matrix(y_true, y_score > th, labels = [0.0, 1.0]).ravel()

# Rows are the predicted condition, columns the true condition.
print('                      Confusion matrix')
print('                       True condition')
print('                      Positive Negative      Sum')
print('Predicted | Positive  {:8} {:8} {:8}'.format(tp, fp, tp + fp))
print('condition | Negative  {:8} {:8} {:8}'.format(fn, tn, fn + tn))
print('                 Sum  {:8} {:8} {:8}'.format(tp + fn, fp + tn, tp + fp + fn + tn))
print(' ')
# Sensitivity = TP / (TP + FN); specificity = TN / (TN + FP).
print('Sensitivity: {:.3f}'.format(tp/(tp+fn)))
print('Specificity: {:.3f}'.format(tn/(tn+fp)))

I reached an accuracy of 75%. I expected to have better accuracy (at least 80%). Maybe with better image preprocessing, and by unfreezing and fine-tuning the last couple of convolutional layers of the VGG model, it would be possible. If you look at the confusion matrix, you can see that the model recognizes healthy eyes correctly most often.


# 8. Conclusions


I learned that convolutional networks perform well in image classification problems. Pretrained models are a good way of making models more accurate and making them learn faster. Also, you don't have to build the model from scratch.

I had some major difficulties running the training on my GPU, and I need to fix that problem before case 3 (or use cloud services).
