In [None]:
import os
# Make the Drive folder the working directory for the rest of the notebook.
# NOTE(review): assumes Google Drive is already mounted at /content/drive — confirm.
os.chdir('/content/drive/My Drive/')

In [None]:
pip install git+https://github.com/qubvel/segmentation_models

In [None]:
pip install efficientnet



In [None]:
# for accessing tabular data
import pandas as pd
import numpy as np
import os
os.chdir('/content/drive/My Drive/')
# adding classweight
from sklearn.utils import class_weight
# Evaluation Metric
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix, precision_score, recall_score
# for visualization
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from prettytable import PrettyTable
# backend
import keras
from keras import backend as K
import tensorflow as tf
from keras.callbacks import Callback
# for transfer learning
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.densenet import DenseNet121
from keras.applications.resnet import ResNet50, ResNet152
from keras.applications.inception_v3 import InceptionV3
from efficientnet.keras import EfficientNetB0, EfficientNetB3, EfficientNetB4
from keras.applications.xception import Xception
# for model architecture
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Dropout, Dense, Conv2D, MaxPooling2D, Activation, Flatten
# for Tensorboard visualization
from keras.callbacks import TensorBoard
# for Data Augmentation
from keras.preprocessing.image import ImageDataGenerator

In [None]:
import tensorflow
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.applications.densenet import DenseNet121

In [None]:
# Load the image arrays saved under processed_images/ for each split.
# NOTE(review): the next cell loads these same three files again, so one of the two loads is redundant.
x_train = np.load('/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/training.npy')  #training set
x_validation = np.load('/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/validation.npy')  #validation set
x_test = np.load('/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/test.npy')    #test set

In [None]:
# Load the three image splits and print their shapes.
# Only a missing file is reported with the friendly message; any other failure
# (corrupt file, pickled content, out of memory) keeps its real traceback instead
# of being mislabelled as "file not exists".
try:
    # allow_pickle=False makes np.load refuse pickled object arrays.
    x_train = np.load("/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/training.npy", allow_pickle=False)
    x_validation = np.load("/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/validation.npy", allow_pickle=False)
    x_test = np.load("/content/drive/MyDrive/DR-Data-Blindness_Detection/processed_images/test.npy", allow_pickle=False)
except FileNotFoundError as missing:
    # Name the file that could not be found so the problem can be fixed directly.
    print("file not exists:", missing.filename)
else:
    print("Loaded Successfully...\n")
    print(x_train.shape)
    print(x_validation.shape)
    print(x_test.shape)

Loaded Successfully...

(3112, 512, 512, 3)
(550, 512, 512, 3)
(1928, 512, 512, 3)


In [None]:
# Read each split's CSV and keep only its 'diagnosis' column as the label series.
train_labels = pd.read_csv(
    '/content/drive/MyDrive/DR-Data-Blindness_Detection/training.csv'
)['diagnosis']
validation_labels = pd.read_csv(
    '/content/drive/MyDrive/DR-Data-Blindness_Detection/validation.csv'
)['diagnosis']
# Report how many labels each split contains.
print("Training:", len(train_labels))
print("Validation:", len(validation_labels))

Training: 3112
Validation: 550


# Transforming Target Labels

In [None]:
def ordinal_regression(labels, num_classes=5):
    '''Encode integer grades as cumulative ("ordinal") multi-hot targets.

    A grade k becomes a row whose first k+1 entries are 1 and the rest 0,
    e.g. with five classes grade 2 -> [1, 1, 1, 0, 0].

    The encoding is built directly from the grade values, so the output always
    has `num_classes` columns even when some grade does not occur in `labels`.

    Args:
        labels: 1-D sequence (list, ndarray or pandas Series) of integer grades
            in the range [0, num_classes).
        num_classes: total number of grades, i.e. the width of the encoding.

    Returns:
        Integer ndarray of shape (len(labels), num_classes).

    Raises:
        ValueError: if any grade lies outside [0, num_classes).
    '''
    grades = np.asarray(labels).astype(int).reshape(-1)
    if grades.size and (grades.min() < 0 or grades.max() >= num_classes):
        raise ValueError(
            "labels must lie in [0, {}), got values from {} to {}".format(
                num_classes, grades.min(), grades.max()))
    # Column j is active exactly when the grade is at least j.
    return (grades[:, None] >= np.arange(num_classes)[None, :]).astype(int)
# Encode both label series with ordinal_regression() and show the resulting shapes.
labels_train = ordinal_regression(train_labels)
labels_validation = ordinal_regression(validation_labels)
print(labels_train.shape)
print(labels_validation.shape)
# Print one training example (row 4) before and after encoding.
print("Ex: Original Category: {}\n After encoding: {}".format(train_labels.iloc[4],labels_train[4]))

(3112, 5)
(550, 5)
Ex: Original Category: 2
 After encoding: [1 1 1 0 0]


# Metrics

In [None]:
# Compute one weight per grade with scikit-learn's 'balanced' scheme, using the raw
# (un-encoded) training grades. `classes` is passed as an ndarray, which is the type
# scikit-learn documents for this argument; a plain list is not guaranteed to be accepted.
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes = np.arange(5),
                                                  y = train_labels)
print(class_weights)

[0.40573664 1.98216561 0.73309776 3.79512195 2.47968127]


In [None]:
def kappa_metric(y_true, y_pred):
    '''Quadratic-weighted Cohen's kappa between encoded targets and model outputs.

    Both inputs are 2-D arrays with one column per output. Targets are decoded as
    (row sum - 1); outputs are first thresholded at 0.5 and then decoded the same way.
    '''
    true_grades = y_true.sum(axis=1) - 1
    predicted_grades = (y_pred > 0.5).astype(int).sum(axis=1) - 1
    return cohen_kappa_score(true_grades, predicted_grades, weights='quadratic')

In [None]:
class Metrics(Callback):
    '''Keras callback that scores quadratic-weighted kappa on validation data after
    every epoch and saves the model whenever the score matches the best seen so far.

    The validation set is read from `self.validation_data`. It can be supplied through
    the constructor or assigned on the instance before training; if it is still missing
    at the end of an epoch the kappa step is skipped instead of raising.
    '''
    def __init__(self, path, validation_data=None):
        ''' Initialization of variables

        Args:
            path: file path the model is saved to when validation kappa improves.
            validation_data: optional (X_val, y_val) pair used for the kappa score.
        '''
        # Run the real Callback initialiser (the previous super(Callback, self) call skipped it).
        super().__init__()
        self.path = path
        self.validation_data = validation_data
        self.val_kappas = []
    def ModelCheckPoint(self, path):
        ''' This function is used for Storing the model weights if val_kappa improves from all the previous epochs'''
        self.model.save(path)
    def on_train_begin(self, logs=None):
        ''' Reset the per-epoch kappa history at the start of every training run.'''
        self.val_kappas = []
    def on_epoch_end(self, epoch, logs=None):
        ''' This function is used for calculating kappa score on each epoch and updates the validation kappa score if it improves from the previous epochs'''
        validation_data = getattr(self, 'validation_data', None)
        if validation_data is None:
            # Nothing to score against: report it once per epoch and keep training.
            print("\n\t\tNo validation data available to the Metrics callback; skipping val_kappa.")
            return
        X_val, y_val = validation_data[:2]
        # Decode encoded targets and thresholded outputs to a single grade per sample.
        y_val = y_val.sum(axis=1) - 1
        y_pred = self.model.predict(X_val) > 0.5
        y_pred = y_pred.astype(int).sum(axis=1) - 1
        _val_kappa = cohen_kappa_score(
            y_val,
            y_pred,
            weights='quadratic'
        )
        self.val_kappas.append(_val_kappa)
        print(f"\b - val_kappa:  {_val_kappa:.4f}")
        # The newest score equals the running maximum when it is a new best (or ties it).
        if _val_kappa == max(self.val_kappas):
            print("\n\t\tValidation Kappa has improved. Saving model to {}...".format(self.path))
            self.ModelCheckPoint(self.path)
        else:
            print("\n\t\tValidation kappa did not improved from {}".format(max(self.val_kappas)))
        return

In [None]:
class PerformanceMetric:
    '''Confusion-matrix, precision and recall heat-maps for ordinal multi-hot predictions.

    Targets use the cumulative encoding of this notebook (grade k -> first k+1 entries
    set). Model outputs are thresholded at 0.5 and decoded to the index of the highest
    active output. Matrices follow scikit-learn's layout: rows are actual classes,
    columns are predicted classes.
    '''
    def __init__(self,actual_labels,predicted_labels):
        ''' Initialization of variables

        Args:
            actual_labels: 2-D multi-hot array (n_samples, n_classes) of true labels.
            predicted_labels: 2-D array of the same shape holding the model outputs.
        '''
        self.actual_labels = actual_labels
        self.predicted_labels = predicted_labels
        # Width of the encoding; set by single_value_conversion().
        self._num_classes = 0
    def single_value_conversion(self):
        ''' This function is used for Converting model predicted values into single values
           Ex: model_predicted_value: [0,1,0,0,0] and it converts as 1

           Safe to call more than once: labels that are already 1-D are left untouched.
           A row with no output above 0.5 decodes to -1.'''
        predicted = np.asarray(self.predicted_labels)
        actual = np.asarray(self.actual_labels)
        if predicted.ndim == 2:
            self._num_classes = predicted.shape[1]
            active = predicted > 0.5
            # Back-fill from the right: an output counts as set if it or any higher output is set.
            filled = np.logical_or.accumulate(active[:, ::-1], axis=1)[:, ::-1]
            self.predicted_labels = filled.sum(axis = 1)-1
        if actual.ndim == 2:
            self._num_classes = max(self._num_classes, actual.shape[1])
            self.actual_labels = actual.sum(axis = 1)-1
    def _class_ids(self):
        ''' Sorted class ids: every encoded grade plus any value actually observed (e.g. -1).'''
        observed = np.union1d(self.actual_labels, self.predicted_labels)
        return np.union1d(observed, np.arange(self._num_classes)).astype(int)
    @staticmethod
    def _safe_divide(numerator, denominator):
        ''' Element-wise division that yields 0 where the denominator is 0.'''
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        result = np.zeros_like(numerator)
        np.divide(numerator, denominator, out=result, where=denominator != 0)
        return result
    def confusionMatrix(self):
        ''' This function is used for calculating confusion matrix between model predicted values and true values using sklearn implementation.
            The label list is passed explicitly so classes that never occur still get a row and column.'''
        return confusion_matrix(self.actual_labels, self.predicted_labels, labels=self._class_ids())
    def precision(self, matrix):
        ''' This function is used for calculating precision matrix between predicted values and true values using confusion matrix.
            Each column (predicted class) is divided by its total, so columns sum to 1.'''
        matrix = np.asarray(matrix)
        return self._safe_divide(matrix, matrix.sum(axis=0, keepdims=True))
    def recall(self, matrix):
        ''' this function is used for calculating recall matrix between predicted values and true values using confusion matrix.
            Each row (actual class) is divided by its total, so rows sum to 1.'''
        matrix = np.asarray(matrix)
        return self._safe_divide(matrix, matrix.sum(axis=1, keepdims=True))
    def subplot_(self, matrix, i, title):
        ''' This function is used for subplots: draws `matrix` as heat-map number `i` of three.'''
        plt.subplot(1,3,i)
        class_ids = self._class_ids()
        if len(class_ids) != np.shape(matrix)[0]:
            # Fall back to positional ids when the matrix was not built from the stored labels.
            class_ids = np.arange(np.shape(matrix)[0])
        labels = [int(class_id) for class_id in class_ids]
        sns.heatmap(matrix, annot=True, cmap=sns.light_palette('green'),linewidths = 0.8,cbar = False, fmt=".3f", xticklabels=labels, yticklabels=labels)
        plt.title(title)
        plt.xlabel('Predicted Class Labels')
        plt.ylabel('Actual Class Labels')
    def plotting(self):
        """
        This function is used for calculating confusion, recall and precision matrixes and plotting it using subplots.
        """
        self.single_value_conversion()
        counts = self.confusionMatrix()
        precision_matrix = self.precision(counts)
        recall_matrix = self.recall(counts)
        plt.figure(figsize=(20,5))
        self.subplot_(counts, 1, 'Confusion Matrix')
        self.subplot_(precision_matrix, 2, 'Precision')
        self.subplot_(recall_matrix, 3, 'Recall')
        plt.show()

In [None]:
def plotting(iter, val_kappa):
    '''Plot the validation kappa of each epoch against the epoch index 0..iter-1.'''
    epoch_indices = list(range(iter))
    plt.plot(epoch_indices, val_kappa)
    # Title, axis labels and grid for the figure.
    plt.title('validation_kappa on each epoch')
    plt.xlabel('epoch')
    plt.ylabel('val_kappa')
    plt.grid()
    plt.show()

In [None]:
def test_prediction(predicted_labels):
    predicted_labels = predicted_labels > 0.5
    prediction_ordinal = np.empty(predicted_labels.shape, dtype = int)
    prediction_ordinal[:,4] = predicted_labels[:,4]
    for i in range(3, -1, -1): prediction_ordinal[:, i] = np.logical_or(predicted_labels[:,i], prediction_ordinal[:,i+1])
    predicted_labels = prediction_ordinal.sum(axis = 1)-1
    return predicted_labels

# Model Training

In [None]:
def baseline_model():
    ''' This function is used for building a base line convolutional neural network architecture.

    Three Conv2D + MaxPooling2D stages (16, 32, 64 filters) feed a global average pool,
    three Dense layers with one Dropout, and a 5-unit sigmoid output. The model is
    compiled with binary cross-entropy and Adam.

    Returns:
        The compiled Sequential model for 512x512x3 inputs.
    '''
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=(2, 2), input_shape=[512,512,3], activation= 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=32, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=64, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(units=128, activation = 'relu'))
    model.add(Dense(units=256, activation = 'relu'))
    model.add(Dropout(rate=0.2))
    model.add(Dense(units=512, activation='relu'))
    # One sigmoid per column of the 5-wide ordinal target.
    model.add(Dense(5, activation='sigmoid'))
    # `learning_rate` replaces the deprecated `lr` argument of the optimizer.
    model.compile(loss='binary_crossentropy', optimizer= tensorflow.keras.optimizers.Adam(learning_rate=0.00005), metrics=['accuracy'])
    return model

In [None]:
# Build the baseline network and print its layer-by-layer summary.
baseline = baseline_model()
baseline.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 511, 511, 16)      208       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 255, 255, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 254, 254, 32)      2080      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 127, 127, 32)     0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 126, 126, 64)      8256      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 63, 63, 64)       0

  super(Adam, self).__init__(name, **kwargs)


In [None]:
# Echo the validation image array as the cell output.
# NOTE(review): x_validation.shape / x_validation.dtype would be a more compact sanity check.
x_validation

array([[[[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
         [128, 128, 128],
         [128, 128, 128]],

        [[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
         [128, 128, 128],
         [128, 128, 128]],

        [[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
         [128, 128, 128],
         [128, 128, 128]],

        ...,

        [[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
         [128, 128, 128],
         [128, 128, 128]],

        [[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
         [128, 128, 128],
         [128, 128, 128]],

        [[128, 128, 128],
         [128, 128, 128],
         [128, 128, 128],
         ...,
         [128, 128, 128],
        

In [None]:
# NOTE(review): a `!` command runs in its own subshell, so this `cd` does not change the
# notebook's working directory for later cells; `%cd` would — confirm the intent.
!cd baseline1/

'012-CC (1).pdf'
'012-CC (2).pdf'
 012-CC.pdf
'2020-11-18 (2) (1).png'
'2020-11-18 (2).png'
 2020-11-18.png
 20-pds-012_AVC2.pdf
 20_pds_012-AVC4.pdf
 20-PDS-012_AVC.pdf
 20PDS012-AVC.pdf
 20_pds_012-CC.pdf
 20PDS012_PDS3503.pdf
'20-pds-012_R-ppt (1).pptx'
 20-pds-012_R-ppt.pptx
'4_Linear Regression (1) (1).gdoc'
'4_Linear Regression (1) (2).gdoc'
'4_Linear Regression (1).gdoc'
'About the project (2).pdf'
 baseline1
 Classroom
'Colab Notebooks'
'COLLEGE REVIEW.gform'
 creditcard.csv.gsheet
 datapreprocessing.csv.gsheet
 DR-Data-Blindness_Detection
 DS-012.pdf
 JovitaV_CNN.ipynb
'missing data-py code.gdoc'
'Naive Bayes (1).gdoc'
 observation-11.pdf
 observation.pdf
'ppsize foto.jpg'
'Random Forest (1).gdoc'
'Random Forest.gdoc'
 r-prog-output.pdf
 Stella.gdoc
'Vaccine certificate.pdf'
'WhatsApp Image 2021-02-22 at 9.04.14 PM.jpeg'


In [None]:
# Train the baseline model, scoring validation kappa after each epoch and saving the
# best model to the path that the evaluation cell loads.
baseline = baseline_model()
# Make sure the checkpoint directory exists before the callback tries to save into it.
os.makedirs('/content/drive/My Drive/models', exist_ok=True)
kappa_metrics = Metrics('/content/drive/My Drive/models/baseline1.h5')
# Metrics.on_epoch_end reads self.validation_data, so set it explicitly instead of
# relying on Keras to populate it.
kappa_metrics.validation_data = (x_validation, labels_validation)
tensorboard = TensorBoard(log_dir = '/content/drive/My Drive/baseline1')
call_backs = [kappa_metrics,tensorboard]
# Previously the callbacks were created but never passed to fit(), so no checkpoint
# was written and nothing was logged to TensorBoard.
# NOTE: epochs stays at 3 as in the last executed run; the earlier draft of this cell used 30.
# NOTE(review): the targets are 5-wide multi-hot rows; confirm that Keras applies
# `class_weight` per grade as intended for this target format.
history = baseline.fit(x_train, labels_train, epochs = 3, batch_size = 32, verbose = 2,
                       class_weight = dict(enumerate(class_weights)),
                       validation_data = (x_validation, labels_validation),
                       callbacks = call_backs)

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/3
98/98 - 21s - loss: 0.2546 - accuracy: 0.9775 - 21s/epoch - 219ms/step
Epoch 2/3
98/98 - 20s - loss: 0.1946 - accuracy: 0.9984 - 20s/epoch - 206ms/step
Epoch 3/3
98/98 - 20s - loss: 0.1805 - accuracy: 1.0000 - 20s/epoch - 206ms/step


In [None]:
# Show the raw sigmoid outputs (five per image) of the baseline model on the validation images.
baseline.predict(x_validation)

array([[9.9242920e-01, 9.9561775e-01, 7.8373802e-01, 3.4885564e-01,
        2.2130183e-05],
       [9.9278039e-01, 9.9596101e-01, 7.9019433e-01, 3.5375923e-01,
        1.8928253e-05],
       [9.9459630e-01, 9.9767250e-01, 7.9864109e-01, 3.8634405e-01,
        9.1678712e-06],
       ...,
       [9.9393266e-01, 9.9698907e-01, 7.9650152e-01, 3.6638811e-01,
        1.1974744e-05],
       [9.9324328e-01, 9.9672347e-01, 7.9417157e-01, 3.6904070e-01,
        1.4433109e-05],
       [9.9511296e-01, 9.9790537e-01, 7.9941082e-01, 3.9281830e-01,
        7.9925867e-06]], dtype=float32)

In [None]:
# Rebuild the baseline network, load the saved checkpoint and report
# loss, accuracy and quadratic kappa on the validation split.
baseline = baseline_model()
baseline.load_weights("/content/drive/My Drive/models/baseline1.h5")
result1 = baseline.evaluate(x_validation, labels_validation)
y_pred = baseline.predict(x_validation)
print(
    "After running the model for 30 epochs we got loss = {} Accuracy = {} kappa_score = {} on validation data".format(
        np.round(result1[0], 4),
        np.round(result1[1], 4),
        np.round(kappa_metric(labels_validation, y_pred), 4),
    )
)

In [None]:
BATCH_SIZE = 32

def create_datagen():
    '''Return an ImageDataGenerator applying random zoom, rotation and both flips.'''
    return ImageDataGenerator(
        zoom_range=0.2,  # set range for random zoom
        rotation_range = 180,
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True,  # randomly flip images
    )

# Using generator
# Pair the images with the ordinal-encoded targets (labels_train): the models end in a
# 5-unit sigmoid layer, so the raw integer grades in train_labels do not match their output.
data_generator = create_datagen().flow(x_train, labels_train, batch_size=BATCH_SIZE, seed=2019)

# EfficientNet-B3


In [None]:
def GAP2D():
    '''Create and return a new GlobalAveragePooling2D layer.'''
    return GlobalAveragePooling2D()
def dropout(value = 0.5):
    '''Create and return a new Dropout layer with rate `value` (default 0.5).'''
    return Dropout(value)
def dense():
    '''Create and return a new 5-unit Dense layer with sigmoid activation.'''
    return Dense(5, activation='sigmoid')

In [None]:
# Module-level layer instances used as the classification head by efficientnet_b3() and xception().
# NOTE(review): because these are single instances, every model built from them shares the
# same head layers (including the Dense layer's weights) — confirm this is intended.
global_average_pooling_layer = GAP2D()
dropout_layer = dropout()
dense_layer = dense()

In [None]:
def efficientnet_b3():
    '''This function is used for building a model architecture of pretrained EfficientB3 on imagenet data set.

    The backbone (no top, 512x512x3 input) is followed by global average pooling,
    dropout and a 5-unit sigmoid layer, then compiled with binary cross-entropy and Adam.

    Returns:
        A newly built, compiled model with its own head layers.
    '''
    backbone = EfficientNetB3(include_top = False, input_shape = (512,512,3) )
    # Create fresh head layers on every call. Reusing the module-level layer instances
    # would share the Dense weights between every model built here and would tie the
    # head to the feature width of whichever backbone used it first.
    x = GAP2D()(backbone.output)
    x = dropout()(x)
    output = dense()(x)
    model = Model(backbone.input,output)
    # `learning_rate` replaces the deprecated `lr` argument of the optimizer.
    model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.00005), metrics=['accuracy'])
    return model

In [None]:
# Side length of the square input images; the loaded arrays are (N, 512, 512, 3).
IMG_SIZE = 512

# Build the B3 backbone without weights and load the local no-top B3 checkpoint into it.
# (The weights file is an EfficientNet-B3 checkpoint, so the architecture must be B3 too.)
efficient = EfficientNetB3(
    weights=None,
    include_top=False,
    input_shape=(IMG_SIZE,IMG_SIZE,3)
)
efficient.load_weights('/content/efficientnet-b3_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5')
def build_model():
    '''Stack the EfficientNet-B3 backbone with pooling, dropout and a 5-unit sigmoid head.

    Returns:
        The Sequential model compiled with binary cross-entropy and Adam.
    '''
    model = Sequential()
    model.add(efficient)
    # Use the layer classes imported at the top of the notebook.
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(5, activation='sigmoid'))


    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` replaces the deprecated `lr` argument.
        optimizer=Adam(learning_rate=0.00005),
        metrics=['accuracy']
    )

    return model
model = build_model()
model.summary()

In [None]:
# Build the EfficientNet-B3 based model and print its layer summary.
efficientnet_ = efficientnet_b3()
efficientnet_.summary()

In [None]:
!pip install tensorflow==2.6.0
!pip install keras~=2.6.0

In [None]:
# Train the EfficientNet-B3 model on augmented batches, scoring validation kappa each epoch.
tensorboard = TensorBoard(log_dir = '/content/drive/My Drive/efficientnet_b3')
# Save the best model where the evaluation cell loads it from. The previous target was the
# pretrained-weights file under /content, which would have been overwritten.
os.makedirs('/content/drive/My Drive/models', exist_ok=True)
kappa_metrics = Metrics('/content/drive/My Drive/models/efficientnet_b3.h5')
# Metrics.on_epoch_end reads self.validation_data, so set it explicitly.
kappa_metrics.validation_data = (x_validation, labels_validation)
data_generator = ImageDataGenerator(horizontal_flip=True,vertical_flip=True,rotation_range=180,zoom_range = 0.2)
efficientnet_ = efficientnet_b3()
# fit() accepts generators directly; fit_generator() is deprecated.
# NOTE(review): the targets are 5-wide multi-hot rows; confirm that Keras applies
# `class_weight` per grade as intended for this target format.
history = efficientnet_.fit(
                    data_generator.flow(x_train, labels_train, batch_size = 8),
                    # Whole number of batches per epoch (the last batch may be smaller).
                    steps_per_epoch=int(np.ceil(len(x_train) / 8)),
                    epochs=30,
                    initial_epoch=0,
                    verbose=1,
                    # Validation data is an in-memory array pair, so no validation_steps is needed.
                    validation_data=(x_validation, labels_validation),
                    callbacks=[kappa_metrics,tensorboard],
                    # Keras expects a {class index: weight} mapping, not a bare array.
                    class_weight = dict(enumerate(class_weights)))

In [None]:
# Rebuild the EfficientNet-B3 model, load the saved checkpoint and report
# loss, accuracy and quadratic kappa on the validation split.
efficientnet_ = efficientnet_b3()
efficientnet_.load_weights("/content/drive/My Drive/models/efficientnet_b3.h5")
result1 = efficientnet_.evaluate(x_validation, labels_validation, verbose = 2)
y_pred = efficientnet_.predict(x_validation, batch_size = 8)
print(
    "After running the model for 30 epochs we got loss = {} Accuracy = {} kappa_score = {} on validation data".format(
        np.round(result1[0], 4),
        np.round(result1[1], 4),
        np.round(kappa_metric(labels_validation, y_pred), 4),
    )
)

In [None]:
# Predict every split with the EfficientNet model and decode the outputs to single grades
# with test_prediction(); print the first five grades and the result shape for each split.
# NOTE(review): the variable names say "efficientb1" but the model comes from efficientnet_b3().
ytrain_efficientb1 = efficientnet_.predict(x_train)
ytrain_efficientb1 = test_prediction(ytrain_efficientb1)
print("First five data points predictions in training:",ytrain_efficientb1[:5])
print("length of traindata prediction:",ytrain_efficientb1.shape,"\n")

# Validation split.
yvalidation_efficientb1 = efficientnet_.predict(x_validation)
yvalidation_efficientb1 = test_prediction(yvalidation_efficientb1)
print("First five data points predictions in validation:",yvalidation_efficientb1[:5])
print("length of validation data prediction:",yvalidation_efficientb1.shape,"\n")

# Test split.
ytest_efficientb1 = efficientnet_.predict(x_test)
ytest_efficientb1 = test_prediction(ytest_efficientb1)
print("First five data points predictions in test:",ytest_efficientb1[:5])
print("length of test data prediction:",ytest_efficientb1.shape)

In [None]:
# Plot confusion, precision and recall heat-maps for the validation predictions.
# y_pred holds the raw model outputs from the evaluation cell; PerformanceMetric thresholds them itself.
metric = PerformanceMetric(labels_validation, y_pred)
metric.plotting()

In [None]:
# Reload the TensorBoard notebook extension and open it on the EfficientNet-B3 log directory.
%reload_ext tensorboard
%tensorboard --logdir='/content/drive/My Drive/efficientnet_b3'

# Xception

In [None]:
def xception():
    '''This function is used for building a model architecture of pretrained Xception on imagenet data set.

    The backbone (imagenet weights, no top, 512x512x3 input) is followed by global average
    pooling, dropout and a 5-unit sigmoid layer, then compiled with binary cross-entropy and Adam.

    Returns:
        A newly built, compiled model with its own head layers.
    '''
    backbone = Xception(weights = 'imagenet',include_top = False, input_shape = (512,512,3) )
    # Create fresh head layers on every call. Reusing the module-level layer instances
    # would share the Dense weights between every model built from them and would tie the
    # head to the feature width of whichever backbone used it first.
    x = GAP2D()(backbone.output)
    x = dropout()(x)
    output = dense()(x)
    model = Model(backbone.input,output)
    # `learning_rate` replaces the deprecated `lr` argument of the optimizer.
    model.compile(loss='binary_crossentropy', optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.00005), metrics=['accuracy'])
    return model

In [None]:
# Build the Xception based model and print its layer summary.
xception_ = xception()
xception_.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 255, 255, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 255, 255, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 255, 255, 32) 0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Train the Xception model, scoring validation kappa after each epoch.
tensorboard = TensorBoard(log_dir = '/content/drive/My Drive/xception')
# Save the best model to the same path the evaluation cell loads
# (previously it was written to a different folder than the one read back).
os.makedirs('/content/drive/My Drive/models', exist_ok=True)
kappa_metrics = Metrics('/content/drive/My Drive/models/xception.h5')
# Metrics.on_epoch_end reads self.validation_data, so set it explicitly.
kappa_metrics.validation_data = (x_validation, labels_validation)
xception_ = xception()
# With in-memory arrays, batch_size alone determines the number of steps, so the
# fractional steps_per_epoch / validation_steps arguments are not passed.
history = xception_.fit(x_train, labels_train, batch_size=8,
                    epochs=30,
                    initial_epoch=0,
                    verbose=1,
                    validation_data=(x_validation, labels_validation),
                    callbacks=[kappa_metrics,tensorboard])

In [None]:
# Rebuild the Xception model, load the saved checkpoint and report
# loss, accuracy and quadratic kappa on the validation split.
xception_ = xception()
xception_.load_weights("/content/drive/My Drive/models/xception.h5")
result1 = xception_.evaluate(x_validation, labels_validation, verbose = 2)
y_pred = xception_.predict(x_validation, batch_size = 8)
print(
    "After running the model for 30 epochs we got loss = {} Accuracy = {} kappa_score = {} on validation data".format(
        np.round(result1[0], 4),
        np.round(result1[1], 4),
        np.round(kappa_metric(labels_validation, y_pred), 4),
    )
)

In [None]:
# Predict every split with the Xception model and decode the outputs to single grades
# with test_prediction(); print the first five grades and the result shape for each split.
ytrain_xception = xception_.predict(x_train)
ytrain_xception = test_prediction(ytrain_xception)
print("First five data points predictions in training:",ytrain_xception[:5])
print("length of traindata prediction:",ytrain_xception.shape,"\n")

# Validation split.
yvalidation_xception = xception_.predict(x_validation)
yvalidation_xception = test_prediction(yvalidation_xception)
print("First five data points predictions in validation:",yvalidation_xception[:5])
print("length of validation data prediction:",yvalidation_xception.shape,"\n")

# Test split.
ytest_xception = xception_.predict(x_test)
ytest_xception = test_prediction(ytest_xception)
print("First five data points predictions in test:",ytest_xception[:5])
print("length of test data prediction:",ytest_xception.shape)

In [None]:
# Plot confusion, precision and recall heat-maps for the validation predictions.
# y_pred holds the raw model outputs from the evaluation cell; PerformanceMetric thresholds them itself.
metric = PerformanceMetric(labels_validation, y_pred)
metric.plotting()

In [None]:
# Reload the TensorBoard notebook extension and open it on the Xception log directory.
%reload_ext tensorboard
%tensorboard --logdir='/content/drive/My Drive/xception'