# Identificación de Retinopatía Diabética con VGG16

In [1]:
from sklearn.metrics import classification_report

from tesis_lib.io.hdf5datasetgenerator import HDF5DatasetGenerator
from tesis_lib.datasets.rosenbrock_loader import RosenbrockLoader
from tesis_lib.callbacks.trainingmonitor import TrainingMonitor
from tesis_lib.preprocessing.imagetoarrayprocessor import ImageToArrayPreprocessor
from tesis_lib.preprocessing.aspectawareprocessor import AspectAwareProcessor
from tesis_lib.preprocessing.patchpreprocessor import PatchPreprocessor
from tesis_lib.preprocessing.meanpreprocessor import MeanPreprocessor
from tesis_lib.preprocessing.simplepreprocessor import SimpleProcessor

import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import MobileNetV2, VGG16
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import progressbar
import json
import cv2
import os

2023-02-01 15:13:57.750766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-01 15:13:57.893632: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Run identifier in day_month_year_hour_minute form; it is embedded in the
# names of every artifact this notebook writes.
TIME_STAMP = f"{datetime.now():%d_%m_%Y_%H_%M}"

In [3]:
# --- Locations -------------------------------------------------------------
# Root folder holding the 'Training', 'Validation' and 'Test' sub-folders.
DATASET_PATH = './DB'

# Base name shared by the checkpoint, the training plot and the JSON report.
FILENAME = f'ident_vgg16_{TIME_STAMP}'

OUTPUT_PATH = f'./output/{FILENAME}'
MODEL_OUT_PATH = f'./models/{FILENAME}.h5'

# --- Data / training-loop settings -----------------------------------------
IM_SIZE = 512       # images are resized to IM_SIZE x IM_SIZE
NUM_CLASSES = 2
BATCH_SIZE = 32
EPOCHS = 20

# --- Optimiser settings ----------------------------------------------------
LEARNING_RATE = 0.001
# Step-decay parameters; only referenced by the currently disabled
# LearningRateScheduler callback.
DROP_FACTOR = 0.1
DROP_EPOCHS = 10

In [4]:
# Hyper-parameters recorded for the JSON report written at the end of the
# notebook; later cells add the 'optimizer' and 'checkpoint' entries.
report_dict = {}
report_dict['learning-rate'] = str(LEARNING_RATE)
report_dict['epochs'] = EPOCHS
report_dict['image-size'] = IM_SIZE
report_dict['batch-size'] = BATCH_SIZE
report_dict['data'] = {}

In [5]:
def _flow_split(generator, split_name, shuffle):
    """Return a directory iterator over ``DATASET_PATH/<split_name>``.

    All three splits share the same target size, batch size and label
    encoding; only the source generator and the shuffling differ.
    """
    return generator.flow_from_directory(
        directory=os.path.sep.join([DATASET_PATH, split_name]),
        target_size=(IM_SIZE, IM_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=shuffle,
        keep_aspect_ratio=True,
    )


# Every split goes through the VGG16 input preprocessing (no 1/255 rescale).
vgg16_preprocess = tf.keras.applications.vgg16.preprocess_input

# Only the training images are augmented.
augmenting_generator = ImageDataGenerator(
    preprocessing_function=vgg16_preprocess,
    rotation_range=25,
    shear_range=0.20,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_batches = _flow_split(augmenting_generator, 'Training', shuffle=True)

# Validation and test data are read in a fixed order (shuffle=False) and are
# only preprocessed, never augmented.
val_batches = _flow_split(
    ImageDataGenerator(preprocessing_function=vgg16_preprocess),
    'Validation',
    shuffle=False,
)
test_batches = _flow_split(
    ImageDataGenerator(preprocessing_function=vgg16_preprocess),
    'Test',
    shuffle=False,
)

Found 752 images belonging to 2 classes.
Found 640 images belonging to 2 classes.
Found 640 images belonging to 2 classes.


In [6]:
# Create the per-run output folder. makedirs also creates a missing parent
# ('./output') and exist_ok=True makes the cell safe to re-run, without the
# check-then-create race of os.path.exists + os.mkdir.
os.makedirs(OUTPUT_PATH, exist_ok=True)

In [7]:
print("[INFO] Compiling model ...")

# `learning_rate` is the supported keyword; the `lr` alias is deprecated in
# Keras optimizers.
opt = Adam(learning_rate=LEARNING_RATE)

# Not consumed by the stock VGG16 built below; kept because the (currently
# commented-out) report entries further down refer to them.
DROPOUT_RATE = 0.5
REG_FACTOR = 0.0002

# VGG16 trained from scratch (weights=None) with its own classifier head
# sized for NUM_CLASSES.
# NOTE(review): the captured run logged an allocator out-of-memory warning
# for a 2 GiB tensor. With include_top=True at this input size the dense head
# is presumably the culprit -- consider include_top=False plus a small custom
# head. TODO confirm.
model = VGG16(
    input_shape=(IM_SIZE, IM_SIZE, 3),
    include_top=True,
    weights=None,
    # Keras only applies `pooling` when include_top=False, so this argument
    # currently has no effect.
    pooling='max',
    classes=NUM_CLASSES,
    classifier_activation='softmax'
)

model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"]
)

# Training-curve image written by TrainingMonitor.
monitor_path = os.path.sep.join([OUTPUT_PATH, f"{FILENAME}.jpg"])

# Keep only the weights with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(
    MODEL_OUT_PATH,
    monitor="val_loss",
    mode='min',
    save_best_only=True,
    verbose=1)

callbacks = [
    checkpoint,
    # LearningRateScheduler(step_decay),
    TrainingMonitor(monitor_path)
]

# summary() prints the table itself and returns None, so wrapping it in
# display() only added a stray "None" to the cell output.
model.summary()

print("[INFO] training model ...")

H = model.fit(
    x=train_batches,
    steps_per_epoch=len(train_batches),
    validation_data=val_batches,
    validation_steps=len(val_batches),
    epochs=EPOCHS,
    max_queue_size=10,
    callbacks=callbacks,
    verbose=1
)

2023-02-01 15:14:00.258319: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[INFO] Compiling model ...
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 512, 512, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 512, 512, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 512, 512, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 256, 256, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 256, 256, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 256, 256, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None

None

[INFO] training model ...
Epoch 1/20


2023-02-01 15:14:29.956226: W tensorflow/tsl/framework/bfc_allocator.cc:479] Allocator (mklcpu) ran out of memory trying to allocate 2.00GiB (rounded to 2147483648)requested by op sub_106
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-02-01 15:14:29.957290: I tensorflow/tsl/framework/bfc_allocator.cc:1034] BFCAllocator dump for mklcpu
2023-02-01 15:14:29.957311: I tensorflow/tsl/framework/bfc_allocator.cc:1041] Bin (256): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2023-02-01 15:14:29.957314: I tensorflow/tsl/framework/bfc_allocator.cc:1041] Bin (512): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2023-02-01 15:14:29.957316: I tensorflow/tsl/framework/bfc_allocator.cc:1041] Bin (1024): 	To

In [None]:
# Store the optimizer settings with every value stringified.
optimizer_config = opt.get_config()
report_dict['optimizer'] = {
    name: str(value) for name, value in optimizer_config.items()
}

# Record which quantity the checkpoint tracked and the best value it saw.
checkpoint_summary = {}
checkpoint_summary['monitor'] = checkpoint.monitor
checkpoint_summary['best-loss'] = str(checkpoint.best)
report_dict['checkpoint'] = checkpoint_summary

# Disabled: these hyper-parameters are not used by the current model.
# report_dict['regularization-factor'] = REG_FACTOR
# report_dict['dropout-rate'] = DROPOUT_RATE

## Making predictions and evaluating the model

In [None]:
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          save_path=None):
    """
    Print, plot and save a confusion matrix.

    Args:
        cm: 2-D array-like confusion matrix; rows are drawn on the
            'True label' axis and columns on the 'Predicted label' axis.
        classes: Tick labels, one per row/column of ``cm``.
        normalize: When True, each row is divided by its sum before the
            matrix is printed and drawn.
        title: Figure title.
        cmap: Matplotlib colormap used for the heat-map.
        save_path: Destination of the saved figure. Defaults to
            ``cm-<FILENAME>.jpg`` inside ``OUTPUT_PATH``.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the heat-map colours and the numbers
    # written in the cells describe the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any sample stay at 0 instead of turning into NaN.
        cm = np.divide(
            cm.astype('float'), row_totals,
            out=np.zeros(cm.shape, dtype='float'),
            where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are written as-is; fractions are rounded to 2 decimals.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    # Label the axes first so tight_layout can make room for them.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

    if save_path is None:
        save_path = os.path.sep.join([OUTPUT_PATH, f'cm-{FILENAME}.jpg'])
    plt.savefig(save_path)
    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, class_likelihood_ratios

from tesis_lib.utils.ranked import rank5_accuracy
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt

# BATCH_SIZE, IM_SIZE and NUM_CLASSES are deliberately NOT re-declared here.
# This cell used to reset IM_SIZE to 256, which contradicted the 512 used to
# build `test_batches` and train the model, and silently changed the image
# size for any earlier cell re-run afterwards.

# Display names for the two classes, shared by the plot and the report so
# both always agree.
# NOTE(review): assumes class index 0 is 'No DR' and 1 is 'DR' -- verify
# against test_batches.class_indices.
CLASS_NAMES = ['No DR', 'DR']

# Reload the checkpoint written by ModelCheckpoint during training
# (save_best_only=True on val_loss).
print(f"[INFO] loading model {MODEL_OUT_PATH}...")
model = load_model(MODEL_OUT_PATH)

# Predict on the test iterator built earlier in the notebook.
print("[INFO] predicting on test data (no crops)...")

predictions = model.predict(test_batches)

trueLabels = test_batches.classes
predictedLabels = list(predictions.argmax(axis=1))

# Only the rank-1 accuracy is reported; the second value returned by
# rank5_accuracy is discarded.
(rank1, _) = rank5_accuracy(predictions, trueLabels)
print("[INFO] rank-1: {:.2f}%".format(rank1 * 100))

print("\n\n[INFO] Calculating Confusion Matrix")
cm = confusion_matrix(
    y_true=trueLabels,
    y_pred=predictedLabels
)
# plot_confusion_matrix prints the matrix itself, so it is not printed here.
plot_confusion_matrix(cm, CLASS_NAMES, False, "Confusion Matrix")

print("[INFO] Generating the Classification Report")
print(classification_report(
    y_true=trueLabels,
    y_pred=predictedLabels,
    target_names=CLASS_NAMES
))

print("[INFO] Calculating the class likelihood ratios")

ratios = class_likelihood_ratios(
    y_true=trueLabels,
    y_pred=predictedLabels
)

print(ratios)

In [None]:
# Saving a report of the parameters used for training

# Write the collected run parameters next to the other artifacts of this run.
JSON_PATH = os.path.sep.join([OUTPUT_PATH, f'{FILENAME}.json'])

with open(JSON_PATH, "w") as report_file:
    json.dump(report_dict, report_file, indent=1)