### Libraries

In [1]:
import pickle
import os

#basic
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

#tensorflow and keras
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Flatten, MaxPooling2D, Dropout, Resizing, Rescaling, RandomBrightness, RandomContrast, RandomCrop, RandomFlip, RandomRotation
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import Model
from keras.utils import load_img, img_to_array

#sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

#open cv
import cv2 as cv


from cascid.configs import config, pad_ufes
from cascid import database

# Local py script
#from model import *

### Globals

In [2]:
FERNANDO_PATH = config.DATA_DIR / 'experiments' / 'fernando'
FERNANDO_PATH.mkdir(exist_ok=True, parents=True)

IMAGE_CACHE = FERNANDO_PATH / 'img_cache.pkl'
FEATURES_FILE = FERNANDO_PATH / 'features.pkl'
MODEL_PATH = FERNANDO_PATH / 'models' / 'deep_learning'

IMDIR = pad_ufes.IMAGES_DIR # Can also be pad_ufes.IMAGES_DIR 

In [3]:
RANDOM_STATE = 42
TRAIN_SIZE = 0.7
VALIDATION_SIZE = 0.15
TEST_SIZE = 0.15
EPOCHS = 3000
IMAGE_SHAPE = (64, 64, 3)
BATCH_SIZE = 256

### Load images

In [4]:
def load_image(name: str):
    pil_img = load_img(
        str(IMDIR / name),
        grayscale=False,
        color_mode='rgb',
        target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
        interpolation='nearest',
        keep_aspect_ratio=False
    )

    return img_to_array(pil_img, dtype=np.uint8)

In [5]:
df = database.get_db()

In [6]:
df.shape

(2298, 26)

In [7]:
MulticlassEncoder = OneHotEncoder(sparse=False) # OHE for y encoding
Y = MulticlassEncoder.fit_transform(df[["diagnostic"]].to_numpy())
x_train, x_test, y_train, y_test = train_test_split(df["img_id"].to_numpy(), Y, test_size=0.2, random_state=RANDOM_STATE)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=RANDOM_STATE)

In [8]:
reader = lambda img_path_list : np.array(list(map(load_image, img_path_list)))

In [9]:
image_dict = {
        "train": reader(x_train),
        "test": reader(x_test),
        "valid": reader(x_valid)
    }

In [10]:
# Write image cache
with open(IMAGE_CACHE, 'wb') as file:
    pickle.dump(image_dict, file)
print("Read operations done, cache file available at {}".format(IMAGE_CACHE))

Read operations done, cache file available at /home/fernandofincatti/.cascid_data/experiments/fernando/img_cache.pkl


In [11]:
# Return to original variables
x_train = image_dict["train"]
x_test = image_dict["test"]
x_valid = image_dict["valid"]

### Model

In [12]:
input_layer = keras.Sequential([
    Rescaling(1./255), # Rescale from 0 to 255 UINT8 to 0 to 1 float.
])

2022-09-18 11:18:19.962389: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 11:18:19.973668: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 11:18:19.973971: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-18 11:18:19.974437: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [13]:
augmentor = keras.Sequential([
    RandomBrightness(factor=(-0.3, 0.3), value_range=(0.0, 1.0), seed=RANDOM_STATE), # Randomly change brightness anywhere from -30% to +30%
    RandomContrast(factor=0.5, seed=RANDOM_STATE), # Randomly change contrast anywhere from -30% to +30%
    RandomFlip(mode="horizontal_and_vertical", seed=RANDOM_STATE), # Randomly flip images either horizontally, vertically or both
    RandomRotation(factor=(-0.3, 0.3), fill_mode="nearest", interpolation="bilinear", seed=RANDOM_STATE), # Randomly rotate anywhere from -30% * 2PI to +30% * 2PI, filling gaps by using 'nearest' strategy
])

In [14]:
resnet = keras.applications.ResNet101(
    weights='imagenet',
    input_shape=IMAGE_SHAPE,
    pooling='avg',
    include_top=False
)

In [15]:
resnet.trainable = False  #to make sure it's not being trained
# Augmentation only on training
feature_extractor_train = keras.Sequential([
    input_layer,
    augmentor,
    resnet
])

In [16]:
# Test/Validation only get rescaled
feature_extractor_test_valid = keras.Sequential([
    input_layer,
    resnet
])

In [17]:
features_train = feature_extractor_train(x_train)
features_valid = feature_extractor_test_valid(x_valid)
features_test = feature_extractor_test_valid(x_test)

2022-09-18 11:18:23.021720: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101


In [18]:
features = {
        "train": features_train.numpy(),
        "test": features_test.numpy(),
        "valid": features_valid.numpy(),
        "y_train": y_train,
        "y_test": y_test,
        "y_valid": y_valid,
    }

In [19]:
with open(FEATURES_FILE, 'wb') as file:
        pickle.dump(features, file)

### Training

In [20]:
with open(FEATURES_FILE, 'rb') as file:
        features = pickle.load(file)

In [21]:
x_train = features["train"]
x_test = features["test"]
x_valid = features["valid"]
y_train = features["y_train"]
y_test = features["y_test"]
y_valid = features["y_valid"]

In [22]:
model = keras.Sequential([
    Input(shape = features["train"].shape[1]),
    Dense(128, activation='relu'),
    Dropout(0.1),
    Dense(128),
    Dropout(0.1),
    Dense(64),
    Dropout(0.1),
    Dense(y_train.shape[1], activation='softmax')
])

In [23]:
model.compile(optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [24]:
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    verbose=1,
    patience=200,
    restore_best_weights=True
)

In [25]:
training_history = model.fit(
    features["train"],
    y_train,
    epochs=EPOCHS,
    validation_split=0.1,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping]
)


Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

In [26]:
model.save(MODEL_PATH)

INFO:tensorflow:Assets written to: /home/fernandofincatti/.cascid_data/experiments/fernando/models/deep_learning/assets


In [27]:
with open(MODEL_PATH / 'history.pkl', 'wb') as fl:
    pickle.dump(training_history.history, fl)
training_history = training_history.history

In [28]:
model.evaluate(x=x_test, y=y_test)



[1.3240433931350708, 0.49130433797836304]

In [29]:
from sklearn.metrics import confusion_matrix


In [30]:
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=True,
                          title=None,
                          cmap=plt.cm.Greens, save_to_file = False):
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        #print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    #print(cm)

    fig, ax = plt.subplots(figsize = (16,16))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    if save_to_file:
        plt.savefig('Assets/files/' + title + '.pdf')
    return ax