In [None]:
%matplotlib inline

# To show model architecture, might need to install these
# !pip install pydot
# !pip install pydotplus
# !pip install graphviz
from tensorflow.keras.utils import plot_model

from typing import Tuple
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pickle as pk

# Notebook-wide matplotlib font sizes: axis labels at 14pt, tick labels at 12pt.
mpl.rcParams.update({
    "axes.labelsize": 14,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
})

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping

from cascid.datasets.pad_ufes.database import get_df, get_train_test_images_raw


# Switch meant to force (re)training of the models. In the cells visible here
# it is only referenced from a commented-out block, so toggling it currently
# changes nothing.
FORCE_TRAIN_MODELS = True


In [None]:
def process_image(filename: str, base_dir: str = "../../data/images/", size: Tuple[int, int] = (100, 100)) -> np.ndarray:
    '''
    Placeholder function to process images: reads the file, resizes it and converts it to grayscale.

    Args:
        filename: Image file name, resolved relative to ``base_dir``.
        base_dir: Directory that holds the images (defaults to the original hard-coded location).
        size: Target size handed to ``cv2.resize`` (defaults to the original 100x100).

    Returns:
        The resized image after ``cv2.COLOR_BGR2GRAY`` conversion.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file. ``cv2.imread`` signals this by
            returning ``None`` instead of raising, which would otherwise surface as an
            opaque error inside ``cv2.resize``.
    '''
    import os  # local import: only needed to build the path

    # Smallest size in dataset is 147x147
    path = os.path.join(base_dir, filename)
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError("Could not read image file: {}".format(path))
    return cv2.cvtColor(cv2.resize(img, size), cv2.COLOR_BGR2GRAY)

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=True,
                          title=None,
                          cmap=plt.cm.Greens, save_to_file = False):
    """
    Draw the confusion matrix of ``y_pred`` against ``y_true`` on a new 9x9 figure.

    Args:
        y_true: True labels, passed straight to ``sklearn.metrics.confusion_matrix``.
        y_pred: Predicted labels, passed straight to ``sklearn.metrics.confusion_matrix``.
        classes: Tick labels for both axes. They are applied positionally, so they must be
            in the same order as the matrix rows (scikit-learn uses sorted label order
            when no explicit ``labels`` are given).
        normalize: If True, each row is divided by its sum (rows then show per-class rates).
        title: Figure title; a default depending on ``normalize`` is used when empty.
        cmap: Colormap handed to ``imshow``.
        save_to_file: If True, the figure is also written to ``Assets/files/<title>.pdf``.

    Returns:
        The matplotlib Axes the matrix was drawn on.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        # A class that is predicted but never occurs in y_true has an all-zero row.
        # Dividing by that zero sum would yield NaN, which also makes cm.max() NaN and
        # breaks the text-colour threshold below, so such rows are left at 0 instead.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots(figsize = (9,9))
    ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; switch to white text on the darker half of the colour range.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    if save_to_file:
        # Save through the figure object rather than pyplot's "current figure" state.
        fig.savefig('Assets/files/' + title + '.pdf')
    return ax

In [None]:
# Load the DataFrame returned by cascid's PAD-UFES `get_df()` helper and display it
# as the cell output.
df = get_df()
df

In [None]:
# Pie chart of how often each value of the "diagnostic" column occurs.
# The trailing ';' suppresses the Axes repr in the cell output.
df["diagnostic"].value_counts().plot.pie();

In [None]:
# Binary view of the diagnosis: MEL, BCC and SCC are labelled "Cancer", everything else "Not".
df['is_cancer'] = np.where(
    df['diagnostic'].isin(["MEL", "BCC", "SCC"]),
    "Cancer",
    "Not",
)
df["is_cancer"].value_counts().plot.pie();

In [None]:
# Fetch the train/test split from cascid's PAD-UFES helper.
# Presumably x_* are image arrays and y_* the matching diagnostic labels; y_train is
# later fed to OneHotEncoder.fit_transform, so it is expected to be 2-D — TODO confirm.
x_train, x_test, y_train, y_test = get_train_test_images_raw()

In [None]:
def _dense_one_hot_encoder() -> OneHotEncoder:
    """Build a OneHotEncoder that returns dense arrays on both old and new scikit-learn.

    scikit-learn 1.2 renamed the ``sparse`` argument to ``sparse_output`` and later
    removed the old name, so the new spelling is tried first with a fallback.
    """
    try:
        return OneHotEncoder(sparse_output=False)
    except TypeError:
        return OneHotEncoder(sparse=False)


MulticlassEnconder = _dense_one_hot_encoder()
BinaryClassEnconder = _dense_one_hot_encoder()

# One-hot targets over every diagnostic class.
Y = MulticlassEnconder.fit_transform(y_train)

# One-hot targets for the binary problem: BCC/MEL/SCC -> "Cancer", everything else -> "Not".
# np.isin works element-wise on the label array, so no per-row Python lambda is needed.
is_cancer_label = np.isin(y_train, ['BCC', 'MEL', 'SCC'])
Y_binary = BinaryClassEnconder.fit_transform(np.where(is_cancer_label, "Cancer", "Not").reshape(-1,1))

In [None]:
def show_results(model: Sequential, history: keras.callbacks.History, y_test: np.ndarray, y_pred: np.ndarray, encoder: OneHotEncoder, cache_img_filename="cache.png") -> plt.Figure:
    """
    Summarise a trained model: architecture diagram, training history and confusion matrix.

    Args:
        model: Model whose architecture is rendered with ``plot_model``.
        history: History object returned by ``model.fit``; every recorded metric is plotted.
        y_test: Encoded true targets; decoded with ``encoder.inverse_transform``.
        y_pred: Model outputs for the same samples; decoded with ``encoder.inverse_transform``.
        encoder: Fitted encoder used to map both target arrays back to class labels.
        cache_img_filename: File the architecture diagram is written to and read back from.

    Returns:
        The figure holding the architecture diagram. The history plot and the confusion
        matrix are drawn on their own figures.
    """
    plot_model(model, show_shapes=True, show_layer_activations=True, to_file=cache_img_filename)
    fig = plt.figure(figsize=(10,30))

    ax = fig.add_subplot(121)
    # cv2 loads BGR; reverse the channel axis so matplotlib shows the diagram in RGB.
    ax.imshow(cv2.imread(cache_img_filename)[:,:,::-1])
    ax.axis('off')

    history_ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
    history_ax.grid(True)
    history_ax.set(xlabel="Epochs",
                   ylabel="Categorical Crossentropy (loss)",
                   ylim=(0, 10),
                   title="Model History")
    plt.show()
    print("Examples of predictions:\n {}".format(y_pred[:3]))

    y_true_classes = encoder.inverse_transform(y_test).ravel()
    y_pred_classes = encoder.inverse_transform(y_pred).ravel()
    # Tick labels must be the class names in the row order of the confusion matrix
    # (sorted labels seen in either array). The unique values of the one-hot matrix
    # itself would only ever be {0.0, 1.0}.
    classes = np.unique(np.concatenate([y_true_classes, y_pred_classes])).tolist()
    plot_confusion_matrix(y_true=y_true_classes, y_pred=y_pred_classes, classes=classes)

    return fig

In [None]:
# Small CNN: three conv -> max-pool -> dropout stages with shrinking filter counts and
# kernel sizes, followed by two dense layers and a softmax output.
# The output width follows the one-hot targets instead of a hard-coded class count.
n_classes = Y.shape[1]

model = Sequential([
    keras.layers.Input(x_train[0].shape),
    Conv2D(64, kernel_size=(7, 7), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Conv2D(32, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Conv2D(16, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

model.compile(optimizer='adam', loss=keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])
# Stop once val_loss has not improved for 30 epochs and roll back to the best weights.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, restore_best_weights=True)
# `callbacks` is documented as a list of callbacks, so the early stopper is wrapped in one.
history = model.fit(x_train, Y, epochs=1000, batch_size=64, validation_split=.2, callbacks=[es], verbose=2)

In [None]:
# if FORCE_TRAIN_MODELS:
#     history, y_test, y_pred, model = model_run(X,Y_binary)
#     model.save("binary_cnn")
#     show_results(model, history, y_test, y_pred, BinaryClassEnconder, "cnn_simple_binary.png")

In [None]:
# Predict on the held-out images; nothing earlier in the notebook defines y_pred, so it
# is computed here to keep the cell runnable on a fresh kernel.
y_pred = model.predict(x_test)
y_pred_classes = MulticlassEnconder.inverse_transform(y_pred).ravel()

# y_test comes from get_train_test_images_raw() in the same form as y_train, which is
# fed to the encoder as raw labels, so it is only flattened here, not decoded.
# NOTE(review): confirm y_test holds raw labels rather than one-hot rows.
y_true = np.asarray(y_test).ravel()

# Tick labels in the row order of the confusion matrix: sorted labels seen in either array.
class_names = np.unique(np.concatenate([y_true, y_pred_classes])).tolist()
ax = plot_confusion_matrix(y_true=y_true, y_pred=y_pred_classes, classes=class_names, cmap="Blues", title="CNN confusion matrix")

In [None]:
# Replace the in-memory split with the arrays stored under the same keys in `f`.
# NOTE(review): `f` is not defined anywhere in the visible notebook, so this cell raises
# NameError on Restart & Run All — confirm where `f` is supposed to be opened/loaded.
x_train = f["x_train"]
x_test = f["x_test"]
y_train = f["y_train"]
y_test = f["y_test"]



In [None]:
# Loss and accuracy on the training split.
# The model was fit on one-hot targets (Y) with categorical cross-entropy, so y_train
# must be in that same encoding here — NOTE(review): confirm what f["y_train"] contains.
model.evaluate(x_train, y_train)

In [None]:
# Loss and accuracy on the held-out split.
# The model was fit on one-hot targets with categorical cross-entropy, so y_test must be
# one-hot encoded the same way — NOTE(review): confirm what f["y_test"] contains.
model.evaluate(x_test, y_test)