In [None]:
import tensorflow as tf
import keras
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


import os
import time
import IPython

import h5py
import mitdeeplearning as mdl

import functools
from tqdm import tqdm

In [None]:
# Working directory of the kernel; the dataset and model paths used below are
# built by prefixing this value.
CWD = os.getcwd()
# print(CWD)
# Ask Keras to clear its backend session state before any model is built or
# loaded in this kernel.
keras.backend.clear_session()

In [None]:
### Functions: Load and Visualize Datasets ###

### Function: load dataset ###
@keras.saving.register_keras_serializable(package='capstone',name='load_dataset')
def load_dataset(path_to_training_data):
    """Print a summary of an HDF5 dataset file and return a loader for it.

    Args:
        path_to_training_data: Path to the ``.h5`` file.

    Returns:
        The ``mdl.lab2.TrainingDatasetLoader`` constructed from the same path.
    """
    # The file is only inspected here, so open it read-only explicitly.
    # NOTE(review): h5py releases before 3.0 are documented as defaulting to a
    # read/write mode when no mode is given - confirm against the pinned version.
    with h5py.File(path_to_training_data, 'r') as f:
        # Print the keys (names) of all groups and datasets in the file
        print("Keys:", list(f.keys()))

        # Print shape and dtype of every top-level dataset (groups are skipped)
        for key in f.keys():
            entry = f[key]
            if isinstance(entry, h5py.Dataset):
                print(f"Dataset: {key}")
                print("  Shape:", entry.shape)
                print("  Data type:", entry.dtype)

    ### Instantiate Loader Function ###
    return mdl.lab2.TrainingDatasetLoader(path_to_training_data)

### Function: visualize dataset ###
@keras.saving.register_keras_serializable(package='capstone',name='visualize_dataset')
def visualize_dataset(path_to_training_data, loader):
    """Print the training-set size and plot one malignant and one benign image.

    Args:
        path_to_training_data: Not used by this function.
        loader: Object exposing ``get_train_size()`` and ``get_batch(n)``;
            ``get_batch`` must return ``(images, labels)``.

    A batch of 100 samples is drawn; sample 23 of the malignant subset
    (label 1) and sample 9 of the benign subset (label 0) are displayed
    side by side.
    """
    train_size = loader.get_train_size()
    print(train_size)

    images, labels = loader.get_batch(100)
    malignant_rows = np.where(labels==1)[0]
    benign_rows = np.where(labels==0)[0]
    malignant_images = images[malignant_rows]
    benign_images = images[benign_rows]

    # (subplot position, image stack, index inside the stack, title)
    panels = (
        (1, malignant_images, 23, "Malignant"),
        (2, benign_images, 9, "Benign"),
    )

    plt.figure(figsize=(5,5))
    for position, stack, index, caption in panels:
        plt.subplot(1, 2, position)
        plt.imshow(stack[index])
        plt.title(caption)
        plt.grid(False)

In [None]:
### Functions: Model Definitions ###

### Standard CNN ###

# Helper Functions

@keras.saving.register_keras_serializable(package='capstone', name='resize_images')
def resize_images(x):
    """Resize the image tensor ``x`` to 64x64 with ``tf.image.resize`` defaults."""
    target_size = (64, 64)
    resized = tf.image.resize(x, target_size)
    return resized

# CNN Function
@keras.saving.register_keras_serializable(package='capstone', name='make_standard_ResNet50_V2')
def make_standard_ResNet50_V2(n_outputs = 1):
    """Build a classifier: resize -> ResNet50V2 backbone -> Flatten -> Dense(512) -> Dense(n_outputs).

    Args:
        n_outputs: Number of units in the final Dense layer, which has no
            activation (raw outputs).

    Returns:
        An unbuilt ``tf.keras.Sequential`` model.

    NOTE(review): the statement order below is kept as-is on purpose; saved
    models are restored through this builder, and layer creation order
    presumably affects auto-generated layer names - confirm before reordering.
    """
    
    # Lambda layer wrapping the registered `resize_images` helper (64x64 output).
    Resize = tf.keras.layers.Lambda(resize_images)
    Flatten = tf.keras.layers.Flatten
    # Dense layers default to ReLU unless `activation` is overridden at call time.
    Dense = functools.partial(tf.keras.layers.Dense, activation='relu')
    # NOTE(review): per the Keras applications docs, `classes` and
    # `classifier_activation` only take effect when include_top=True - confirm.
    ResNet50V2 = tf.keras.applications.ResNet50V2(
        include_top=False,
        weights="imagenet", # Utilizing Transfer Learning, also maintains consistency
        input_tensor=None,
        input_shape=(64,64,3),
        pooling=None,
        classes=1000,
        classifier_activation="softmax",
    )
    # Re-wrap the backbone as a Model running from the input of layers[1] to
    # the output of the last layer.
    ResNet50V2 = tf.keras.Model(inputs = ResNet50V2.layers[1].input, 
                                outputs = ResNet50V2.layers[-1].output)

    model = tf.keras.Sequential()
    
    model.add(Resize)
    model.add(ResNet50V2)
    model.add(Flatten())
    model.add(Dense(512))
    # Final layer overrides the ReLU default: no activation.
    model.add(Dense(n_outputs, activation=None))

    return model

### DB-VAE ###

### Define Decoder Network ###
@keras.saving.register_keras_serializable(package='capstone', name='make_decoder_network')
def make_decoder_network(latent_dim = 100, n_filters = 12 ):
    """Build the decoder as a ``tf.keras.Sequential`` stack.

    Args:
        latent_dim: Accepted for interface compatibility; not used when
            building the layers.
        n_filters: Scales the first Dense layer (``4*4*6*n_filters`` units) and
            the channel count of the reshape to ``(4, 4, 6*n_filters)``.

    Returns:
        A Sequential model made of:
          * Dense (ReLU) + Reshape to a 4x4 feature map,
          * three blocks of [Conv2DTranspose (ReLU) -> BatchNormalization ->
            LeakyReLU(alpha=0.2)] with 256, 128 and 64 filters,
          * a final Conv2DTranspose with 3 filters and sigmoid activation.
        Every transposed convolution uses a 4x4 kernel, stride 2 and 'same'
        padding.
    """
    layers = tf.keras.layers
    # Shared configuration of every transposed convolution in the decoder.
    upsample = functools.partial(
        layers.Conv2DTranspose,
        kernel_size=(4, 4),
        strides=(2, 2),
        padding='same',
        activation='relu',
    )

    # Layers are created in the same order in which they are stacked.
    stack = [
        layers.Dense(units=4*4*6*n_filters, activation='relu'),
        layers.Reshape(target_shape=(4, 4, 6*n_filters)),
    ]
    for n_channels in (256, 128, 64):
        stack.append(upsample(n_channels))
        stack.append(layers.BatchNormalization())
        stack.append(layers.LeakyReLU(alpha=0.2))
    stack.append(upsample(3, activation='sigmoid'))

    return tf.keras.Sequential(stack)

### DB_VAE Helper Functions ###


### VAE Reparameterization ###
@keras.saving.register_keras_serializable(package='capstone', name='sampling_VAE_reparameterization')
def sampling(z_mean, z_logsigma):
    """Draw a latent sample with the reparameterization trick.

    Computes ``z = z_mean + exp(0.5 * z_logsigma) * epsilon`` where ``epsilon``
    is standard-normal noise with the same shape as ``z_mean``.

    Args:
        z_mean: Tensor of latent means.
        z_logsigma: Tensor broadcastable against ``z_mean``; it is halved and
            exponentiated to scale the noise.

    Returns:
        The sampled latent tensor ``z``.
    """
    # Use the runtime shape rather than the static `.shape`: the static batch
    # dimension can be unknown (None) while a graph is traced, and unpacking it
    # also forced the input to be exactly rank 2.
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    z = z_mean + tf.math.exp(0.5 * z_logsigma) * epsilon
    return z

### Defining the VAE loss function ###
@keras.saving.register_keras_serializable(package='capstone', name='vae_loss_function')
def vae_loss_function(x, x_recon, mu, logsigma, kl_weight=0.0005):
    """Per-sample VAE loss: weighted latent (KL-style) term plus L1 reconstruction.

    Args:
        x: Input batch; reduced over axes 1, 2 and 3, so it must be rank 4.
        x_recon: Reconstruction with the same shape as ``x``.
        mu: Latent means; summed over axis 1.
        logsigma: Latent log-scale term with the same shape as ``mu``.
        kl_weight: Multiplier applied to the latent term.

    Returns:
        A tensor with one loss value per sample.
    """
    # Element-wise latent penalty, summed over the latent axis.
    kl_terms = tf.exp(logsigma) + tf.square(mu) - 1.0 - logsigma
    kl_per_sample = 0.5 * tf.reduce_sum(kl_terms, axis=1)

    # Mean absolute error over all non-batch axes.
    l1_per_sample = tf.reduce_mean(tf.abs(x-x_recon), axis=(1,2,3))

    return kl_weight * kl_per_sample + l1_per_sample

### Loss function for DB-VAE ###
@keras.saving.register_keras_serializable(package='capstone',name='debiasing_loss_function')
def debiasing_loss_function(x, x_pred, y, y_logit, mu, logsigma):
  """Combined DB-VAE loss: classification loss plus VAE loss on malignant samples.

  Args:
    x: Input image batch.
    x_pred: Reconstructed image batch.
    y: Ground-truth labels, one per sample (``(batch, 1)`` or ``(batch,)``);
      label 1 marks the samples whose VAE loss is counted.
    y_logit: Classification logits with the same shape as ``y``.
    mu: Latent means.
    logsigma: Latent log-scale term.

  Returns:
    ``(total_loss, classification_loss)`` where ``total_loss`` is a scalar mean
    over the batch and ``classification_loss`` is the unreduced sigmoid
    cross-entropy, with the same shape as ``y_logit``.
  """
  vae_loss = vae_loss_function(x, x_pred, mu, logsigma)  # shape (batch,)
  classification_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_logit)
  malignance_indicator = tf.cast(tf.equal(y, 1), tf.float32)

  # `DB_VAE.encode` produces logits of shape (batch, 1) while `vae_loss` is
  # (batch,). Combining them directly broadcasts to (batch, batch) and averages
  # every sample's VAE loss against every other sample's indicator. Flatten the
  # label-derived terms so all three line up sample by sample.
  per_sample_classification = tf.reshape(classification_loss, [-1])
  per_sample_indicator = tf.reshape(malignance_indicator, [-1])

  total_loss = tf.reduce_mean(
      per_sample_classification +
      per_sample_indicator * vae_loss
  )
  return total_loss, classification_loss

### Defining and creating the DB-VAE ###
@keras.saving.register_keras_serializable(package='capstone')
class DB_VAE(tf.keras.Model):
  """Debiasing VAE: a ResNet50V2-based encoder/classifier plus a decoder.

  The encoder emits ``2 * latent_dim + 1`` values per sample: column 0 is the
  classification logit, the next ``latent_dim`` columns are the latent means
  and the remaining columns are the latent log-sigma terms.
  """

  def __init__(self, latent_dim, **kwargs):
    """Create the encoder and decoder sub-networks.

    Args:
      latent_dim: Number of latent variables.
      **kwargs: Standard ``tf.keras.Model`` keyword arguments (for example
        ``name``); accepted so the model can be rebuilt from a config that
        carries the base-class entries.
    """
    super().__init__(**kwargs)
    self.latent_dim = latent_dim

    # Define the number of outputs for the encoder. Recall that we have
    # `latent_dim` latent variables, as well as a supervised output for the
    # classification.
    num_encoder_dims = 2*self.latent_dim + 1

    self.encoder = make_standard_ResNet50_V2(num_encoder_dims)
    self.decoder = make_decoder_network()

  def get_config(self):
    """Return the config, including ``latent_dim``, needed to re-create the model."""
    config = super().get_config()
    config.update({"latent_dim": self.latent_dim})
    return config

  def encode(self, x):
    """Run the encoder and split its output.

    Returns:
      ``(y_logit, z_mean, z_logsigma)`` where ``y_logit`` has a trailing axis
      of size 1.
    """
    encoder_output = self.encoder(x)
    y_logit = tf.expand_dims(encoder_output[:, 0], -1)
    z_mean = encoder_output[:, 1:self.latent_dim+1]
    z_logsigma = encoder_output[:, self.latent_dim+1:]

    return y_logit, z_mean, z_logsigma

  def reparameterize(self, z_mean, z_logsigma):
    """Sample a latent vector via the module-level ``sampling`` helper."""
    z = sampling(z_mean, z_logsigma)
    return z

  def decode(self, z):
    """Reconstruct an image batch from latent vectors ``z``."""
    reconstruction = self.decoder(z)
    return reconstruction

  def call(self, x):
    """Full forward pass.

    Returns:
      ``(y_logit, z_mean, z_logsigma, recon)``.
    """
    y_logit, z_mean, z_logsigma = self.encode(x)
    z = self.reparameterize(z_mean, z_logsigma)
    recon = self.decode(z)
    return y_logit, z_mean, z_logsigma, recon

  def predict(self, x):
    """Return only the classification logits for ``x``.

    Note: this overrides ``tf.keras.Model.predict`` with a narrower signature
    and returns whatever ``encode`` produces (a tensor of raw logits), not
    post-processed predictions.
    """
    y_logit, z_mean, z_logsigma = self.encode(x)
    return y_logit
  
### DB_VAE Training Helper Functions ###

# Function to return the means for an input image batch
@keras.saving.register_keras_serializable(package='capstone',name='get_latent_mu')
def get_latent_mu(images, dbvae, batch_size=1024, latent_dim=100):
    """Encode ``images`` in batches and collect the latent means.

    Args:
        images: Array of images indexed along axis 0. Each batch is cast to
            float32 and divided by 255 (assumes 0-255 pixel values - TODO
            confirm against the caller).
        dbvae: Model exposing ``encode(batch)`` that returns
            ``(y_logit, z_mean, z_logsigma)``.
        batch_size: Number of images encoded per call.
        latent_dim: Width of the returned array; must match the width of the
            means returned by ``dbvae.encode``.

    Returns:
        ``np.ndarray`` of shape ``(N, latent_dim)`` holding the latent means.
    """
    N = images.shape[0]
    mu = np.zeros((N, latent_dim))
    for start_ind in range(0, N, batch_size):
        # The last valid exclusive bound is N (the previous N+1 only worked
        # because slicing silently clamps out-of-range bounds).
        end_ind = min(start_ind+batch_size, N)
        batch = (images[start_ind:end_ind]).astype(np.float32)/255.
        _, batch_mu, _ = dbvae.encode(batch)
        mu[start_ind:end_ind] = batch_mu
    return mu

@keras.saving.register_keras_serializable(package='capstone',name='get_training_sample_probabilities')
def get_training_sample_probabilities(images, dbvae, bins=10, smoothing_fac=0.001, latent_dim=100):
    """Compute resampling probabilities that favour rare latent values.

    For every latent dimension the latent means are histogrammed, the
    smoothed density of each sample's bin is inverted and normalised, and each
    sample keeps the largest such value seen across dimensions. The result is
    normalised to sum to 1.

    Args:
        images: Images passed on to ``get_latent_mu``.
        dbvae: Model passed on to ``get_latent_mu``.
        bins: Number of histogram bins per latent dimension.
        smoothing_fac: Constant added to every bin density before inversion.
        latent_dim: Number of latent dimensions to process.

    Returns:
        1-D ``np.ndarray`` with one sampling probability per image.
    """
    print("Recomputing the sampling probabilities")
    # Forward latent_dim so the mean matrix has the width this loop iterates
    # over (it used to be fixed at get_latent_mu's default of 100).
    mu = get_latent_mu(images, dbvae, latent_dim=latent_dim)
    training_sample_p = np.zeros(mu.shape[0])
    for i in range(latent_dim):
        latent_distribution = mu[:,i]
        hist_density, bin_edges =  np.histogram(latent_distribution, density=True, bins=bins)
        # Open the outer edges so every sample falls into one of the bins.
        bin_edges[0] = -float('inf')
        bin_edges[-1] = float('inf')
        bin_idx = np.digitize(latent_distribution, bin_edges)
        hist_smoothed_density = hist_density + smoothing_fac
        hist_smoothed_density = hist_smoothed_density / np.sum(hist_smoothed_density)
        # digitize returns 1-based bin numbers, hence the -1.
        p = 1.0/(hist_smoothed_density[bin_idx-1])
        p = p / np.sum(p)
        training_sample_p = np.maximum(p, training_sample_p)
    training_sample_p /= np.sum(training_sample_p)

    return training_sample_p

In [None]:
### Instantiate Loaders ###
# One loader per test dataset; the position in `loaders` is the dataset index
# used for `results` further down.
loader_20_ISIC_DiDI = load_dataset(f'{CWD}/datasets/split-80train-20test/test_ISIC_DiDI.h5')
loader_ISIC_DiDI = load_dataset(f'{CWD}/datasets/whole/ISIC_DiDI.h5')
loader_DiDI = load_dataset(f'{CWD}/datasets/DiDI/DiDI.h5')

loaders = [loader_20_ISIC_DiDI, loader_ISIC_DiDI, loader_DiDI]

In [None]:
### Which models do you want? ###
# Folder (relative to CWD) holding the saved Model_<i>.keras files, and the
# date components that appear in that folder's name.
subfolder = '2024-5-30-models_4'
year, month, day = 2024, 5, 30

# Filled by the model-loading cell.
models = list()

In [None]:
# Rebuild the list from scratch so that re-running this cell does not append a
# second copy of every model.
# NOTE(review): safe_mode=False turns off Keras' safe-deserialization checks
# (these models contain a Lambda layer); only load files from a trusted source.
models = [
    keras.models.load_model(f'{CWD}/{subfolder}/Model_{i}.keras', safe_mode=False)
    for i in range(1, 7)
]

In [None]:
def _sigmoid(logits):
    """Numerically stable logistic function for a NumPy array of logits."""
    return np.exp(-np.logaddexp(0.0, -logits))


def _confusion_counts(y_true, y_pred):
    """Return ``(tp, fp, fn, tn)`` for flat binary arrays where 1 is positive."""
    actual_pos = y_true == 1
    actual_neg = y_true == 0
    predicted_pos = y_pred == 1
    predicted_neg = y_pred == 0
    tp = int(np.sum(actual_pos & predicted_pos))
    fp = int(np.sum(actual_neg & predicted_pos))
    fn = int(np.sum(actual_pos & predicted_neg))
    tn = int(np.sum(actual_neg & predicted_neg))
    return tp, fp, fn, tn


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` or NaN when the denominator is zero."""
    if denominator == 0:
        return np.nan
    return float(numerator) / float(denominator)


def evaluate(model, runs, batch_size, loader_test):
    """Average binary-classification metrics of ``model`` over several test batches.

    Label 1 is the positive class. With

        TP = true 1, predicted 1      FP = true 0, predicted 1
        FN = true 1, predicted 0      TN = true 0, predicted 0

    the metrics computed per batch are

        accuracy              = (TP + TN) / batch_size
        specificity           = TN / (FP + TN)
        sensitivity (recall)  = TP / (TP + FN)
        precision             = TP / (TP + FP)
        F1                    = 2 * precision * recall / (precision + recall)

    A metric whose denominator is zero is recorded as NaN for that batch.

    Args:
        model: Object whose ``predict(batch_x)`` returns one raw logit per
            sample, as a NumPy array or anything ``np.asarray`` can convert
            (for example an eager tensor).
        runs: Number of batches to draw and score.
        batch_size: Number of samples requested per batch.
        loader_test: Object whose ``get_batch(batch_size)`` returns
            ``(batch_x, batch_y)`` with 0/1 labels in ``batch_y``.

    Returns:
        ``[accuracy, specificity, sensitivity, precision, recall, f1score]``,
        each the plain mean over ``runs`` batches (so one NaN batch makes the
        corresponding mean NaN).

    Raises:
        ValueError: If the number of predictions differs from the number of labels.
        AssertionError: If the labels/predictions are not all 0 or 1.
    """
    acc = np.empty(shape=[runs])
    spec = np.empty(shape=[runs])
    sens = np.empty(shape=[runs])
    pre = np.empty(shape=[runs])
    rec = np.empty(shape=[runs])
    f1s = np.empty(shape=[runs])

    for i in range(runs):
        (batch_x, batch_y) = loader_test.get_batch(batch_size)

        ### Predict on the test batch ###
        # DB_VAE.predict returns a tensor while a plain Keras model returns a
        # NumPy array; converting up front makes the rest type-agnostic.
        logits = np.asarray(model.predict(batch_x), dtype=np.float64).reshape(-1)
        y_pred = np.round(_sigmoid(logits))
        y_true = np.asarray(batch_y).reshape(-1)

        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"got {y_pred.shape[0]} predictions for {y_true.shape[0]} labels"
            )

        # Counting directly (instead of indexing a confusion matrix) keeps the
        # TP/TN cells unambiguous and also works when a batch contains a
        # single class.
        tp, fp, fn, tn = _confusion_counts(y_true, y_pred)

        # Every sample must land in exactly one of the four cells.
        assert batch_size == tp + fp + fn + tn

        ### Compute Accuracy: TP + TN divided by batch_size ###
        accuracy = _safe_ratio(tp + tn, batch_size)
        if batch_size > 0:
            # sanity check against the element-wise definition
            assert np.isclose(accuracy, np.mean(y_true == y_pred))

        ### Compute Sensitivity: TP divided by FN + TP ###
        ### Sensitivity is the same as Recall ###
        sensitivity = _safe_ratio(tp, tp + fn)
        recall = sensitivity

        ### Compute Specificity: TN divided by FP + TN ###
        specificity = _safe_ratio(tn, fp + tn)

        ### Compute Precision: TP divided by TP + FP ###
        precision = _safe_ratio(tp, tp + fp)

        ### Compute F1-Score: two times the product of precision and recall, divided by the sum of precision and recall ###
        ### https://stackoverflow.com/questions/68796138/a-way-around-f1-score-calculation-when-recall-and-precision-is-zero-in-python ###
        # NaN never compares equal to anything, so test with np.isnan.
        if np.isnan(precision) or np.isnan(recall) or precision + recall == 0:
            f1score = np.nan
        else:
            f1score = 2 * precision * recall / float(precision + recall)

        acc[i] = accuracy
        spec[i] = specificity
        sens[i] = sensitivity
        pre[i] = precision
        rec[i] = recall
        f1s[i] = f1score

    return [acc.mean(), spec.mean(), sens.mean(), pre.mean(), rec.mean(), f1s.mean()]

In [None]:
# Uninitialised buffer indexed as [test dataset, model, metric]:
# 3 test datasets, 6 models, 6 metrics per model.
results = np.empty((3, 6, 6))

In [None]:
# Score every model (j) on every test dataset (i): 30 batches of 256 samples each.
for i in range(3):
    for j in range(6):
        results[i, j] = evaluate(
            model=models[j],
            runs=30,
            batch_size=256,
            loader_test=loaders[i],
        )

In [None]:
print(results[0]) # Test on 20-ISIC-DiDI -> good metric, though there is some overlap, so Model 2 will perform really well and Model 5 should do well too.

In [None]:
print(results[1]) # Test on ISIC_DiDI -> good metric, though this strongly favors Model 2 because it is Model 2's training dataset all over again, and a simple CNN will obviously do well on its own training set.

In [None]:
print(results[2]) # Test on DiDI -> decent metric, but it unfairly lowers the performance of models other than 2 and 5, because DDI's images were not collected in the same format as ISIC's.

In [None]:
# Metrics on the first test dataset (20-ISIC-DiDI): one row per model, one column per metric.
results_20_ISIC_DiDI = pd.DataFrame(
    results[0],
    columns=['accuracy', 'specificity', 'sensitivity', 'precision', 'recall', 'f1score'],
)

print(results_20_ISIC_DiDI)

In [None]:
# Metrics on the second test dataset (ISIC_DiDI): one row per model, one column per metric.
results_ISIC_DiDI = pd.DataFrame(
    results[1],
    columns=['accuracy', 'specificity', 'sensitivity', 'precision', 'recall', 'f1score'],
)

print(results_ISIC_DiDI)

In [None]:
# Metrics on the third test dataset (DiDI): one row per model, one column per metric.
results_DiDI = pd.DataFrame(
    results[2],
    columns=['accuracy', 'specificity', 'sensitivity', 'precision', 'recall', 'f1score'],
)

print(results_DiDI)