In [None]:
import SimpleITK as sitk
import numpy as np
import tensorflow as tf
import math
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [None]:
def plot_histogram(data_col, title, xlabel, ylabel, x_range=None):
    """
    Plot a 20-bin histogram with a KDE overlay for one column of values.

    Parameters:
    data_col: Values to plot; passed unchanged to ``sns.histplot``
        (e.g. a pandas Series).
    title (str): Figure title.
    xlabel (str): Label for the x-axis.
    ylabel (str): Label for the y-axis.
    x_range (sequence, optional): ``(xmin, xmax)`` limits for the x-axis.
        ``None`` (default) or an empty sequence keeps the automatic limits.

    Returns:
    None. The figure is rendered with ``plt.show()``.
    """
    plt.figure(figsize=(10, 6))
    sns.histplot(data_col, bins=20, kde=True, color='skyblue')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Test against None and length instead of truthiness: `if x_range:` raises
    # "truth value of an array is ambiguous" for a NumPy array like np.array([0, 100]).
    if x_range is not None and len(x_range) > 0:
        plt.xlim(x_range)
    plt.show()

# Image Data Preprocessing

In [None]:
def forresnet(image_sample, slice_index=0, target_size=(224, 224)):
    """
    Turn one slice of a SimpleITK image into a 3-channel, ResNet-sized tensor.

    Steps: convert the image to a NumPy array, pick one slice along the first
    array axis, resize it to ``target_size``, replicate the single grayscale
    channel three times, and divide by 255.

    Parameters:
    image_sample: Image accepted by ``sitk.GetArrayFromImage``.
    slice_index (int): Index along the first array axis of the slice to use.
        Defaults to 0 (the first slice). Ignored for 2-D images, which have
        no slice axis.
    target_size (tuple): ``(height, width)`` handed to ``tf.image.resize``.
        Defaults to ``(224, 224)``.

    Returns:
    Tensor of shape ``(height, width, 3)`` for a 2-D or 3-D input image.
    No batch dimension is added.
    """
    image_array = sitk.GetArrayFromImage(image_sample)

    # Only volumes are sliced; indexing a 2-D array with [i, :, :] would raise.
    if image_array.ndim > 2:
        image_array = image_array[slice_index, :, :]

    # tf.image.resize needs a trailing channel axis: (H, W) -> (H, W, 1).
    image_array = np.expand_dims(image_array, axis=-1)
    image_array = tf.image.resize(image_array, target_size)

    # Drop the singleton channel, then replicate the grayscale plane three
    # times so the result has the 3-channel layout the model input expects.
    image_array = tf.squeeze(image_array, axis=-1)
    image_array = tf.stack([image_array] * 3, axis=-1)

    # NOTE(review): dividing by 255 assumes intensities in [0, 255]; confirm
    # this matches the input data's actual intensity range.
    return image_array / 255.0

# Model Evaluation

In [None]:
def evaluate_model(model,threshold, X_train, y_train, X_val, y_val, X_test = None, y_test = None, include_test = False):
    """
    Score a binary classifier on each data split, show the results, and return them.

    For every split the model's predictions are thresholded into 0/1 labels,
    then accuracy, precision, recall, F1 and a confusion matrix are computed.
    The score table is shown with ``display`` and one confusion-matrix heatmap
    is drawn per split.

    Parameters:
    model: Object with a ``predict(X)`` method returning scores comparable
        to ``threshold``.
    threshold (float): Scores strictly greater than this become class 1.
    X_train, y_train: Training inputs and true labels.
    X_val, y_val: Validation inputs and true labels.
    X_test, y_test: Test inputs and true labels; only used when
        ``include_test`` is True.
    include_test (bool): Also evaluate the test split. Defaults to False.

    Returns:
    pandas.DataFrame indexed by 'Set Type' ('Train', 'Validation' and, when
    requested, 'Test') with columns 'Accuracy', 'Precision', 'Recall' and
    'F1 Score'. Confusion matrices are plotted but not returned.

    Raises:
    ValueError: If ``include_test`` is True but ``X_test`` or ``y_test`` is None.
    """
    # Fail early with a clear message instead of an obscure error from predict(None).
    if include_test and (X_test is None or y_test is None):
        raise ValueError("include_test=True requires both X_test and y_test")

    def calculate_metrics(X, y):
        """Return (accuracy, precision, recall, f1, confusion matrix) for one split."""
        # np.asarray guarantees `.astype` exists whatever array-like predict() returns.
        y_pred = np.asarray(model.predict(X))
        y_pred_binary = (y_pred > threshold).astype('int32')  # Scores -> 0/1 labels

        # zero_division=0 on all three ratio metrics: same value as the default
        # when a denominator is empty, but without the warning.
        return (
            accuracy_score(y, y_pred_binary),
            precision_score(y, y_pred_binary, zero_division = 0),
            recall_score(y, y_pred_binary, zero_division = 0),
            f1_score(y, y_pred_binary, zero_division = 0),
            confusion_matrix(y, y_pred_binary),
        )

    # Only evaluate the splits that were requested, so no placeholder row is
    # ever fed to the DataFrame constructor.
    splits = [('Train', X_train, y_train), ('Validation', X_val, y_val)]
    if include_test:
        splits.append(('Test', X_test, y_test))

    titles = [name for name, _, _ in splits]
    results = [calculate_metrics(X, y) for _, X, y in splits]

    # The last element of each result is the confusion matrix; it is plotted
    # below rather than stored in the table.
    metrics_df = pd.DataFrame(
        [scores[:-1] for scores in results],
        columns = ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        index = pd.Index(titles, name = 'Set Type'),
    )

    display(metrics_df)

    ### Plot confusion matrices, one panel per evaluated split
    num_plots = len(results)
    fig, axes = plt.subplots(1, num_plots, figsize = (5 * num_plots, 5))

    for ax, scores, title in zip(axes, results, titles):
        sns.heatmap(scores[-1], annot = True, fmt = 'd', cmap = 'Blues', ax = ax)
        ax.set_title(f'Confusion Matrix - {title}')

    plt.tight_layout()
    plt.show()

    return metrics_df