Import required modules

In [1]:
import gc
import json
import math
import numpy as np
import os
import patient_data
import tensorflow as tf

from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

Access the folder path for the cancer and the non-cancer images

In [2]:
all_paths = json.loads(open("./paths.json").read())

personal_path = all_paths['personal_path']
cancerous_path = personal_path + all_paths['cancerous_path']

Configure GPUs is applicable

In [3]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print('Num GPUs Available: ', len(physical_devices))
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  0


Load in all the DICOM files and preprocess/label images

In [4]:
# Using the patient_data data structure, load in all the patient data and save it in a dictionary with the folder name as the key
def load_all_patients(path, add_label = False):
    patients = np.array([])
    folder = os.listdir(path)
    for name in folder:
        patients= np.append(patients, patient_data.Patient(os.path.join(path, name)))
        if add_label:
            if patients[-1].segpath == None:
                print(name, "was not processed correctly")
                patients.pop()
            else:
                patients[-1].label_imgs()
    return patients

patients = load_all_patients(cancerous_path, True)
patients = np.append(patients, patients)
print(patients)


[<patient_data.Patient object at 0x0000024B198902F0>
 <patient_data.Patient object at 0x0000024B2F63EE70>
 <patient_data.Patient object at 0x0000024B43057380>
 <patient_data.Patient object at 0x0000024B198902F0>
 <patient_data.Patient object at 0x0000024B2F63EE70>
 <patient_data.Patient object at 0x0000024B43057380>]


Setting up train/test data

In [5]:
# # create a list for only the cancerous dataset data
# x_c = []
# y_c = []

# for patient in patients:
#     for i, img in enumerate(patient.ct.data.values()):
#         x_c.append(img)
#         y_c.append(patient.labels[i])


# x_c, y_c = shuffle(x_c, y_c)
patients = shuffle(patients)
# Train-test split should be 80-20. 
# Since the data has been shuffled, we can just grab the 1st 80% of the list and make it the train set and the remainder is the test set
train_patients = patients[:math.floor(len(patients) * 0.8)]
test_patients = patients[math.floor(len(patients) * 0.8):]
print(train_patients)
print(test_patients)

[<patient_data.Patient object at 0x0000024B2F63EE70>
 <patient_data.Patient object at 0x0000024B198902F0>
 <patient_data.Patient object at 0x0000024B43057380>
 <patient_data.Patient object at 0x0000024B2F63EE70>]
[<patient_data.Patient object at 0x0000024B43057380>
 <patient_data.Patient object at 0x0000024B198902F0>]


Create a custom callback to clear any memory that is no longer being used

In [6]:
class MyCustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        gc.collect
        tf.keras.backend.clear_session()

In [17]:

# Define K-Fold Cross-Validation
n_splits = 3
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
# x_data = np.array(x_c)/255  # Normalize the images
# y_data = np.array(y_c)
# Model training and evaluation loop
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_patients, [0]*len(train_patients))):
    print(f"\nTraining fold {fold + 1}/{n_splits}")
    
    # Split data
    p_train, p_val = train_patients[train_idx], train_patients[val_idx]
    print('point a passed')

    # Build the model
    model = Sequential([
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(512, 512, 1)),
        MaxPool2D(pool_size=(2, 2), strides=2),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPool2D(pool_size=(2, 2), strides=2),
        Flatten(),
        Dense(units=1, activation='sigmoid')
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    print('point b passed')

    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    x_train = []
    y_train = np.array([])
    for p in p_train:
        x_train.extend(y for y in p.ct.data.values())
        y_train = np.append(y_train, p.labels)
    x_train = np.asarray(x_train)

    x_val = []
    y_val = np.array([])
    for p in p_val:
        x_val.extend(y for y in p.ct.data.values())
        y_val = np.append(y_val, p.labels)
    x_val = np.asarray(x_val)

    print(x_train.shape)
    print((i.shape, i.dtype) for i in model.inputs)

    # Train the model
    model.fit(
        x=x_train,
        y=y_train,
        validation_data=(x_val, y_val),
        batch_size=10,
        epochs=20,
        callbacks=[MyCustomCallback()],
        verbose=1
    )
    print("Model architecture built")
    print('point c passed')
    # Evaluate the model
    predictions = (model.predict(x_val) > 0.5).astype("int32")
    report = classification_report(y_val, predictions, output_dict=True)
    print(classification_report(y_val, predictions))
    
    # Save fold results
    fold_results.append(report)
    K.clear_session()


# Aggregate results
avg_accuracy = np.mean([fold['accuracy'] for fold in fold_results])
print(f"\nAverage Accuracy Across {n_splits} Folds: {avg_accuracy:.4f}")


Training fold 1/3
point a passed
point b passed
(235, 512, 512)
<generator object <genexpr> at 0x0000024B52FF6B50>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1s/step - accuracy: 0.7300 - loss: 33.7756 - val_accuracy: 0.8585 - val_loss: 5.2310
Epoch 2/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.8420 - loss: 8.8725 - val_accuracy: 0.8255 - val_loss: 1.6352
Epoch 3/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - accuracy: 0.8895 - loss: 2.8090 - val_accuracy: 0.6274 - val_loss: 6.7471
Epoch 4/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.9739 - loss: 0.2042 - val_accuracy: 0.8774 - val_loss: 0.5997
Epoch 5/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.9625 - loss: 0.1457 - val_accuracy: 0.6981 - val_loss: 2.4338
Epoch 6/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1s/step - accuracy: 0.9868 - loss: 0.0373 - val_accuracy: 0.6745 - val_loss: 3.5133
Epoch 7/20
[1m24/24[0m [32m━━━━━━━━

KeyboardInterrupt: 

In [None]:
model.summary()

NameError: name 'model' is not defined

In [None]:
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
def getgradcam_heatmap_fixed(model, img_array, last_conv_layer):
    grad_model = Model(inputs=model.input, outputs=[last_conv_layer.output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]
    heatmap = tf.reduce_sum(conv_outputs * pooled_grads, axis=-1)
    heatmap = tf.maximum(heatmap, 0) / tf.reduce_max(heatmap)
    return heatmap.numpy()

def compute_saliency_map_fixed(model, img_array):
    with tf.GradientTape() as tape:
        tape.watch(img_array)
        predictions = model(img_array)
        pred_index = tf.argmax(predictions[0])
        loss = predictions[:, pred_index]
    grads = tape.gradient(loss, img_array)
    saliency = tf.reduce_max(tf.abs(grads), axis=-1).numpy()
    return saliency[0]

test_image_array = x_c  # Replace 'processed_image' with your actual preprocessed variable

if isinstance(test_image_array, list):
    test_image_array = np.stack(test_image_array)  # Convert list of tensors to a single 4D tensor
if len(test_image_array.shape) == 3:  # If missing batch dimension
    test_image_array = np.expand_dims(test_image_array, axis=0)
if len(test_image_array.shape) == 2:  # If missing channel dimension
    test_image_array = np.expand_dims(test_image_array, axis=-1)

test_image_array = tf.convert_to_tensor(test_image_array, dtype=tf.float32)

last_conv_layer_name = None
for layer in reversed(model.layers):
    if 'conv' in layer.name:
        last_conv_layer_name = layer.name
        break

if last_conv_layer_name is None:
    raise ValueError("No convolutional layer found in the model.")

last_conv_layer = model.get_layer(last_conv_layer_name)

heatmap = get_gradcam_heatmap_fixed(model, test_image_array, last_conv_layer)
plt.imshow(heatmap, cmap="jet")
plt.axis("off")
plt.title("Grad-CAM Heatmap")
plt.show()

saliency = compute_saliency_map_fixed(model, test_image_array)
plt.imshow(saliency, cmap="viridis")
plt.axis("off")
plt.title("Saliency Map")
plt.show()

2024-12-02 05:56:22.154797: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-02 05:56:22.781406: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733118983.010492   96129 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733118983.073945   96129 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-02 05:56:23.668822: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

NameError: name 'x_c' is not defined

In [None]:
print(len(y_test))
for i, j in enumerate(y_test):
    k = predictions[i][0]
    if j != k:
        print(j, k)

Train and test CNN model

In [None]:

# num_tests = 1
# cnns = []
# for i in range(num_tests):
# cnns.append(cnn.CNN(x_train, x_test, y_train, y_test))

Cross validation and bootstrapping

In [None]:
# print(cnns[0].test_acc)