In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import keras
import cv2
import pickle
import sklearn
import random

from sklearn.metrics import auc, f1_score, roc_curve, recall_score, precision_score, accuracy_score, confusion_matrix
from sklearn import metrics
from google.colab import files
from keras.preprocessing import image
# from keras.layers.serialization import activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications import *
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.models import load_model
from keras import backend as K
from PIL import Image
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import load_img

In [None]:
# Root of the PET-CT dataset on the mounted Google Drive.
base_dir = '/content/drive/MyDrive/TIFF PET-CTWholeDataset'

# One sub-folder per split ...
training_set_dir, test_set_dir = (
    os.path.join(base_dir, split_name) for split_name in ('trainingSet', 'testSet')
)

# ... and inside each split, one sub-folder per class.
train_lungCancer_dir, train_normal_dir = (
    os.path.join(training_set_dir, class_name) for class_name in ('lungCancer', 'normal')
)
test_lungCancer_dir, test_normal_dir = (
    os.path.join(test_set_dir, class_name) for class_name in ('lungCancer', 'normal')
)

In [None]:
# Report how many directory entries each split/class folder holds.
image_folders = (
    ('total training lungCancer images:', train_lungCancer_dir),
    ('total training normal images:', train_normal_dir),
    ('total test lungCancer images:', test_lungCancer_dir),
    ('total test normal images:', test_normal_dir),
)
for caption, folder in image_folders:
    print(caption, len(os.listdir(folder)))

total training lungCancer images: 719
total training normal images: 726
total test lungCancer images: 199
total test normal images: 207


In [None]:
def check_data_leakage(directory1, directory2):
    """Print whether two directories contain entries with the same name.

    Only the names returned by ``os.listdir`` are compared; file contents are
    not inspected, so identical images stored under different names are not
    detected.

    Args:
        directory1: Path of the first directory to list.
        directory2: Path of the second directory to list.

    Returns:
        None. The verdict (and any shared names) is printed to stdout.
    """
    shared_names = set(os.listdir(directory1)) & set(os.listdir(directory2))

    # Guard clause: nothing in common means the two folders are disjoint by name.
    if not shared_names:
        print("No data leakage detected.")
        return

    print("Data leakage detected!")
    print("Common image filenames between the directories:", shared_names)

In [None]:
# Compare every training class folder against every test class folder.
leakage_pairs = [
    (train_lungCancer_dir, test_lungCancer_dir),
    (train_lungCancer_dir, test_normal_dir),
    (train_normal_dir, test_normal_dir),
    (train_normal_dir, test_lungCancer_dir),
]
for train_dir, test_dir in leakage_pairs:
    check_data_leakage(train_dir, test_dir)

No data leakage detected.
No data leakage detected.
No data leakage detected.
No data leakage detected.


In [None]:
# Batch sizes and the (height, width) every image is resized to on load.
BATCH_SIZE = 80
TEST_BATCH_SIZE = 10
image_height, image_width = 245, 457

# Training pipeline: scale pixels to [0, 1] and apply light geometric augmentation.
train_datagen = ImageDataGenerator(rescale=1.0/255.0,
                                  rotation_range=15,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  zoom_range=0.1,
                                  fill_mode='nearest')

# Labels come from the sub-folder names of training_set_dir.
# NOTE(review): per the Keras docs the class indices follow alphanumeric folder
# order (presumably lungCancer -> 0, normal -> 1) - confirm with
# train_generator.class_indices.
train_generator = train_datagen.flow_from_directory(
        training_set_dir,
        target_size=(image_height, image_width),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=True)


# Evaluation pipeline: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

# shuffle=False keeps the batch order aligned with test_generator.filenames and
# test_generator.classes (the default is shuffle=True), so predictions made from
# this generator can be matched back to their files and labels.
test_generator = test_datagen.flow_from_directory(
        test_set_dir,
        target_size=(image_height, image_width),
        batch_size=TEST_BATCH_SIZE,
        class_mode='binary',
        shuffle=False)

Found 1445 images belonging to 2 classes.
Found 406 images belonging to 2 classes.


In [None]:
# ImageNet-pretrained ResNet152V2 used as a feature extractor: the original
# classification head is dropped and the input is sized to the notebook's images.
# NOTE(review): the data generators rescale pixels to [0, 1]; check whether the
# backbone instead expects keras.applications.resnet_v2.preprocess_input scaling.
pre_trained_model = ResNet152V2(
    weights='imagenet',
    include_top=False,
    input_shape=(image_height, image_width, 3),
)

# Freeze every backbone layer so that only layers added afterwards are trained.
for backbone_layer in pre_trained_model.layers:
    backbone_layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer table (output shapes, parameter counts) of the frozen backbone.
pre_trained_model.summary()

Model: "resnet152v2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 245, 457, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 251, 463, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 123, 229, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 125, 231, 64)         0         ['conv1_conv[0][0]']          
                                                                                        

In [None]:
# Take the backbone's 'post_relu' activation as the feature map fed to the new head.
last_layer = pre_trained_model.get_layer('post_relu')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

# Classifier head: flatten the feature map, then a stack of ReLU dense layers
# of decreasing width.
x = layers.Flatten()(last_output)
for units in (1024, 512, 256, 128, 64, 32):
    x = layers.Dense(units, activation='relu')(x)

# Single-unit output followed by a sigmoid, giving one score in (0, 1) per image.
x = layers.Dense(1)(x)
x = layers.Activation(tf.nn.sigmoid)(x)

# Full network: backbone input -> sigmoid score.
model = Model(pre_trained_model.input, x)

last layer output shape:  (None, 8, 15, 2048)


In [None]:
# Print the layer table of the full network (backbone plus the dense head).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 245, 457, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 251, 463, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 123, 229, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 125, 231, 64)         0         ['conv1_conv[0][0]']          
                                                                                              

In [None]:
# Path handed to ModelCheckpoint below; the same string is later passed to
# load_weights to restore the best weights.
checkpoint_path = '/content/sample_data/temporaryWeights'
checkpoint_dir = os.path.dirname(checkpoint_path)

# os.makedirs() returns None, so the path itself is kept in savingPath instead
# of the call's result. exist_ok=True lets this cell be re-run without raising
# FileExistsError once the folder has been created.
savingPath = checkpoint_path
os.makedirs(savingPath, exist_ok=True)

print(checkpoint_path)

# Save weights only, and only on epochs where the monitored quantity improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)

/content/sample_data/temporaryWeights


In [None]:
# Metrics reported per epoch; their order here is the order in which they
# appear in the training logs after the loss.
training_metrics = [
    tf.keras.metrics.BinaryAccuracy(name="Binary_accuracy"),
    tf.keras.metrics.Precision(name='Precision'),
    tf.keras.metrics.Recall(name='Recall'),
    tf.keras.metrics.TruePositives(name='TP'),
    tf.keras.metrics.TrueNegatives(name='TN'),
    tf.keras.metrics.FalseNegatives(name='FN'),
    tf.keras.metrics.FalsePositives(name='FP'),
    tf.keras.metrics.AUC(name='AUC'),
]

# Binary cross-entropy on the sigmoid output, optimised with Adam.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=training_metrics,
)

In [None]:
# Number of passes over the training generator.
EPOCHS = 35

# NOTE(review): the test generator doubles as validation data here, and
# cp_callback keeps the weights of the best epoch - so the checkpoint is chosen
# using the test set. Consider a separate validation split.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[cp_callback],
    verbose=1,
)

Epoch 1/35
Epoch 1: val_loss improved from inf to 1.39253, saving model to /content/sample_data/temporaryWeights
Epoch 2/35
Epoch 2: val_loss improved from 1.39253 to 1.19484, saving model to /content/sample_data/temporaryWeights
Epoch 3/35
Epoch 3: val_loss did not improve from 1.19484
Epoch 4/35
Epoch 4: val_loss improved from 1.19484 to 1.13766, saving model to /content/sample_data/temporaryWeights
Epoch 5/35
Epoch 5: val_loss improved from 1.13766 to 0.47966, saving model to /content/sample_data/temporaryWeights
Epoch 6/35
Epoch 6: val_loss did not improve from 0.47966
Epoch 7/35
Epoch 7: val_loss did not improve from 0.47966
Epoch 8/35
Epoch 8: val_loss did not improve from 0.47966
Epoch 9/35
Epoch 9: val_loss did not improve from 0.47966
Epoch 10/35
Epoch 10: val_loss did not improve from 0.47966
Epoch 11/35
Epoch 11: val_loss improved from 0.47966 to 0.47219, saving model to /content/sample_data/temporaryWeights
Epoch 12/35
Epoch 12: val_loss did not improve from 0.47219
Epoch 1

In [None]:
# Build the evaluation model from `model`'s own input/output tensors instead of
# the notebook-global `x`: the inference cells reassign `x` to a NumPy image
# array, so re-running this cell after them would no longer rebuild the network.
# model2 is wired from the same tensors as `model`, i.e. the same layer objects.
model2 = Model(model.input, model.output)

In [None]:
# Same loss, optimizer settings and metric set as used for `model`; the metric
# order determines the order of the values returned by model2.evaluate().
evaluation_metrics = [
    tf.keras.metrics.BinaryAccuracy(name="Binary_accuracy"),
    tf.keras.metrics.Precision(name='Precision'),
    tf.keras.metrics.Recall(name='Recall'),
    tf.keras.metrics.TruePositives(name='TP'),
    tf.keras.metrics.TrueNegatives(name='TN'),
    tf.keras.metrics.FalseNegatives(name='FN'),
    tf.keras.metrics.FalsePositives(name='FP'),
    tf.keras.metrics.AUC(name='AUC'),
]

model2.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=evaluation_metrics,
)

In [None]:
# Restore the weights written by the ModelCheckpoint callback (save_best_only=True)
# from the same checkpoint_path that was given to it.
model2.load_weights(checkpoint_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f764c591c00>

In [None]:
# Score the restored weights on the test generator. The returned list is the loss
# followed by the compiled metrics in order:
# Binary_accuracy, Precision, Recall, TP, TN, FN, FP, AUC.
model2.evaluate(test_generator)



[0.4721914827823639,
 0.9187192320823669,
 0.8990825414657593,
 0.9468598961830139,
 196.0,
 177.0,
 11.0,
 22.0,
 0.9335324168205261]

In [None]:
# Re-declare the dataset paths so the inference cells below can be run
# without going back to the top of the notebook.
base_dir = '/content/drive/MyDrive/TIFF PET-CTWholeDataset'

training_set_dir, test_set_dir = [
    os.path.join(base_dir, split_name) for split_name in ('trainingSet', 'testSet')
]

train_lungCancer_dir, train_normal_dir = [
    os.path.join(training_set_dir, class_name) for class_name in ('lungCancer', 'normal')
]
test_lungCancer_dir, test_normal_dir = [
    os.path.join(test_set_dir, class_name) for class_name in ('lungCancer', 'normal')
]

# Show the two test folders that are used for per-image inference.
for test_class_dir in (test_normal_dir, test_lungCancer_dir):
    print(test_class_dir)

/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/normal
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer


In [None]:
# Per-image inference over the lung-cancer test folder.
# The listing is sorted so the prediction order is reproducible, and restricted
# to regular files so a stray sub-folder cannot break load_img.
fileNames2 = sorted(
    name for name in os.listdir(test_lungCancer_dir)
    if os.path.isfile(os.path.join(test_lungCancer_dir, name))
)
filePaths2 = [os.path.join(test_lungCancer_dir, name) for name in fileNames2]

my_y_pred2 = []

for fn in filePaths2:
    # Resize to the size the network was built for and scale to [0, 1], matching
    # the rescale=1/255 used by the data generators.
    img = tf.keras.utils.load_img(fn, target_size=(image_height, image_width))
    # Deliberately not named `x`: that name holds the Keras output tensor of the
    # classifier head and must not be overwritten by an image array.
    img_array = tf.keras.utils.img_to_array(img) / 255
    images = np.expand_dims(img_array, axis=0)  # add the batch axis -> (1, H, W, C)
    classes = model2.predict(images, verbose=0)  # verbose=0: no progress bar per image
    my_y_pred2.append(classes)
    print(classes[0])
    # NOTE(review): assumes the generators mapped lungCancer -> 0 and normal -> 1,
    # so a score above 0.5 means "normal" - confirm with train_generator.class_indices.
    if classes[0, 0] > 0.5:
        print(fn + " lungCancer does not exist in this slice")
    else:
        print(fn + " lungCancer exists in this slice")


# Collapse the per-image (1, 1) results into a single (1, n_images) row of scores.
y_pred2 = np.array(my_y_pred2).reshape(1, -1)
y_pred2

In [None]:
# Folder on the mounted Drive whose entries are listed and scored one by one
# in the next cell.
base_dir3 = '/content/drive/MyDrive/2ExternalDataSet'

In [None]:
# Per-image inference over the external dataset folder (base_dir3).
# The listing is sorted so the prediction order is reproducible, and restricted
# to regular files so a stray sub-folder cannot break load_img.
fileNames2 = sorted(
    name for name in os.listdir(base_dir3)
    if os.path.isfile(os.path.join(base_dir3, name))
)
filePaths2 = [os.path.join(base_dir3, name) for name in fileNames2]

my_y_pred2 = []

for fn in filePaths2:
    # Resize to the size the network was built for and scale to [0, 1], matching
    # the rescale=1/255 used by the data generators.
    img = tf.keras.utils.load_img(fn, target_size=(image_height, image_width))
    # Deliberately not named `x`: that name holds the Keras output tensor of the
    # classifier head and must not be overwritten by an image array.
    img_array = tf.keras.utils.img_to_array(img) / 255
    images = np.expand_dims(img_array, axis=0)  # add the batch axis -> (1, H, W, C)
    classes = model2.predict(images, verbose=0)  # verbose=0: no progress bar per image
    my_y_pred2.append(classes)
    print(classes[0])
    # NOTE(review): assumes the generators mapped lungCancer -> 0 and normal -> 1,
    # so a score above 0.5 means "normal" - confirm with train_generator.class_indices.
    if classes[0, 0] > 0.5:
        print(fn + " lungCancer does not exist in this slice")
    else:
        print(fn + " lungCancer exists in this slice")


# Collapse the per-image (1, 1) results into a single (1, n_images) row of scores.
y_pred2 = np.array(my_y_pred2).reshape(1, -1)
y_pred2

[0.04052657]
/content/drive/MyDrive/2ExternalDataSet/196 (100).tif lungCancer exists in this slice
[0.00159256]
/content/drive/MyDrive/2ExternalDataSet/196 (101).tif lungCancer exists in this slice
[0.00322425]
/content/drive/MyDrive/2ExternalDataSet/196 (104).tif lungCancer exists in this slice
[0.01300463]
/content/drive/MyDrive/2ExternalDataSet/196 (103).tif lungCancer exists in this slice
[0.00633582]
/content/drive/MyDrive/2ExternalDataSet/196 (102).tif lungCancer exists in this slice
[0.09652135]
/content/drive/MyDrive/2ExternalDataSet/196 (106).tif lungCancer exists in this slice
[0.09939609]
/content/drive/MyDrive/2ExternalDataSet/196 (105).tif lungCancer exists in this slice
[0.00010468]
/content/drive/MyDrive/2ExternalDataSet/196 (109).tif lungCancer exists in this slice
[0.00282757]
/content/drive/MyDrive/2ExternalDataSet/196 (108).tif lungCancer exists in this slice
[0.00456254]
/content/drive/MyDrive/2ExternalDataSet/196 (107).tif lungCancer exists in this slice
[0.0001129

array([[4.05265726e-02, 1.59255671e-03, 3.22425249e-03, 1.30046289e-02,
        6.33581635e-03, 9.65213478e-02, 9.93960947e-02, 1.04681545e-04,
        2.82757473e-03, 4.56254324e-03, 1.12946334e-04, 4.28677682e-04,
        1.50358141e-03, 6.68110268e-04, 5.38783241e-03, 3.04045994e-02,
        1.59873650e-03, 5.31523442e-03, 1.49488274e-03, 7.63222110e-04,
        6.09273076e-01, 1.24351047e-01, 4.96840745e-01, 8.49720478e-01,
        7.41017520e-01, 7.37237453e-01, 8.43377650e-01, 9.75990713e-01,
        9.89908218e-01, 9.52386975e-01, 9.79955733e-01, 6.38989747e-01,
        5.53026080e-01, 7.15573668e-01, 7.56207779e-02, 8.09505045e-01,
        9.50885177e-01, 2.40972176e-01, 6.20888352e-01, 7.09472746e-02,
        1.78597923e-02, 5.41133918e-02, 6.25076741e-02, 1.68203725e-04,
        9.69434902e-03, 1.12957328e-04, 1.13141454e-04, 2.87262141e-04,
        6.19700411e-04, 2.14220418e-04, 6.53458192e-05, 4.86416575e-05,
        2.70710825e-05, 6.36508688e-04, 1.33481080e-04, 6.545292

In [None]:
# Turn the sigmoid scores into hard 0/1 predictions with a 0.5 threshold.
predicted_flat = y_pred2.flatten()
binary_predictions = (predicted_flat > 0.5).astype(int)

# Every image scored above is treated as class 0, so the reference labels are
# all zeros; with a single true class, accuracy and recall for label 0 coincide.
true_labels = np.zeros_like(binary_predictions)

accuracy = accuracy_score(y_true=true_labels, y_pred=binary_predictions)
print(f"Accuracy: {accuracy:.4f}")

sensitivity = recall_score(y_true=true_labels, y_pred=binary_predictions, pos_label=0)
print(f"Sensitivity (Recall): {sensitivity:.4f}")

Accuracy: 0.7743
Sensitivity (Recall): 0.7743
