In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import keras
import cv2
import pickle
import sklearn
import random

from sklearn.metrics import auc, f1_score, roc_curve, recall_score, precision_score, accuracy_score, confusion_matrix
from sklearn import metrics
from google.colab import files
from keras.preprocessing import image
# from keras.layers.serialization import activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications import *
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.models import load_model
from keras import backend as K
from PIL import Image
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import load_img

In [None]:
# Root folder of the PET-CT TIFF dataset (under /content/drive/MyDrive).
base_dir = '/content/drive/MyDrive/TIFF PET-CTWholeDataset'

# One directory per split, each containing one sub-folder per class.
training_set_dir, test_set_dir = (
    os.path.join(base_dir, split) for split in ('trainingSet', 'testSet')
)

# Per-class folders inside the training split.
train_lungCancer_dir, train_normal_dir = (
    os.path.join(training_set_dir, label) for label in ('lungCancer', 'normal')
)
# Per-class folders inside the test split.
test_lungCancer_dir, test_normal_dir = (
    os.path.join(test_set_dir, label) for label in ('lungCancer', 'normal')
)

In [None]:
# Report how many directory entries each split/class folder contains.
for label, directory in (
    ('total training lungCancer images:', train_lungCancer_dir),
    ('total training normal images:', train_normal_dir),
    ('total test lungCancer images:', test_lungCancer_dir),
    ('total test normal images:', test_normal_dir),
):
    print(label, len(os.listdir(directory)))

total training lungCancer images: 719
total training normal images: 726
total test lungCancer images: 199
total test normal images: 207


In [None]:
def check_data_leakage(directory1, directory2):
    """Report whether two directories contain entries with the same name.

    Only the names returned by ``os.listdir`` are compared; file contents are
    not inspected, so identical images stored under different names are not
    detected.

    Args:
        directory1: Path of the first directory to list.
        directory2: Path of the second directory to list.

    Returns:
        None. The verdict (and any shared names) is printed to stdout.
    """
    shared_names = set(os.listdir(directory1)) & set(os.listdir(directory2))

    # Guard clause: nothing in common means there is nothing more to report.
    if not shared_names:
        print("No data leakage detected.")
        return

    print("Data leakage detected!")
    print("Common image filenames between the directories:", shared_names)

In [None]:
# Compare every training class folder against every test class folder.
for train_dir, test_dir in (
    (train_lungCancer_dir, test_lungCancer_dir),
    (train_lungCancer_dir, test_normal_dir),
    (train_normal_dir, test_normal_dir),
    (train_normal_dir, test_lungCancer_dir),
):
    check_data_leakage(train_dir, test_dir)

No data leakage detected.
No data leakage detected.
No data leakage detected.
No data leakage detected.


In [None]:
# Training mini-batch size and the (height, width) every image is resized to.
BATCH_SIZE= 80
image_height, image_width = 245, 457

# Training pipeline: scale pixels to [0, 1] and apply light geometric
# augmentation (small rotations, shifts and zooms; gaps filled with the
# nearest pixel value).
train_datagen = ImageDataGenerator(rescale=1.0/255.0,
                                  rotation_range= 15,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  zoom_range=0.1,
                                  fill_mode='nearest')

# class_mode='binary' yields one 0/1 label per image. flow_from_directory
# assigns class indices from the sub-folder names in alphanumeric order, so
# 'lungCancer' -> 0 and 'normal' -> 1.
train_generator = train_datagen.flow_from_directory(
        training_set_dir,
        target_size=(image_height, image_width),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle = True)


# Evaluation pipeline: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

# shuffle=False (the default is True) keeps batches in directory order, so
# predictions made from this generator line up with test_generator.classes
# and test_generator.filenames. Aggregate metrics do not depend on the order.
test_generator = test_datagen.flow_from_directory(
        test_set_dir,
        target_size=(image_height, image_width),
        batch_size=10,
        class_mode='binary',
        shuffle=False)

Found 1445 images belonging to 2 classes.
Found 406 images belonging to 2 classes.


In [None]:
# Xception convolutional base with ImageNet weights and no classifier top,
# sized for the notebook's (image_height, image_width) RGB inputs.
pre_trained_model = Xception(
    input_shape=(image_height, image_width, 3),
    include_top=False,
    weights='imagenet',
)

# Freeze every layer except the last eight, which stay trainable.
for layer in pre_trained_model.layers[:-8]:
    layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the Xception base.
pre_trained_model.summary()

Model: "xception"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 245, 457, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 122, 228, 32)         864       ['input_1[0][0]']             
                                                                                                  
 block1_conv1_bn (BatchNorm  (None, 122, 228, 32)         128       ['block1_conv1[0][0]']        
 alization)                                                                                       
                                                                                                  
 block1_conv1_act (Activati  (None, 122, 228, 32)         0         ['block1_conv1_bn[0][0]

In [None]:
# Tap the base network at its 'block14_sepconv2_act' layer.
last_layer = pre_trained_model.get_layer('block14_sepconv2_act')
last_output = last_layer.output
print('last layer output shape: ', last_layer.output_shape)

# Classification head: flatten the feature map, pass it through a stack of
# ReLU dense layers of decreasing width, and finish with a single sigmoid unit.
x = layers.Flatten()(last_output)
for units in (1024, 512, 256, 128, 64, 32):
    x = layers.Dense(units, activation='relu')(x)
x = layers.Dense(1)(x)
x = layers.Activation(tf.nn.sigmoid)(x)

# `x` is kept as the output tensor; a later cell builds `model2` from it.
model = Model(inputs=pre_trained_model.input, outputs=x)

last layer output shape:  (None, 8, 15, 2048)


In [None]:
# Print the layer-by-layer summary of the full model (Xception base plus dense head).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 245, 457, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 122, 228, 32)         864       ['input_1[0][0]']             
                                                                                                  
 block1_conv1_bn (BatchNorm  (None, 122, 228, 32)         128       ['block1_conv1[0][0]']        
 alization)                                                                                       
                                                                                                  
 block1_conv1_act (Activati  (None, 122, 228, 32)         0         ['block1_conv1_bn[0][0]'] 

In [None]:
# Location handed to ModelCheckpoint for the best weights seen during training.
checkpoint_path = '/content/sample_data/temporaryWeights'
checkpoint_dir = os.path.dirname(checkpoint_path)

# exist_ok=True keeps this cell re-runnable: without it a second run raises
# FileExistsError. os.makedirs returns None, so its result is not stored.
os.makedirs(checkpoint_path, exist_ok=True)
print(checkpoint_path)

# Save weights only, and only when the monitored quantity improves
# (ModelCheckpoint monitors 'val_loss' unless told otherwise).
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, verbose= 1)

/content/sample_data/temporaryWeights


In [None]:
# SGD with a staircase exponential decay: the rate is multiplied by 0.4 every
# 1000 optimizer steps.
# NOTE(review): with 1445 training images and BATCH_SIZE = 80 there are 19
# steps per epoch, so a 40-epoch run takes about 760 steps and presumably never
# reaches the first decay boundary - confirm that decay_steps=1000 is intended.
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.4,
    staircase=True,
)

# Binary cross-entropy loss; the metrics are tracked in the order listed below.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
    metrics=[
        metric_cls(name=metric_name)
        for metric_cls, metric_name in (
            (tf.keras.metrics.BinaryAccuracy, "Binary_accuracy"),
            (tf.keras.metrics.Precision, 'Precision'),
            (tf.keras.metrics.Recall, 'Recall'),
            (tf.keras.metrics.TruePositives, 'TP'),
            (tf.keras.metrics.TrueNegatives, 'TN'),
            (tf.keras.metrics.FalseNegatives, 'FN'),
            (tf.keras.metrics.FalsePositives, 'FP'),
            (tf.keras.metrics.AUC, 'AUC'),
        )
    ],
)

In [None]:
# Train for 40 epochs on the augmented training generator; cp_callback writes
# the weights to checkpoint_path whenever its monitored quantity improves.
# NOTE(review): test_generator is used as the validation data, so the
# best-checkpoint selection is driven by the test set. Metrics later computed
# on that same set are therefore likely optimistic - consider holding out a
# separate validation split.
history = model.fit(
            train_generator,
            validation_data = test_generator,
            epochs = 40,
            verbose = 1,
            callbacks = [cp_callback])

In [None]:
# Second Model built from the same input tensor and the same output tensor as
# `model`. This relies on `x` still being the sigmoid output tensor created in
# the head-construction cell, so run it before anything rebinds `x`.
model2 = Model(pre_trained_model.input, x)

In [None]:
# Compile model2 with the same loss, schedule and metrics as the training model
# so that evaluate() reports the same quantities in the same order.
model2.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
    metrics=[
        metric_cls(name=metric_name)
        for metric_cls, metric_name in (
            (tf.keras.metrics.BinaryAccuracy, "Binary_accuracy"),
            (tf.keras.metrics.Precision, 'Precision'),
            (tf.keras.metrics.Recall, 'Recall'),
            (tf.keras.metrics.TruePositives, 'TP'),
            (tf.keras.metrics.TrueNegatives, 'TN'),
            (tf.keras.metrics.FalseNegatives, 'FN'),
            (tf.keras.metrics.FalsePositives, 'FP'),
            (tf.keras.metrics.AUC, 'AUC'),
        )
    ],
)

In [None]:
# Restore the weights saved at checkpoint_path, the same path cp_callback writes to.
model2.load_weights(checkpoint_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7b5891872e00>

In [None]:
# Evaluate on the test generator; the result is the loss followed by the
# compiled metrics in order: Binary_accuracy, Precision, Recall, TP, TN, FN, FP, AUC.
model2.evaluate(test_generator)



[0.37895143032073975,
 0.8768472671508789,
 0.8584474921226501,
 0.9082125425338745,
 188.0,
 168.0,
 19.0,
 31.0,
 0.9162964820861816]

In [None]:
# Rebuild the dataset paths so the per-image inference cells can run on their own.
base_dir = '/content/drive/MyDrive/TIFF PET-CTWholeDataset'

# Training split and its per-class folders.
training_set_dir = os.path.join(base_dir, 'trainingSet')
train_lungCancer_dir = os.path.join(base_dir, 'trainingSet', 'lungCancer')
train_normal_dir = os.path.join(base_dir, 'trainingSet', 'normal')

# Test split and its per-class folders.
test_set_dir = os.path.join(base_dir, 'testSet')
test_lungCancer_dir = os.path.join(base_dir, 'testSet', 'lungCancer')
test_normal_dir = os.path.join(base_dir, 'testSet', 'normal')

# Show the two test folders, one path per line.
print(test_normal_dir, test_lungCancer_dir, sep='\n')

/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/normal
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer


In [None]:
# Score every file in the lung-cancer test folder, one image per predict call.
fileNames2 = os.listdir(test_lungCancer_dir)
filePaths2 = [os.path.join(test_lungCancer_dir, name) for name in fileNames2]

# One (1, 1) prediction array per image, in listing order.
my_y_pred2 = []

for fn in filePaths2:
    # Same preprocessing as the generators: resize to the network input size
    # and scale pixel values to [0, 1].
    img = tf.keras.utils.load_img(fn, target_size=(image_height, image_width))
    # Deliberately not named `x`: that name holds the model's output tensor,
    # which the `model2 = Model(pre_trained_model.input, x)` cell needs.
    img_array = tf.keras.utils.img_to_array(img)
    images = np.expand_dims(img_array / 255, axis=0)

    classes = model2.predict(images)
    my_y_pred2.append(classes)
    print(classes[0])
    # The sigmoid output is thresholded at 0.5: above means "normal",
    # otherwise "lungCancer" (the class with index 0).
    if classes[0, 0] > 0.5:
        print(fn + " lungCancer does not exist in this slice")
    else:
        print(fn + " lungCancer exists in this slice")

[0.02196226]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (10).tif lungCancer exists in this slice
[0.01155093]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (1).tif lungCancer exists in this slice
[0.0135491]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (11).tif lungCancer exists in this slice
[0.00407713]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (13).tif lungCancer exists in this slice
[0.00727973]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (12).tif lungCancer exists in this slice
[0.03183099]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (15).tif lungCancer exists in this slice
[0.00494994]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (14).tif lungCancer exists in this slice
[0.01142132]
/content/drive/MyDrive/TIFF PET-CTWholeDataset/testSet/lungCancer/0000156 (17).tif lungCancer

In [None]:
# Folder holding the external dataset images scored in the next cell.
base_dir3 = '/content/drive/MyDrive/2ExternalDataSet'

In [None]:
# Score every file in the external dataset folder, one image per predict call.
fileNames2 = os.listdir(base_dir3)
filePaths2 = [os.path.join(base_dir3, name) for name in fileNames2]

# One (1, 1) prediction array per image, in listing order.
my_y_pred2 = []

for fn in filePaths2:
    # Same preprocessing as the generators: resize to the network input size
    # and scale pixel values to [0, 1].
    img = tf.keras.utils.load_img(fn, target_size=(image_height, image_width))
    # Deliberately not named `x`: that name holds the model's output tensor,
    # which the `model2 = Model(pre_trained_model.input, x)` cell needs.
    img_array = tf.keras.utils.img_to_array(img)
    images = np.expand_dims(img_array / 255, axis=0)

    classes = model2.predict(images)
    my_y_pred2.append(classes)
    print(classes[0])
    # The sigmoid output is thresholded at 0.5: above means "normal",
    # otherwise "lungCancer" (the class with index 0).
    if classes[0, 0] > 0.5:
        print(fn + " lungCancer does not exist in this slice")
    else:
        print(fn + " lungCancer exists in this slice")


# Collect the per-image scores into a single row of shape (1, n_images).
y_pred2 = np.array(my_y_pred2).reshape(1, len(my_y_pred2))
y_pred2

[0.44421592]
/content/drive/MyDrive/2ExternalDataSet/196 (100).tif lungCancer exists in this slice
[0.47779244]
/content/drive/MyDrive/2ExternalDataSet/196 (101).tif lungCancer exists in this slice
[0.45777944]
/content/drive/MyDrive/2ExternalDataSet/196 (104).tif lungCancer exists in this slice
[0.41165105]
/content/drive/MyDrive/2ExternalDataSet/196 (103).tif lungCancer exists in this slice
[0.4428756]
/content/drive/MyDrive/2ExternalDataSet/196 (102).tif lungCancer exists in this slice
[0.37666726]
/content/drive/MyDrive/2ExternalDataSet/196 (106).tif lungCancer exists in this slice
[0.43852094]
/content/drive/MyDrive/2ExternalDataSet/196 (105).tif lungCancer exists in this slice
[0.35172588]
/content/drive/MyDrive/2ExternalDataSet/196 (109).tif lungCancer exists in this slice
[0.30225614]
/content/drive/MyDrive/2ExternalDataSet/196 (108).tif lungCancer exists in this slice
[0.33770865]
/content/drive/MyDrive/2ExternalDataSet/196 (107).tif lungCancer exists in this slice
[0.34046105

array([[0.44421592, 0.47779244, 0.45777944, 0.41165105, 0.4428756 ,
        0.37666726, 0.43852094, 0.35172588, 0.30225614, 0.33770865,
        0.34046105, 0.34637964, 0.48510477, 0.38852137, 0.4418751 ,
        0.4738678 , 0.43728527, 0.36408654, 0.4235511 , 0.40455678,
        0.39242986, 0.3781026 , 0.35139516, 0.4651018 , 0.41740426,
        0.43605736, 0.41463286, 0.3099226 , 0.4826206 , 0.3555751 ,
        0.4349237 , 0.49638397, 0.5045037 , 0.5014538 , 0.55574703,
        0.35770413, 0.37401575, 0.509365  , 0.45401442, 0.4730378 ,
        0.43812206, 0.4113301 , 0.42396632, 0.31513813, 0.42089322,
        0.27501047, 0.34489524, 0.32675907, 0.40810072, 0.40055728,
        0.38366494, 0.3496055 , 0.2982611 , 0.2778204 , 0.3406471 ,
        0.23383099, 0.19946365, 0.19492635, 0.27266687, 0.2984964 ,
        0.21105565, 0.21474048, 0.24613746, 0.38899225, 0.44430792,
        0.4733192 , 0.43242764, 0.48714012, 0.5855313 , 0.41937405,
        0.6207441 , 0.4220263 , 0.45505288, 0.43

In [None]:
# Threshold the sigmoid scores at 0.5 to get 0/1 class predictions.
binary_predictions = (y_pred2.ravel() > 0.5).astype(int)

# Every external image is labelled 0 (lung cancer), so the ground truth is all
# zeros. With a single true class, accuracy and recall for that class are the
# same number.
true_labels = np.zeros(binary_predictions.shape, dtype=binary_predictions.dtype)

accuracy = accuracy_score(y_true=true_labels, y_pred=binary_predictions)
print(f"Accuracy: {accuracy:.4f}")

# pos_label=0 because class 0 (lung cancer) is treated as the positive class.
sensitivity = recall_score(y_true=true_labels, y_pred=binary_predictions, pos_label=0)
print(f"Sensitivity (Recall): {sensitivity:.4f}")

Accuracy: 0.7683
Sensitivity (Recall): 0.7683
