In [None]:
!pip install pydicom

import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import keras
import cv2
import pickle
import sklearn
import pydicom
import random

from sklearn.metrics import auc, f1_score, roc_curve, recall_score, precision_score, accuracy_score, confusion_matrix
from sklearn import metrics
from google.colab import files
from keras.preprocessing import image
from keras.layers.serialization import activation
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import *
from keras.layers import Dense, GlobalAveragePooling2D, Flatten
from keras.models import Model
from keras import backend as K
from tensorflow.keras import layers, Model
from keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img

#**Data directory**

In [None]:
# Root folder of the PET-CT DICOM dataset.
base_dir = '/content/drive/MyDrive/PET-CTDICOMWholeDataset'

# The dataset root holds one folder per split.
training_set_dir, test_set_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('trainingSet', 'testSet')
)

# Each split folder holds one sub-folder per class.
train_lungCancer_dir, train_normal_dir = (
    os.path.join(training_set_dir, class_name)
    for class_name in ('lungCancer', 'normal')
)
test_lungCancer_dir, test_normal_dir = (
    os.path.join(test_set_dir, class_name)
    for class_name in ('lungCancer', 'normal')
)

In [None]:
# Report how many directory entries each split/class folder contains.
for caption, folder in (
    ('total training lungCancer images:', train_lungCancer_dir),
    ('total training normal images:', train_normal_dir),
    ('total test lungCancer images:', test_lungCancer_dir),
    ('total test normal images:', test_normal_dir),
):
    print(caption, len(os.listdir(folder)))

total training lungCancer images: 719
total training normal images: 726
total test lungCancer images: 199
total test normal images: 207


#**Checking patient overlap (train/test file-name comparison)**

In [None]:
def check_data_leakage(directory1, directory2):
    """Report entry names that occur in both directories.

    Only the names returned by ``os.listdir`` are compared; file contents
    and DICOM metadata are not inspected.

    Args:
        directory1: Path of the first directory to list.
        directory2: Path of the second directory to list.

    Returns:
        None. The outcome is printed to standard output.
    """
    shared_names = set(os.listdir(directory1)) & set(os.listdir(directory2))

    # Guard clause: nothing in common means there is nothing to report.
    if not shared_names:
        print("No data leakage detected.")
        return

    print("Data leakage detected!")
    print("Common image filenames between the directories:", shared_names)

In [None]:
# Compare each training class folder with each test class folder.
for train_folder, test_folder in (
    (train_lungCancer_dir, test_lungCancer_dir),
    (train_lungCancer_dir, test_normal_dir),
    (train_normal_dir, test_normal_dir),
    (train_normal_dir, test_lungCancer_dir),
):
    check_data_leakage(train_folder, test_folder)

#**Data generation**

In [None]:
# Augmentation settings applied to training images only.
augmentation_settings = dict(
    rescale=1.0 / 255.0,       # multiply pixel values by 1/255
    rotation_range=20,         # random rotations
    width_shift_range=0.2,     # random horizontal shifts
    height_shift_range=0.2,    # random vertical shifts
    shear_range=0.2,           # random shear
    zoom_range=0.2,            # random zoom
    fill_mode='nearest',       # how pixels exposed by a transform are filled
)
train_datagen = ImageDataGenerator(**augmentation_settings)

def load_and_preprocess_dicom(dcm_path):
    """Read one DICOM file and return a fixed crop of its pixel array.

    The crop keeps rows 66..310 and columns 100..556 and every entry of the
    third axis, so the pixel array must have at least three axes.

    Args:
        dcm_path: Path of the DICOM file, passed to ``pydicom.dcmread``.

    Returns:
        The cropped slice of the file's ``pixel_array``.
    """
    row_window = slice(66, 311)
    col_window = slice(100, 557)
    pixels = pydicom.dcmread(dcm_path).pixel_array
    return pixels[row_window, col_window, :]

In [None]:
def _load_labelled_dicoms(directory, label):
    """Load every ``.dcm`` file of one class folder together with its labels.

    Args:
        directory: Folder whose ``.dcm`` files are loaded with
            ``load_and_preprocess_dicom``.
        label: Integer class label assigned to every image of the folder.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per file.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the array
    # order identical from one run to the next.
    filenames = sorted(name for name in os.listdir(directory) if name.endswith('.dcm'))
    images = np.array([load_and_preprocess_dicom(os.path.join(directory, name)) for name in filenames])
    labels = np.array([label] * len(images))
    return images, labels


# Training data: lung-cancer images (label 1) followed by normal images (label 0).
lungCancer_train_images, lungCancer_train_labels = _load_labelled_dicoms(train_lungCancer_dir, 1)
normal_train_images, normal_train_labels = _load_labelled_dicoms(train_normal_dir, 0)

train_images = np.concatenate((lungCancer_train_images, normal_train_images))
train_labels = np.concatenate((lungCancer_train_labels, normal_train_labels))

# Augmenting iterator over the training arrays; reshuffles the samples.
train_generator = train_datagen.flow(train_images, train_labels,
        batch_size=80,
        shuffle = True)

# Test data: same class order and labels as the training data.
lungCancer_test_images, lungCancer_test_labels = _load_labelled_dicoms(test_lungCancer_dir, 1)
normal_test_images, normal_test_labels = _load_labelled_dicoms(test_normal_dir, 0)

test_images = np.concatenate((lungCancer_test_images, normal_test_images))
test_labels = np.concatenate((lungCancer_test_labels, normal_test_labels))

#**Defining Inception model**

In [None]:
# InceptionV3 with ImageNet weights and without its top classifier; the
# input shape matches the (245, 457, 3) crop.
inception_options = {
    'include_top': False,
    'weights': 'imagenet',
    'input_shape': (245, 457, 3),
}
pre_trained_model = InceptionV3(**inception_options)

# Freeze every layer of the base network so its weights are not updated.
for layer in pre_trained_model.layers:
    layer.trainable = False

In [None]:
# Print the layer table of the pre-trained base model; the next cell looks
# up one of its layers by name.
pre_trained_model.summary()

In [None]:
# The output of the 'mixed10' layer feeds the new classification head.
last_layer = pre_trained_model.get_layer('mixed10')
last_output = last_layer.output
print('last layer output shape: ', last_layer.output_shape)

# Head: flatten, then a stack of ReLU dense layers of decreasing width.
x = layers.Flatten()(last_output)
for units in (2048, 1024, 512, 256, 128, 32):
    x = layers.Dense(units, activation='relu')(x)

# One output unit followed by a sigmoid for the binary decision.
x = layers.Dense(1)(x)
x = layers.Activation(tf.nn.sigmoid)(x)

model = Model(pre_trained_model.input, x)

In [None]:
# Print the layer table of the full model (base network plus new head).
model.summary()

In [None]:
checkpoint_path = '/content/sample_data/temporaryWeights'

# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the directory already exists.
# os.makedirs returns None, so savingPath is None, exactly as before; the
# name is kept only for backward compatibility.
savingPath = os.makedirs(checkpoint_path, exist_ok=True)
checkpoint_dir = os.path.dirname(checkpoint_path)
print(checkpoint_path)

# Save only the weights, and only when the monitored quantity improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, verbose= 1)

In [None]:
# Metrics reported for every epoch, in this order.
inception_metrics = [
    tf.keras.metrics.BinaryAccuracy(name="Binary_accuracy"),
    tf.keras.metrics.Precision(name='Precision'),
    tf.keras.metrics.Recall(name='Recall'),
    tf.keras.metrics.TruePositives(name='TP'),
    tf.keras.metrics.TrueNegatives(name='TN'),
    tf.keras.metrics.FalseNegatives(name='FN'),
    tf.keras.metrics.FalsePositives(name='FP'),
    tf.keras.metrics.AUC(name='AUC'),
]

# Binary cross-entropy on the sigmoid output, optimised with Adam.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=inception_metrics,
)

In [None]:
# train_datagen multiplies every training batch by 1/255 (its `rescale`
# setting), while test_images holds the raw pixel values. Apply the same
# scaling here so the validation metrics, and the save_best_only decision
# of the checkpoint callback, are computed on inputs in the same range.
# float32 keeps the scaled copy at half the size of a float64 one.
validation_images = test_images.astype('float32') / 255.0

# NOTE(review): the test split doubles as validation data, so checkpoint
# selection sees the test set - confirm this is intended.
history = model.fit(train_generator, epochs=20, validation_data=(validation_images, test_labels),
            verbose = 1,
            callbacks = [cp_callback])

#**Defining ResNet152V2 model**

In [None]:
# ResNet152V2 with ImageNet weights and without its top classifier; the
# input shape matches the (245, 457, 3) crop.
resnet_options = {
    'include_top': False,
    'weights': 'imagenet',
    'input_shape': (245, 457, 3),
}
pre_trained_model = ResNet152V2(**resnet_options)

# Freeze every layer of the base network so its weights are not updated.
for layer in pre_trained_model.layers:
    layer.trainable = False

In [None]:
# The output of the 'post_relu' layer feeds the new classification head.
last_layer = pre_trained_model.get_layer('post_relu')
last_output = last_layer.output
print('last layer output shape: ', last_layer.output_shape)

# Head: flatten, then three ReLU dense layers of decreasing width.
x = layers.Flatten()(last_output)
for units in (128, 64, 32):
    x = layers.Dense(units, activation='relu')(x)

# One output unit followed by a sigmoid for the binary decision.
x = layers.Dense(1)(x)
x = layers.Activation(tf.nn.sigmoid)(x)

model = Model(pre_trained_model.input, x)

In [None]:
checkpoint_path = '/content/sample_data/temporaryWeights'

# exist_ok=True is required here: the same directory is already created by
# the checkpoint cell of the Inception section, so a plain os.makedirs
# raises FileExistsError when the notebook is run top to bottom.
# os.makedirs returns None, so savingPath is None, exactly as before; the
# name is kept only for backward compatibility.
savingPath = os.makedirs(checkpoint_path, exist_ok=True)
checkpoint_dir = os.path.dirname(checkpoint_path)
print(checkpoint_path)

# Save only the weights, and only when the monitored quantity improves.
# NOTE(review): this is the same path the Inception section uses, so its
# saved weights get overwritten - confirm that is acceptable.
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, verbose= 1)

In [None]:
# Metrics reported for every epoch, in this order.
resnet_metrics = [
    tf.keras.metrics.BinaryAccuracy(name="Binary_accuracy"),
    tf.keras.metrics.Precision(name='Precision'),
    tf.keras.metrics.Recall(name='Recall'),
    tf.keras.metrics.TruePositives(name='TP'),
    tf.keras.metrics.TrueNegatives(name='TN'),
    tf.keras.metrics.FalseNegatives(name='FN'),
    tf.keras.metrics.FalsePositives(name='FP'),
    tf.keras.metrics.AUC(name='AUC'),
]

# Binary cross-entropy on the sigmoid output, optimised with Adam.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=resnet_metrics,
)

In [None]:
# train_datagen multiplies every training batch by 1/255 (its `rescale`
# setting), while test_images holds the raw pixel values. Apply the same
# scaling here so the validation metrics, and the save_best_only decision
# of the checkpoint callback, are computed on inputs in the same range.
# float32 keeps the scaled copy at half the size of a float64 one.
validation_images = test_images.astype('float32') / 255.0

# NOTE(review): the test split doubles as validation data, so checkpoint
# selection sees the test set - confirm this is intended.
history = model.fit(train_generator, validation_data=(validation_images, test_labels),
            verbose = 1,
            epochs = 15, callbacks = [cp_callback])