In [1]:
!pip install pydicom

import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import keras
import cv2
import pickle
import sklearn
import pydicom
import random

from sklearn.metrics import auc, f1_score, roc_curve, recall_score, precision_score, accuracy_score, confusion_matrix
from sklearn import metrics
from google.colab import files
from keras.preprocessing import image
from keras.layers.serialization import activation
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import *
from keras.layers import Dense, GlobalAveragePooling2D, Flatten
from keras.models import Model
from keras import backend as K
from tensorflow.keras import layers, Model
from keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img

Collecting pydicom
  Downloading pydicom-2.4.3-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.4.3


#**Data directory**

In [2]:
base_dir = '/content/drive/MyDrive/PET-CTDICOMWholeDataset'

training_set_dir = os.path.join(base_dir, 'trainingSet')
test_set_dir = os.path.join(base_dir, 'testSet')

train_lungCancer_dir = os.path.join(training_set_dir, 'lungCancer')
train_normal_dir = os.path.join(training_set_dir, 'normal')
test_lungCancer_dir = os.path.join(test_set_dir, 'lungCancer')
test_normal_dir = os.path.join(test_set_dir, 'normal')

In [3]:
print('total training lungCancer images:', len(os.listdir(train_lungCancer_dir)))
print('total training normal images:', len(os.listdir(train_normal_dir)))
print('total test lungCancer images:', len(os.listdir(test_lungCancer_dir)))
print('total test normal images:', len(os.listdir(test_normal_dir)))

total training lungCancer images: 719
total training normal images: 726
total test lungCancer images: 199
total test normal images: 207


#**Checking patient overlap**

In [4]:
def check_data_leakage(directory1, directory2):
    filenames_dir1 = set(os.listdir(directory1))
    filenames_dir2 = set(os.listdir(directory2))

    common_filenames = filenames_dir1.intersection(filenames_dir2)

    if len(common_filenames) > 0:
        print("Data leakage detected!")
        print("Common image filenames between the directories:", common_filenames)
    else:
        print("No data leakage detected.")

In [5]:
check_data_leakage(train_lungCancer_dir, test_lungCancer_dir)
check_data_leakage(train_lungCancer_dir, test_normal_dir)
check_data_leakage(train_normal_dir, test_normal_dir)
check_data_leakage(train_normal_dir, test_lungCancer_dir)

No data leakage detected.
No data leakage detected.
No data leakage detected.
No data leakage detected.


##**Data generation**

In [6]:
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest'
)

def load_and_preprocess_dicom(dcm_path):
    dcm_data = pydicom.dcmread(dcm_path)
    image_data = dcm_data.pixel_array
    cropped_image_data = image_data[66:311, 100:557,:]
    return cropped_image_data

In [7]:
train_images = np.array([load_and_preprocess_dicom(os.path.join(train_lungCancer_dir, filename)) for filename in os.listdir(train_lungCancer_dir) if filename.endswith('.dcm')])
train_labels = np.array([1] * len(train_images))

normal_train_images = np.array([load_and_preprocess_dicom(os.path.join(train_normal_dir, filename)) for filename in os.listdir(train_normal_dir) if filename.endswith('.dcm')])
normal_train_labels = np.array([0] * len(normal_train_images))

train_images = np.concatenate((train_images, normal_train_images))
train_labels = np.concatenate((train_labels, normal_train_labels))

train_generator = train_datagen.flow(train_images, train_labels,
        batch_size=80,
        shuffle = True)

test_images = np.array([load_and_preprocess_dicom(os.path.join(test_lungCancer_dir, filename)) for filename in os.listdir(test_lungCancer_dir) if filename.endswith('.dcm')])
test_labels = np.array([1] * len(test_images))

normal_test_images = np.array([load_and_preprocess_dicom(os.path.join(test_normal_dir, filename)) for filename in os.listdir(test_normal_dir) if filename.endswith('.dcm')])
normal_test_labels = np.array([0] * len(normal_test_images))

test_images = np.concatenate((test_images, normal_test_images))
test_labels = np.concatenate((test_labels, normal_test_labels))

#**Defining InceptionV3 model**

In [8]:
pre_trained_model1 = InceptionV3(include_top=False,
                                        weights= 'imagenet',
                                        input_shape = (245, 457, 3))

for layer in pre_trained_model1.layers:
  layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [9]:
pre_trained_model1.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 245, 457, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 122, 228, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 122, 228, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                      

In [10]:
last_layer1 = pre_trained_model1.get_layer('mixed10')
print('last layer output shape: ', last_layer1.output_shape)
last_output1 = last_layer1.output

x = layers.Flatten()(last_output1)
x = layers.Dense(2048, activation='relu')(x)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(32, activation='relu')(x)
x = layers.Dense(1)(x)
x = layers.Activation(tf.nn.sigmoid)(x)

model1 = Model(pre_trained_model1.input, x)

last layer output shape:  (None, 6, 12, 2048)


In [11]:
# model.summary()

In [12]:
savingPath = os.makedirs('/content/sample_data/temporaryWeights')
checkpoint_path = '/content/sample_data/temporaryWeights'
checkpoint_dir = os.path.dirname(checkpoint_path)
print(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, verbose= 1)

/content/sample_data/temporaryWeights


In [13]:
model1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 0.001), loss= 'binary_crossentropy', metrics= [tf.keras.metrics.BinaryAccuracy(name="Binary_accuracy"),
                        tf.keras.metrics.Precision(name='Precision'),
                        tf.keras.metrics.Recall(name='Recall'),
                        tf.keras.metrics.TruePositives(name='TP'),
                        tf.keras.metrics.TrueNegatives(name='TN'),
                        tf.keras.metrics.FalseNegatives(name='FN'),
                        tf.keras.metrics.FalsePositives(name='FP'),
                        tf.keras.metrics.AUC(name='AUC')])

In [None]:
history = model1.fit(train_generator, epochs=20, validation_data=(test_images, test_labels),
            verbose = 1,
            callbacks = [cp_callback])

Epoch 1/20
Epoch 1: val_loss improved from inf to 26.45264, saving model to /content/sample_data/temporaryWeights
Epoch 2/20
Epoch 2: val_loss improved from 26.45264 to 13.68817, saving model to /content/sample_data/temporaryWeights
Epoch 3/20
Epoch 3: val_loss improved from 13.68817 to 2.32252, saving model to /content/sample_data/temporaryWeights
Epoch 4/20
Epoch 4: val_loss did not improve from 2.32252
Epoch 5/20
Epoch 5: val_loss did not improve from 2.32252
Epoch 6/20
Epoch 6: val_loss did not improve from 2.32252
Epoch 7/20
Epoch 7: val_loss improved from 2.32252 to 2.09770, saving model to /content/sample_data/temporaryWeights
