# Importing Libraries and Modules

In [1]:
import os, random, shutil
import numpy as np
from imutils import paths
import tensorflow as tf
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras import backend as K
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix





# Setting up Dataset Paths

In [2]:
INPUT_DATASET = "datasets/original"
BASE_PATH = "datasets/idc"
TRAIN_PATH = os.path.sep.join([BASE_PATH, "training"])
VAL_PATH = os.path.sep.join([BASE_PATH, "validation"])
TEST_PATH =  os.path.sep.join([BASE_PATH, "testing"])
TRAIN_SPLIT = 0.8
VAL_SPLIT =  0.1

# Data Preprocessing

In [8]:
originalPaths = list(paths.list_images(INPUT_DATASET))
random.seed (7)
random.shuffle(originalPaths)
index = int(len(originalPaths)*TRAIN_SPLIT)
trainPaths = originalPaths[:index]
testPaths = originalPaths[index:]
index = int(len(trainPaths)*VAL_SPLIT)
valPaths = trainPaths [:index]
trainPaths = trainPaths[index:]
datasets = [("training", trainPaths, TRAIN_PATH),
            ("validation" , valPaths, VAL_PATH),
            ("testing", testPaths, TEST_PATH)] 
for (setType, originalPaths, basePath) in datasets :
        print (f' Building {setType} set')
        if not os.path.exists(basePath):
            print(f' Building directory {basePath}' )
            os.makedirs(basePath)
        for path in originalPaths:
            file = path.split(os.path.sep) [ -1]
            label = file [-5:-4]
            labelPath = os.path.sep.join([basePath,label])
            if not os.path.exists(labelPath) :
                print(f'BuiIding directory {labelPath}' )
                os.makedirs(labelPath)
            newPath = os.path.sep.join([labelPath, file])
            shutil.copy2(path, newPath)

 Building training set
 Building validation set
 Building testing set


# Building the CancerNet Model

In [3]:

class CancerNet:
    def build(width, height, depth, classes):
        model = tf.keras.models.Sequential()
        shape = (height, width, depth)
        channelDim = -1
        if K.image_data_format() == "channels_first":
            shape = (depth, height, width)
            channelDim = 1

        model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, activation = 'relu', input_shape = shape))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim) )
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))
        model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = 3, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(units = 256, activation = 'relu'))
        model.add(tf.keras.layers.BatchNormalization(axis = channelDim))
        model.add(tf.keras.layers.Dropout(0.5))

        model.add(tf.keras.layers.Dense(units = classes, activation = 'softmax'))

        return model

# Data Augmentation and ImageDataGenerator

In [4]:
matplotlib.use('Agg')

train_datagen = ImageDataGenerator(rescale = 1/255,
                                shear_range = 0.2,
                                zoom_range = 0.2,
                                horizontal_flip = True)
Training_set = train_datagen.flow_from_directory('datasets/idc/training',
                                                 target_size = (64,64),
                                                 batch_size = 32,
                                                 class_mode = 'binary')

Found 255815 images belonging to 2 classes.


In [5]:
len(Training_set)

7995

# Data Configuration and Augmentation Setup

In [6]:
NUM_EPOCHS=10; INIT_LR=1e-2; BS=32

trainPaths = list(paths.list_images(TRAIN_PATH))
lenVal = len(list(paths.list_images(VAL_PATH)))
lenTest = len(list(paths.list_images(TEST_PATH)))
trainLabels = [int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels = to_categorical(trainLabels)
classTotals = trainLabels.sum(axis=0)
classWeight = classTotals.max()/classTotals

trainAug = ImageDataGenerator(
    rescale = 1/255.0,
    rotation_range = 20,
    zoom_range = 0.05,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.05,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest")

valAug = ImageDataGenerator(rescale=1/255.0)

trainGen = trainAug.flow_from_directory(
    TRAIN_PATH,
    class_mode = "categorical",
    target_size = (48,48),
    color_mode = "rgb",
    shuffle = True,
    batch_size = BS)
valGen = valAug.flow_from_directory(
    VAL_PATH,
    class_mode = "categorical",
    target_size = (48,48),
    color_mode = "rgb",
    shuffle = False,
    batch_size = BS)
testGen = valAug.flow_from_directory(
TEST_PATH,
class_mode = "categorical",
target_size = (48,48),
color_mode = "rgb",
shuffle = False,
batch_size = BS)

Found 255815 images belonging to 2 classes.
Found 42660 images belonging to 2 classes.
Found 99906 images belonging to 2 classes.


# Model Definition, Compilation, and Training

In [10]:
model = CancerNet.build(width = 48, height = 48, depth = 3, classes = 2)
model.compile(loss = "binary_crossentropy", optimizer = 'adam', metrics = ["accuracy"])
M = model.fit(x=trainGen, validation_data = valGen, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 46, 46, 32)        896       
                                                                 
 batch_normalization_7 (Bat  (None, 46, 46, 32)        128       
 chNormalization)                                                
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 23, 23, 32)        0         
 g2D)                                                            
                                                                 
 dropout_4 (Dropout)         (None, 23, 23, 32)        0         
                                                                 
 conv2d_7 (Conv2D)           (None, 21, 21, 64)        18496     
                                                                 
 batch_normalization_8 (Bat  (None, 21, 21, 64)       

# Plotting Loss and Accuracy

In [12]:
print("Now evaluating the model")
testGen.reset( )
pred_indices = model.predict_generator(testGen, steps = (lenTest//BS)+1)
pred_indices = np.argmax(pred_indices, axis = 1)
print(classification_report(testGen.classes, pred_indices, target_names = testGen.class_indices.keys()))
cm = confusion_matrix(testGen.classes, pred_indices)
total = sum(sum(cm))
accuracy = (cm[0,0]+cm[1,1])/total
specificity = cm[1,1]/(cm[1,0]+cm[1,1])
sensitivity = cm[0,0]/(cm[0,0]+cm[0,1])
print(cm)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')

N = NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0,N), M.history["loss"], label = "train_loss")
plt.plot(np.arange(0,N), M.history["val_loss"], label = "val_loss")
plt.plot(np.arange(0,N), M.history["acc"], label = "train_acc")
plt.plot(np.arange(0,N), M.history["val_acc"], label = "val_acc")
plt.titie("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc = "lower left")
plt.savefig('plot.png')

Now evaluating the model


  pred_indices = model.predict_generator(testGen, steps = (lenTest//BS)+1)


              precision    recall  f1-score   support

           0       0.94      0.80      0.87     71451
           1       0.64      0.87      0.74     28455

    accuracy                           0.82     99906
   macro avg       0.79      0.84      0.80     99906
weighted avg       0.85      0.82      0.83     99906

[[57308 14143]
 [ 3674 24781]]
Accuracy: 0.8216623626208636
Specificity: 0.8708838516956598
Sensitivity: 0.8020601531119228


KeyError: 'acc'