In [1]:
import os
INPUT_DATASET = "datasets/original"
BASE_PATH = "datasets/idc"
TRAIN_PATH = os.path.sep.join([BASE_PATH, "training"])
VAL_PATH = os.path.sep.join([BASE_PATH, "validation"])
TEST_PATH =os.path.sep.join([BASE_PATH, "testing"])
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1

In [2]:
from imutils import paths
import random, shutil, os
originalPaths=list(paths.list_images(INPUT_DATASET))
random.seed(7)
random.shuffle(originalPaths)
index=int(len(originalPaths)*TRAIN_SPLIT)
trainPaths =originalPaths[:index]
testPaths=originalPaths[index:]
index = int(len(trainPaths)*VAL_SPLIT)
valPaths=trainPaths[:index]
trainPaths=trainPaths[index:] 
datasets=[("training", trainPaths, TRAIN_PATH), 
          ("validation", valPaths, VAL_PATH), 
          ("testing", testPaths, TEST_PATH)]

for (setType, originalPaths, basePath) in datasets:
          print(f'Building {setType} set')
          if not os.path.exists(basePath):
              print(f'Building directory {basePath}')
              os.makedirs(basePath)
          for path in originalPaths:
              file=path.split(os.path.sep)[-1]
              label=file[-5:-4]
              labelPath=os.path.sep.join([basePath, label])
              if not os.path.exists(labelPath):
                  print(f'Building directory {labelPath}')
                  os.makedirs(labelPath) 
              newPath=os.path.sep.join([labelPath, file])
              shutil.copy2(path, newPath)

Building training set
Building validation set
Building testing set


In [3]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

class CancerNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = tf.keras.models.Sequential()
        shape = (height, width, depth)
        channelDim = -1
        
        if K.image_data_format() == "channels_first":
            shape = (depth, height, width)
            channelDim = 1

        model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=shape))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))

        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(units=256, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.Dropout(0.5))

        model.add(tf.keras.layers.Dense(units=classes, activation='softmax'))
        return model







In [4]:
import matplotlib
matplotlib.use("Agg")

train_datagen = ImageDataGenerator(rescale = 1./255, 
                                   shear_range= 0.2,
                                   zoom_range =0.2, 
                                   horizontal_flip = True) 
training_set = train_datagen.flow_from_directory('datasets/idc/training', 
                                                 target_size = (64, 64),
                                                 batch_size =32,
                                                 class_mode= 'binary')

Found 199818 images belonging to 2 classes.


In [5]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.utils import to_categorical  
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix 
from imutils import paths
import matplotlib.pyplot as plt 
import numpy as np 
import os

NUM_EPOCHS=4; INIT_LR=1e-2; BS=32
trainPaths=list(paths.list_images (TRAIN_PATH))
lenTrain=len(trainPaths)
lenVal = len(list(paths.list_images (VAL_PATH)))
lenTest = len(list(paths.list_images(TEST_PATH)))

trainLabels=[int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels=to_categorical(trainLabels)  
classTotals =trainLabels.sum(axis=0)
classWeight=classTotals.max()/classTotals

trainAug=ImageDataGenerator(
    rescale=1/255.0,
    rotation_range=20,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest")

valAug=ImageDataGenerator(rescale=1 / 255.0)

trainGen = trainAug.flow_from_directory(
    TRAIN_PATH,
    class_mode="categorical",
    target_size=(48,48),
    color_mode="rgb",
    shuffle=True,
    batch_size=BS)

valGen = valAug.flow_from_directory(
    VAL_PATH,
    class_mode="categorical",
    target_size=(48,48),
    color_mode="rgb",
    shuffle=False,
    batch_size=BS)

testGen =valAug.flow_from_directory(
    TEST_PATH,
    class_mode="categorical", 
    target_size=(48,48),
    color_mode="rgb", 
    shuffle=False,
    batch_size=BS)


Found 199818 images belonging to 2 classes.
Found 22201 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


In [6]:
model=CancerNet.build(width=48,height=48, depth=3, classes=2)

model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

model.summary()




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 46, 46, 32)        896       
                                                                 
 batch_normalization (Batch  (None, 46, 46, 32)        128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 23, 23, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 23, 23, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 21, 21, 64)        18496     
                                                                 
 batch_normalization_1 (Bat  (None, 21, 21, 64)      

In [7]:
M=model.fit(x = trainGen,validation_data =valGen, epochs = 40)

Epoch 1/40


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [8]:
print("Now evaluating the model")
testGen.reset()
pred_indices = model.predict_generator(testGen, steps=(lenTest//BS)+1)

pred_indices = np.argmax(pred_indices, axis=1)

print(classification_report(testGen.classes, pred_indices, target_names=testGen.class_indices.keys()))

cm = confusion_matrix(testGen.classes, pred_indices)
total = sum(sum(cm))
accuracy = (cm[0, 0] + cm[1, 1]) / total
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1]) 
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
print(cm)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')

num_epochs = len(M.history['loss'])

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(1, num_epochs + 1), M.history["loss"], label="train_loss") 
plt.plot(np.arange(1, num_epochs + 1), M.history["val_loss"], label="val_loss")  
plt.plot(np.arange(1, num_epochs + 1), M.history["accuracy"], label="train_accuracy")  
plt.plot(np.arange(1, num_epochs + 1), M.history["val_accuracy"], label="val_accuracy")
plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy") 
plt.legend(loc="lower left")
plt.savefig('plot.png')


Now evaluating the model


  pred_indices = model.predict_generator(testGen, steps=(lenTest//BS)+1)


              precision    recall  f1-score   support

           0       0.93      0.55      0.69     39736
           1       0.44      0.89      0.59     15769

    accuracy                           0.65     55505
   macro avg       0.68      0.72      0.64     55505
weighted avg       0.79      0.65      0.66     55505

[[21946 17790]
 [ 1702 14067]]
Accuracy: 0.6488244302315107
Specificity: 0.8920667131714123
Sensitivity: 0.5522951479766459
