In [7]:
import os

INPUT_DATASET = 'Datasets/Original'

BASE_PATH = "Datasets/IDC"

TRAIN_PATH = os.path.sep.join([BASE_PATH , "training"])

TEST_PATH = os.path.sep.join([BASE_PATH , "testing"])

VAL_PATH = os.path.sep.join([BASE_PATH , "validation"])


TRAIN_SPLIT = 0.8

VAL_SPLIT = 0.1


In [10]:
import config
import random , shutil , os
from imutils import paths


originalPaths = list(paths.list_images(config.INPUT_DATASET))
random.seed(7)
random.shuffle(originalPaths)

index = int(len(originalPaths) * config.TRAIN_SPLIT)
trainPaths = originalPaths[:index]
testPaths = originalPaths[index:]


index = int(len(trainPaths) * config.VAL_SPLIT)
valPaths = trainPaths[:index]
trainPaths = trainPaths[index:]

datasets = (['training' , trainPaths , config.TRAIN_PATH] , 
            ['validation' , valPaths , config.VAL_PATH] , 
            ['testing' , testPaths , config.TEST_PATH])

for (setType , originalPaths , basePath) in datasets :
    print(f'Building {setType} set')
    
    
    if not os.path.exists(basePath) :
        print(f'Building directory {basePath}')
        os.makedirs(basePath)
        
        
    for path in originalPaths :
        file = path.split(os.path.sep)[-1]
        label = file[-5:-4]
        
        labelPath = os.path.sep.join([basePath , label])
        if not os.path.exists(labelPath) :
            print(f'Building directory {labelPath}')
            os.makedirs(labelPath)
            
            
        newPath = os.path.sep.join([labelPath , file])
        shutil.copy2(path , newPath)
        
        

Building training set
Building directory Datasets/IDC/training
Building directory Datasets/IDC/training/1
Building directory Datasets/IDC/training/0
Building validation set
Building directory Datasets/IDC/validation
Building directory Datasets/IDC/validation/0
Building directory Datasets/IDC/validation/1
Building testing set
Building directory Datasets/IDC/testing
Building directory Datasets/IDC/testing/0
Building directory Datasets/IDC/testing/1


In [8]:
from keras.models import Sequential
from keras.layers.normalization import layer_normalization
from tensorflow.keras.layers import BatchNormalization
#from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import SeparableConv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras import backend as k


class CancerNet :
    @staticmethod
    def build(width , height , depth , classes) :
        model = Sequential()
        shape = (height , width , depth)
        channelDim = -1
        
        if k.image_data_format == 'channels_first' :
            shape = (depth , height , width)
            channelDim = 1
        
        model.add(SeparableConv2D(32 , (3,3) , padding='same' , input_shape=shape))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))
        
        model.add(SeparableConv2D(64 , (3,3) , padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        
        model.add(SeparableConv2D(64 , (3,3) , padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))
        
        model.add(SeparableConv2D(128 , (3,3) , padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        
        
        model.add(SeparableConv2D(128 , (3,3) , padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        
        model.add(SeparableConv2D(128 , (3,3) , padding='same'))
        model.add(Activation('relu'))
        model.add(BatchNormalization(axis=channelDim))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))
        
        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))
        
        model.add(Dense(classes))
        model.add(Activation('softmax'))
        
        return model
    

In [17]:
import matplotlib
matplotlib.use("Agg")

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.optimizers import adagrad_v2
from tensorflow.keras.optimizers import Adagrad
from keras.utils import np_utils


from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


from cancernet import CancerNet
import config


from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import os


NUM_EPOCHS = 40
INIT_LR = 1e-2
BS = 32

trainPaths = list(paths.list_images(config.TRAIN_PATH))
lenTrain = len(trainPaths)
lenVal = len((list(paths.list_images(config.VAL_PATH))))
lenTest = len(list(paths.list_images(config.TEST_PATH)))

trainLabels=[int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels=np_utils.to_categorical(trainLabels)
classTotals=trainLabels.sum(axis=0)
classWeight=classTotals.max()/classTotals

trainAug = ImageDataGenerator(
           rescale=1/255.0 ,
           rotation_range=20 ,
           zoom_range=0.05 ,
           width_shift_range=0.1 ,
           height_shift_range=0.1 ,
           shear_range=0.05 ,
           vertical_flip=True ,
           horizontal_flip=True ,
           fill_mode='nearest')

valAug = ImageDataGenerator(rescale=1/255.0)


trainGen = trainAug.flow_from_directory(
                                         config.TRAIN_PATH ,
                                         class_mode='categorical' ,
                                         target_size=(48,48) ,
                                         color_mode='rgb' , 
                                         shuffle=True ,
                                          batch_size=BS)


valGen = trainAug.flow_from_directory(
                                         config.VAL_PATH ,
                                         class_mode='categorical' ,
                                         target_size=(48,48) ,
                                         color_mode='rgb' , 
                                         shuffle=False ,
                                          batch_size=BS)


testGen = trainAug.flow_from_directory(
                                         config.TEST_PATH ,
                                         class_mode='categorical' ,
                                         target_size=(48,48) ,
                                         color_mode='rgb' , 
                                         shuffle=False ,
                                          batch_size=BS)


model = CancerNet.build(width=48 , height=48 , depth=3 , classes=2)
opt = Adagrad(lr=INIT_LR , decay = INIT_LR/NUM_EPOCHS)
model.compile(loss='binary_crossentropy' , optimizer=opt , metrics=['accuracy'])

M=model.fit(
  trainGen,
  steps_per_epoch=lenTrain//BS,
  validation_data=valGen,
  validation_steps=lenVal//BS,
  class_weight=classWeight,
  epochs=NUM_EPOCHS)


print("Now evaluating the model")
testGen.reset()
pred_indices = model.predict_generator(testGen , steps=(lenTest//BS)+1)
pred_indices = np.argmax(pred_indices , axis=1)


print(classification_report(
      testGen.classes ,
      pred_indices , 
      target_names=testGen.class_indices.keys()))

cm = confusion_matrix(testGen.classes , pred_indices)
total=sum(sum(cm))
accuracy=(cm[0,0]+cm[1,1])/total
specificity=cm[1,1]/(cm[1,0]+cm[1,1])
sensitivity=cm[0,0]/(cm[0,0]+cm[0,1])

print(cm)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')

N = NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0,N), M.history["loss"], label="train_loss")
plt.plot(np.arange(0,N), M.history["val_loss"], label="val_loss")
plt.plot(np.arange(0,N), M.history["acc"], label="train_acc")
plt.plot(np.arange(0,N), M.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig('plot.png')

Found 255750 images belonging to 2 classes.
Found 42601 images belonging to 2 classes.
Found 99926 images belonging to 2 classes.




ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [9]:
trainGen = trainAug.flow_from_directory(
                                         config.TRAIN_PATH ,
                                         class_mode='categorical' ,
                                         target_size=(48,48) ,
                                         color_mode='rgb' , 
                                         shuffle=True ,
                                          batch_size=BS)

Found 255750 images belonging to 2 classes.


In [10]:
type(trainGen)

keras.preprocessing.image.DirectoryIterator

In [13]:
valGen = trainAug.flow_from_directory(
                                         config.VAL_PATH ,
                                         class_mode='categorical' ,
                                         target_size=(48,48) ,
                                         color_mode='rgb' , 
                                         shuffle=False ,
                                          batch_size=BS)

Found 42601 images belonging to 2 classes.


In [14]:
trainPaths=list(paths.list_images(config.TRAIN_PATH))

In [16]:
len(trainPaths)

255750

In [18]:
trainLabels=[int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels=np_utils.to_categorical(trainLabels)
classTotals=trainLabels.sum(axis=0)
classWeight=classTotals.max()/classTotals
print(classTotals)
print(classWeight)

[183200.  72550.]
[1.       2.525155]


In [19]:
len(trainLabels)

255750

In [4]:
import matplotlib
matplotlib.use("Agg")

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adagrad
from keras.utils import np_utils
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from cancernet import CancerNet
import config
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import os

NUM_EPOCHS=40; INIT_LR=1e-2; BS=32

trainPaths=list(paths.list_images(config.TRAIN_PATH))
lenTrain=len(trainPaths)
lenVal=len(list(paths.list_images(config.VAL_PATH)))
lenTest=len(list(paths.list_images(config.TEST_PATH)))

trainLabels=[int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels=np_utils.to_categorical(trainLabels)
classTotals=trainLabels.sum(axis=0)
classWeight=classTotals.max()/classTotals

trainAug = ImageDataGenerator(
	rescale=1/255.0,
	rotation_range=20,
	zoom_range=0.05,
	width_shift_range=0.1,
	height_shift_range=0.1,
	shear_range=0.05,
	horizontal_flip=True,
	vertical_flip=True,
	fill_mode="nearest")

valAug=ImageDataGenerator(rescale=1 / 255.0)

trainGen = trainAug.flow_from_directory(
	config.TRAIN_PATH,
	class_mode="categorical",
	target_size=(48,48),
	color_mode="rgb",
	shuffle=True,
	batch_size=BS)
valGen = valAug.flow_from_directory(
	config.VAL_PATH,
	class_mode="categorical",
	target_size=(48,48),
	color_mode="rgb",
	shuffle=False,
	batch_size=BS)
testGen = valAug.flow_from_directory(
	config.TEST_PATH,
	class_mode="categorical",
	target_size=(48,48),
	color_mode="rgb",
	shuffle=False,
	batch_size=BS)

model=CancerNet.build(width=48,height=48,depth=3,classes=2)
opt=Adagrad(lr=INIT_LR,decay=INIT_LR/NUM_EPOCHS)
model.compile(loss="binary_crossentropy",optimizer=opt,metrics=["accuracy"])


M=model.fit_generator(
	trainGen,
	steps_per_epoch=lenTrain//BS,
	validation_data=valGen,
	validation_steps=lenVal//BS,
	class_weight=classWeight,
	epochs=NUM_EPOCHS)

print("Now evaluating the model")
testGen.reset()
pred_indices=model.predict_generator(testGen,steps=(lenTest//BS)+1)

pred_indices=np.argmax(pred_indices,axis=1)

print(classification_report(testGen.classes, pred_indices, target_names=testGen.class_indices.keys()))

cm=confusion_matrix(testGen.classes,pred_indices)
total=sum(sum(cm))
accuracy=(cm[0,0]+cm[1,1])/total
specificity=cm[1,1]/(cm[1,0]+cm[1,1])
sensitivity=cm[0,0]/(cm[0,0]+cm[0,1])
print(cm)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')

N = NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0,N), M.history["loss"], label="train_loss")
plt.plot(np.arange(0,N), M.history["val_loss"], label="val_loss")
plt.plot(np.arange(0,N), M.history["acc"], label="train_acc")
plt.plot(np.arange(0,N), M.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig('plot.png')

Found 255750 images belonging to 2 classes.
Found 42601 images belonging to 2 classes.
Found 99926 images belonging to 2 classes.


2021-09-25 22:33:44.275885: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-09-25 22:33:44.275929: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-09-25 22:33:44.275958: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ews): /proc/driver/nvidia/version does not exist
2021-09-25 22:33:44.277155: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()