In [0]:
import json
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!ls ~/.kaggle

In [0]:
!pip install -q kaggle

In [0]:
!kaggle datasets download --force paultimothymooney/breast-histopathology-images

In [0]:
!mkdir dataset

In [0]:
!unzip breast-histopathology-images.zip

In [0]:
cd /content/dataset

In [0]:
!unzip /content/IDC_regular_ps50_idx5.zip

In [0]:
cd /content

In [0]:
!mkdir clean_dir

In [0]:
import os

In [0]:
ORIG_INPUT_DATASET = "/content/dataset"
BASE_PATH = "/content/clean_dir"
 
TRAIN_PATH = os.path.sep.join([BASE_PATH, "training"])
VAL_PATH = os.path.sep.join([BASE_PATH, "validation"])
TEST_PATH = os.path.sep.join([BASE_PATH, "testing"])
 
TRAIN_SPLIT = 0.8
VAL_SPLIT = 0.1

In [0]:
from imutils import paths
import random
import shutil

In [0]:
imagePaths = list(paths.list_images(ORIG_INPUT_DATASET))
random.seed(40)
random.shuffle(imagePaths)

# Train-Val-Test Split
i = int(len(imagePaths) * TRAIN_SPLIT)
trainPaths = imagePaths[:i]
testPaths = imagePaths[i:]

j = int(len(trainPaths) * VAL_SPLIT)
valPaths = trainPaths[:j]
trainPaths = trainPaths[j:]

datasets = [
	("training", trainPaths, TRAIN_PATH),
	("validation", valPaths, VAL_PATH),
	("testing", testPaths, TEST_PATH)
]

# loop over the datasets
for (dType, imagePaths, baseOutput) in datasets:

	# if the output base output directory does not exist, creating it
	if not os.path.exists(baseOutput):
		os.makedirs(baseOutput)

	# loop over the input image paths
	for inputPath in imagePaths:
		# extract the filename of the input image and extract the
		# class label ("0" for "negative" and "1" for "positive")
		filename = inputPath.split(os.path.sep)[-1]
		label = filename[-5:-4]

		# build the path to the label directory
		labelPath = os.path.sep.join([baseOutput, label])

		# if the label output directory does not exist, create it
		if not os.path.exists(labelPath):
			os.makedirs(labelPath)

		p = os.path.sep.join([labelPath, filename])
		shutil.copy2(inputPath, p)

In [0]:
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.layers import add
from keras.regularizers import l2
from keras import backend as K
from keras.layers.core import Dropout

In [0]:
class WideRN:
  def main_block(x, filters, n, strides, dropout):
    
	  # Normal part
	  x_res = Conv2D(filters, (3,3), strides=strides, padding="same")(x)
	  x_res = BatchNormalization()(x_res)
	  x_res = Activation('relu')(x_res)
	  x_res = Conv2D(filters, (3,3), padding="same")(x_res)
	  # Alternative branch
	  x = Conv2D(filters, (1,1), strides=strides)(x)
	  # Merge Branches
	  x = add([x_res, x])

	  for i in range(n-1):
		  # Residual conection
		  x_res = BatchNormalization()(x)
		  x_res = Activation('relu')(x_res)
		  x_res = Conv2D(filters, (3,3), padding="same")(x_res)
		  # Apply dropout if given
		  if dropout: x_res = Dropout(dropout)(x)
		  # Second part
		  x_res = BatchNormalization()(x_res)
		  x_res = Activation('relu')(x_res)
		  x_res = Conv2D(filters, (3,3), padding="same")(x_res)
		  # Merge branches
		  x = add([x, x_res])

	  # Inter block part
	  x = BatchNormalization()(x)
	  x = Activation('relu')(x)
	  return x
  
  @staticmethod
  def build(input_dims, output_dim, n, k, act= "relu", dropout=None):
    n = (n-4)//6
    # This returns a tensor input to the model
    inputs = Input(shape=(input_dims))

	  # Head of the model
    x = Conv2D(16, (3,3), padding="same")(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # 3 Blocks (normal-residual)
    x = WideRN.main_block(x, 16*k, n, (1,1), dropout) # 0
    x = WideRN.main_block(x, 32*k, n, (2,2), dropout) # 1
    x = WideRN.main_block(x, 64*k, n, (2,2), dropout) # 2

    # Final part of the model
    x = AveragePooling2D((8,8))(x)
    x = Flatten()(x)
    x = Dense(output_dim)(x)
    outputs = Activation("softmax")(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model


In [0]:
model = WideRN.build((48,48,3), 2, 22, 4)

In [0]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 48, 48, 3)    0                                            
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 48, 48, 16)   448         input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 48, 48, 16)   64          conv2d_3[0][0]                   
__________________________________________________________________________________________________
activation_3 (Activation)       (None, 48, 48, 16)   0           batch_normalization_3[0][0]      
__________________________________________________________________________________________________
conv2d_4 (

In [0]:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import numpy as np

In [0]:
NUM_EPOCHS = 30
Batch = 64

In [0]:
trainPaths = list(paths.list_images(TRAIN_PATH))
totalTrain = len(trainPaths)
totalVal = len(list(paths.list_images(VAL_PATH)))
totalTest = len(list(paths.list_images(TEST_PATH)))

# Calculating class weights due to imbalanced dataset
trainLabels = [int(p.split(os.path.sep)[-2]) for p in trainPaths]
trainLabels = np_utils.to_categorical(trainLabels)
classTotals = trainLabels.sum(axis=0)
classWeight = classTotals.max() / classTotals

In [0]:
trainAug = ImageDataGenerator(
	rescale=1 / 255.0,
	rotation_range=20,
	zoom_range=0.05,
	width_shift_range=0.1,
	height_shift_range=0.1,
	shear_range=0.05,
	horizontal_flip=True,
	vertical_flip=True,
	fill_mode="nearest")

In [0]:
valAug = ImageDataGenerator(rescale=1 / 255.0)
testAug = ImageDataGenerator(rescale =1 /255.0)

In [0]:
trainGen = trainAug.flow_from_directory(
	TRAIN_PATH,
	class_mode="categorical",
	target_size=(48, 48),
	color_mode="rgb",
	shuffle=True,
	batch_size=Batch)

valGen = valAug.flow_from_directory(
	VAL_PATH,
	class_mode="categorical",
	target_size=(48, 48),
	color_mode="rgb",
	shuffle=False,
	batch_size=Batch)

testGen = testAug.flow_from_directory(
	TEST_PATH,
	class_mode="categorical",
	target_size=(48, 48),
	color_mode="rgb",
	shuffle=False,
	batch_size=Batch)


Found 199818 images belonging to 2 classes.
Found 22201 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


In [0]:
model.compile(optimizer="adam" , loss="binary_crossentropy", metrics=['accuracy'])

In [0]:
hist = model.fit_generator(
	trainGen,
	steps_per_epoch=totalTrain // Batch,
	validation_data=valGen,
	validation_steps=totalVal // Batch,
	class_weight=classWeight,
	epochs=NUM_EPOCHS)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30

In [0]:
predId = model.predict_generator(testGen,
	steps=(totalTest // Batch) + 1)

In [0]:
predId = np.argmax(predId, axis=1)
print(classification_report(testGen.classes, predId,
	target_names=testGen.class_indices.keys()))

              precision    recall  f1-score   support

           0       0.92      0.90      0.91     39693
           1       0.76      0.81      0.78     15812

    accuracy                           0.87     55505
   macro avg       0.84      0.85      0.85     55505
weighted avg       0.88      0.87      0.87     55505



In [0]:
cm = confusion_matrix(testGen.classes, predId)
total = sum(sum(cm))
acc = (cm[0, 0] + cm[1, 1]) / total
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
 
print(cm)
print("acc: {:.4f}".format(acc))
print("sensitivity: {:.4f}".format(sensitivity))
print("specificity: {:.4f}".format(specificity))

[[35648  4045]
 [ 2994 12818]]
acc: 0.8732
sensitivity: 0.8981
specificity: 0.8107


##### FINAL ACCURACY ON TEST DATASET ~ 87.32%