In [2]:
# Import necessary libraries and modules
import pandas as pd
import numpy as np
import os
from glob import glob
import random
import matplotlib.pylab as plt

from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Conv2D, MaxPool2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping

%matplotlib inline

import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by the libraries imported above.
warnings.filterwarnings('ignore')

In [3]:
# Recursively gather the path of every PNG patch under the dataset root.
imagePatches = glob(
    '../input/breast-histopathology-images/IDC_regular_ps50_idx5/**/*.png',
    recursive=True,
)
# Print the first ten paths as a quick sanity check.
for patch_path in imagePatches[:10]:
    print(patch_path)

In [4]:
# Partition the patch paths by the class label encoded in the filename suffix.

class0 = []  # 0 = no cancer
class1 = []  # 1 = cancer

for filename in imagePatches:
    if filename.endswith("class0.png"):
        class0.append(filename)
    elif filename.endswith("class1.png"):
        # Match the positive suffix explicitly so that an unrelated PNG is
        # skipped instead of being silently labelled as cancer.
        class1.append(filename)

In [5]:
# Number of patch paths collected for class 0 (no cancer).
len(class0)

In [6]:
# Number of patch paths collected for class 1 (cancer).
len(class1)

In [7]:
# Draw an equally sized random subset from each class. The requested size is
# capped at the smaller class so random.sample cannot raise ValueError when a
# class holds fewer than SAMPLES_PER_CLASS paths.
SAMPLES_PER_CLASS = 78786
random.seed(42)  # seed first so the subset is repeatable for a given file listing
n_per_class = min(SAMPLES_PER_CLASS, len(class0), len(class1))
sampled_class0 = random.sample(class0, n_per_class)
sampled_class1 = random.sample(class1, n_per_class)

In [8]:
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label):
    """Load the PNG files listed in ``data`` and pair each image with ``label``.

    Args:
        data: Iterable of image file paths. Entries that do not end with
            '.png' are skipped.
        label: Value stored alongside every image that is loaded.

    Returns:
        A list of ``[image, label]`` pairs, where ``image`` is the result of
        ``cv2.imread(path, cv2.IMREAD_COLOR)`` resized to 50x50 with linear
        interpolation (OpenCV loads colour images in BGR channel order).
        Paths that OpenCV cannot read are skipped.
    """
    img_arrays = []
    for path in data:
        if not path.endswith('.png'):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports a missing or corrupt file by returning None
            # instead of raising; passing that to cv2.resize would fail.
            continue
        img_sized = cv2.resize(img, (50, 50), interpolation=cv2.INTER_LINEAR)
        img_arrays.append([img_sized, label])
    return img_arrays


In [9]:
# Load the sampled patches of each class, tagged 0 (no cancer) and 1 (cancer).
class0_array = get_image_arrays(data=sampled_class0, label=0)
class1_array = get_image_arrays(data=sampled_class1, label=1)

In [10]:
# Inspect the second [image, label] pair loaded for class 0.
class0_array[1]

In [11]:
# Read one specific class-1 patch and display the shape of the loaded array.
sample_patch_path = '../input/breast-histopathology-images/IDC_regular_ps50_idx5/13689/1/13689_idx5_x801_y1501_class1.png'
test = cv2.imread(sample_patch_path, cv2.IMREAD_COLOR)
test.shape

In [12]:
from keras.preprocessing.image import load_img, img_to_array

In [13]:
def show_img(files):
    """Plot 25 randomly chosen images from ``files`` in a 5x5 grid.

    Indices are drawn with ``np.random.randint``, so the same file may be
    picked more than once. Each image is loaded at 150x150 and shown with
    its axes hidden.
    """
    plt.figure(figsize=(10, 10))
    chosen = np.random.randint(0, len(files), 25)
    for cell, loc in enumerate(chosen, start=1):
        plt.subplot(5, 5, cell)
        pixels = img_to_array(load_img(files[loc], target_size=(150, 150)))
        plt.axis("off")
        plt.imshow(pixels.astype("uint8"))

In [14]:
# Preview a random 5x5 grid drawn from all collected patches.
show_img(files=imagePatches)

In [15]:
# Merge the two classes with plain list concatenation and shuffle the list.
# Each entry is a ragged [image, label] pair, so np.concatenate would have to
# build an object array (or fail outright on recent NumPy versions), and
# random.shuffle on a 2-D NumPy array swaps rows through views, which
# overwrites samples with duplicates of other rows. A Python list shuffles
# correctly.
combined_data = list(class0_array) + list(class1_array)
random.seed(42)
random.shuffle(combined_data)

In [16]:
# Separate the shuffled [image, label] pairs into parallel lists.
X = [features for features, _ in combined_data]
y = [label for _, label in combined_data]

In [17]:
# Stack the images into a single array laid out as (-1, 50, 50, 3).
X = np.asarray(X).reshape((-1, 50, 50, 3))

In [18]:
# Display the shape of the stacked feature array.
X.shape

In [19]:

# Hold out 25% of the samples for testing. Stratifying on the labels keeps the
# class ratio identical in the training and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)
# One-hot encode the labels. num_classes is given explicitly so both label
# matrices always have two columns, whatever labels a split happens to contain.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [20]:
# CNN classifier for 50x50x3 patches with a two-unit (one-hot) output.
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(4, 4), input_shape=(50, 50, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

# NOTE(review): two pooling layers back to back (here and after the second
# convolution) -- kept as designed; confirm this is intentional.
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(filters=32, kernel_size=(4, 4), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())

model.add(Dense(128, activation='relu'))
# The targets are one-hot vectors over two mutually exclusive classes, so the
# head is softmax + categorical cross-entropy. A sigmoid/binary_crossentropy
# head makes Keras resolve 'accuracy' to an element-wise binary accuracy,
# which is not the argmax accuracy used when predictions are evaluated.
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


model.summary()

In [21]:
# Training hyper-parameters passed to the fit calls.
epochs, batch_size = 50, 250

In [22]:
# Fit the CNN; the held-out test split is also used as the validation data.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(X_test, y_test),
)

In [23]:
# CNN accuracy per epoch: training curve first, then validation curve.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_key])
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [24]:
# CNN loss per epoch: training curve first, then validation curve.
for metric_key in ('loss', 'val_loss'):
    plt.plot(history.history[metric_key])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [25]:
# Score the CNN on the held-out test split.
loss, accuracy = model.evaluate(x=X_test, y=y_test)

In [26]:

from sklearn.metrics import confusion_matrix
import seaborn as sns

# Convert the CNN's class scores and the one-hot targets into class indices.
Y_pred = model.predict(X_test)
Y_pred_classes = Y_pred.argmax(axis=1)
Y_true = y_test.argmax(axis=1)

confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

# Render the confusion matrix as an annotated heatmap.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_mtx,
    annot=True,
    fmt='.1f',
    cmap="Greens",
    linewidths=0.01,
    linecolor="gray",
    ax=ax,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [27]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the predictions computed above.
print(classification_report(y_true=Y_true, y_pred=Y_pred_classes))

In [28]:
def img_plot(arr, index=0, bgr=True):
    """Display ``arr[index]`` under the title 'Test Image'.

    Args:
        arr: Indexable collection of images.
        index: Position of the image to show.
        bgr: When True (default), a three-channel image is treated as BGR --
            the channel order produced by the cv2.imread loader used in this
            notebook -- and its channels are reversed so that matplotlib,
            which expects RGB, shows the true colours. Pass False for data
            that is already RGB.
    """
    image = np.asarray(arr[index])
    if bgr and image.ndim == 3 and image.shape[-1] == 3:
        image = image[..., ::-1]
    plt.title('Test Image')
    plt.imshow(image)

In [29]:
# Pick test sample 2 and display it.
index1 = 2
img_plot(X_test, index=index1)

In [30]:
# Take a one-element slice (rather than indexing) so the selected sample keeps
# its leading axis when it is handed to predict().
input1 = X_test[index1:index1+1]
print('Input Index =',index1)

In [31]:
# Predicted class index from the CNN and the true class index of the sample.
cnn_pred1 = np.argmax(model.predict(input1)[0])
label1 = np.argmax(y_test[index1])

In [32]:
# Report the CNN's predicted class next to the true class for the sample.
print('Predicted Value using  cnn model',cnn_pred1)
print("\nTrue Value",label1)

# ANN MODEL

In [33]:
# Output width and per-sample input shape for the dense network.
num_classes, input_shape = 2, (50, 50, 3)

In [34]:
# Fully connected baseline: flatten each patch, then a stack of dense layers.
model_nn = Sequential()
model_nn.add(Flatten(input_shape=input_shape))

# The input shape is declared once, on the Flatten layer; it is not repeated
# on the dense layers that follow.
model_nn.add(Dense(64, activation="relu"))
model_nn.add(Dense(128, activation="relu"))
model_nn.add(Dense(64, activation="relu"))
model_nn.add(Dense(32, activation="relu"))
model_nn.add(Dense(16, activation="relu"))

# The targets are one-hot vectors over mutually exclusive classes, so the head
# is softmax + categorical cross-entropy. A sigmoid/binary_crossentropy head
# makes Keras resolve 'accuracy' to an element-wise binary accuracy, which is
# not the argmax accuracy used when predictions are evaluated.
model_nn.add(Dense(num_classes, activation="softmax"))


model_nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_nn.summary()

In [35]:
# Fit the dense network; the held-out test split is also the validation data.
history_2 = model_nn.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(X_test, y_test),
)

In [36]:

# Dense-network accuracy per epoch: training curve first, then validation.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history_2.history[metric_key])
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [37]:
# Dense-network loss per epoch: training curve first, then validation.
for metric_key in ('loss', 'val_loss'):
    plt.plot(history_2.history[metric_key])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [38]:
# Score the dense network on the held-out test split.
loss, accuracy = model_nn.evaluate(x=X_test, y=y_test)

In [39]:

from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predict with the dense network (model_nn). This section evaluates the ANN, so
# the predictions must not come from the CNN stored in `model`.
Y_pred = model_nn.predict(X_test)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(y_test, axis=1)

confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

# Render the confusion matrix as an annotated heatmap.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt='.1f', ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [40]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the predictions computed above.
print(classification_report(y_true=Y_true, y_pred=Y_pred_classes))

In [41]:
# NOTE(review): img_plot is also defined in an earlier cell; this later
# definition is the one in effect from here on.
def img_plot(arr, index=0, bgr=True):
    """Display ``arr[index]`` under the title 'Test Image'.

    Args:
        arr: Indexable collection of images.
        index: Position of the image to show.
        bgr: When True (default), a three-channel image is treated as BGR --
            the channel order produced by the cv2.imread loader used in this
            notebook -- and its channels are reversed so that matplotlib,
            which expects RGB, shows the true colours. Pass False for data
            that is already RGB.
    """
    image = np.asarray(arr[index])
    if bgr and image.ndim == 3 and image.shape[-1] == 3:
        image = image[..., ::-1]
    plt.title('Test Image')
    plt.imshow(image)

In [42]:
# Pick test sample 72 and display it.
index1 = 72
img_plot(X_test, index=index1)

In [43]:
# Take a one-element slice (rather than indexing) so the selected sample keeps
# its leading axis when it is handed to predict().
input1 = X_test[index1:index1+1]
print('Input Index =',index1)

In [44]:
# Predicted class index from the dense network and the sample's true class index.
ann_pred1 = np.argmax(model_nn.predict(input1)[0])
label1 = np.argmax(y_test[index1])

In [45]:
# Report the dense network's prediction (ann_pred1) for the selected sample,
# captioned as the ANN result, next to the true class.
print('Predicted Value using  ann model', ann_pred1)
print("True Value", label1)