In [None]:
# --- Standard library -------------------------------------------------------
import glob
import random
import warnings

# Installed before the third-party imports so the filter is already active
# while those packages load.
warnings.filterwarnings(action = 'ignore')

# --- Third-party ------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.layers import GlobalAveragePooling2D, Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical

# --- Reproducibility --------------------------------------------------------
# Seed every random number generator the notebook relies on. Python's `random`
# and NumPy were already seeded; TensorFlow keeps its own generator (weight
# initialisation, dropout, shuffling inside `fit`), so it is seeded as well.
SEED = 98
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Collect every PNG patch below the dataset root (recursive search).
# glob returns matches in arbitrary, filesystem-dependent order. Sorting gives a
# stable list, so the seeded random sampling performed on these paths further
# down selects the same files on every machine and every run.
breast_img = sorted(
    glob.glob('/kaggle/input/breast-histopathology-images/IDC_regular_ps50_idx5/**/*.png', recursive = True)
)

# Print a few paths as a quick check that the dataset was found.
for img_name in breast_img[:3]:
    print(img_name)

In [None]:
# Partition the patch paths by class. The label is read from the fifth
# character from the end of the path, i.e. the character right before the
# ".png" extension: '0' -> IDC negative, '1' -> IDC positive. Paths carrying
# any other character at that position end up in neither list.
neg_img = [path for path in breast_img if path[-5] == '0']
pos_img = [path for path in breast_img if path[-5] == '1']

neg_num, pos_num = len(neg_img), len(pos_img)
total_img_num = neg_num + pos_num

# Class balance summary.
for template, count in (
    ('Number of Images in IDC (-): {}', neg_num),
    ('Number of Images in IDC (+) : {}', pos_num),
    ('Total Number of Images : {}', total_img_num),
):
    print(template.format(count))

In [None]:
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Preview 18 randomly chosen patches per class on a 6x6 grid.
plt.figure(figsize = (15, 15))

# Indices are drawn with replacement, so a patch may appear more than once.
some_neg = np.random.randint(0, len(neg_img), 18)
some_pos = np.random.randint(0, len(pos_img), 18)

# Negatives fill the odd subplot slots (1, 3, ..., 35) and positives the even
# ones (2, 4, ..., 36), so the two classes alternate along each row.
for picks, paths, title, first_slot in (
    (some_neg, neg_img, 'IDC (-)', 1),
    (some_pos, pos_img, 'IDC (+)', 2),
):
    for k, num in enumerate(picks):
        img = img_to_array(load_img(paths[num], target_size=(100, 100)))

        plt.subplot(6, 6, first_slot + 2 * k)
        plt.axis('off')
        plt.title(title)
        plt.imshow(img.astype('uint8'))

In [None]:
from matplotlib.image import imread
import cv2

# Build a class-balanced sample of patches and load them as 75x75 RGB arrays.

# Draw the same number of paths from each class. Capping at the smaller class
# keeps random.sample from raising ValueError should the negatives ever be
# fewer than the positives; when negatives are the majority this is exactly
# len(pos_img), as before.
n_per_class = min(len(neg_img), len(pos_img))
some_neg_img = random.sample(neg_img, n_per_class)
some_pos_img = random.sample(pos_img, n_per_class)


def _load_patch(path, size=(75, 75)):
    """Read one image file and return it as an RGB array resized to `size`.

    Returns None when OpenCV cannot read the file (cv2.imread reports failure
    by returning None instead of raising).
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    if bgr is None:
        return None
    # OpenCV decodes colour images in B,G,R channel order. Convert to RGB so the
    # arrays match the channel order the ImageNet-pretrained network was trained
    # on (and the order used by the PIL-based preview).
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return cv2.resize(rgb, size, interpolation = cv2.INTER_LINEAR)


neg_img_arr = []
pos_img_arr = []
skipped = 0

# Each entry is an [image_array, label] pair; label 0 = IDC(-), 1 = IDC(+).
for paths, label, bucket in (
    (some_neg_img, 0, neg_img_arr),
    (some_pos_img, 1, pos_img_arr),
):
    for img in paths:
        patch = _load_patch(img)
        if patch is None:
            skipped += 1
            continue
        bucket.append([patch, label])

if skipped:
    print('Skipped {} unreadable image file(s)'.format(skipped))

In [None]:
# Merge both classes into one list of [image, label] pairs and shuffle it.
#
# This is deliberately kept as a plain Python list:
#   * np.concatenate on lists of [ndarray, int] pairs has to build a ragged
#     object array, which recent NumPy versions reject with a ValueError;
#   * random.shuffle swaps elements with `x[i], x[j] = x[j], x[i]`. On a 2-D
#     ndarray the rows are views, so that swap overwrites one row with the
#     other, silently duplicating some samples and dropping others.
# Shuffling a list swaps references and keeps every sample exactly once.
breast_img_arr = list(neg_img_arr) + list(pos_img_arr)
random.shuffle(breast_img_arr)

# Split the shuffled pairs into a feature array and a label array.
X = np.array([feature for feature, _ in breast_img_arr])
y = np.array([label for _, label in breast_img_arr])

# print the shape of the feature array
print('X shape : {}' .format(X.shape))

In [None]:
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Hold out 30% of the samples (X_predict / y_true); the remaining 70% is
# divided again below.
X_train, X_predict, y_train, y_true = train_test_split(X, y, test_size = 0.3, random_state = 7)

# `num` is half of the FULL dataset size, not half of the 70% portion, so the
# first `num` rows become the training set and the rest of the 70% portion
# becomes X_test / y_test (roughly 50% / 20% / 30% overall).
rate = 0.5
num = int(X.shape[0] * rate)

# Tuple assignment evaluates both slices before rebinding X_train / y_train.
X_train, X_test = X_train[:num], X_train[num:]
y_train, y_test = y_train[:num], y_train[num:]

# One-hot encode the labels of all three subsets (2 classes).
y_train, y_test, y_true = (
    to_categorical(labels, 2) for labels in (y_train, y_test, y_true)
)

# Report the resulting shapes.
for template, arr in (
    ('X_train shape : {}', X_train),
    ('X_test shape : {}', X_test),
    ('X_predict shape : {}', X_predict),
    ('y_train shape : {}', y_train),
    ('y_test shape : {}', y_test),
    ('y_true shape : {}', y_true),
):
    print(template.format(arr.shape))

In [None]:
from keras.layers import Conv2D, MaxPool2D, Dropout, Dense, Flatten, Lambda
from keras.models import Sequential 
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input

INPUT_SHAPE = (75, 75, 3)

# create the InceptionV3 model (ImageNet weights, without its classifier head)
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)

# freeze the convolutional layers in the base model so only the new head trains
for layer in base_model.layers:
    layer.trainable = False

# add new classification layers on top of the base model
model = Sequential()
# The pretrained InceptionV3 weights expect inputs run through the
# application's own `preprocess_input`. Doing it inside the model means the raw
# 0-255 image arrays can still be passed to fit()/predict() unchanged.
model.add(Lambda(preprocess_input, input_shape=INPUT_SHAPE, name='inception_preprocess'))
model.add(base_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

# compile the model
# The labels are one-hot encoded and the head is a 2-way softmax, so
# categorical cross-entropy is the matching loss (and makes 'accuracy' resolve
# to categorical accuracy).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# print the model summary
model.summary()

In [None]:
# Fit the model on the training subset, monitoring X_test / y_test after each
# epoch; the returned History object feeds the learning-curve plot.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=15,
    batch_size=32,
)

In [None]:
# Overlay the per-epoch accuracy and loss curves (training and validation) on
# a single set of axes. Each entry pairs a History key with its legend label;
# the curves are drawn in this order so the legend lines up.
curves = (
    ('accuracy', 'train accuracy'),
    ('val_accuracy', 'validation accuracy'),
    ('loss', 'train loss'),
    ('val_loss', 'validation loss'),
)

for key, _ in curves:
    plt.plot(history.history[key])

plt.title('Performance Results')
plt.xlabel('epoch')
plt.ylabel('accuracy and loss')
plt.legend([label for _, label in curves], loc='upper left')
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Confusion matrix of the model's predictions on X_test.
y_pred = model.predict(X_test)
# NOTE: this rebinds `y_true` (previously the one-hot labels of the held-out
# split) to the integer class labels of y_test.
y_true = np.argmax(y_test, axis=1)

# `labels=[0, 1]` pins the matrix to 2x2 even if one class is absent from the
# labels or the predictions.
conf_mtx = confusion_matrix(y_true, np.argmax(y_pred, axis=1), labels=[0, 1])

fig, ax = plt.subplots()
im = ax.imshow(conf_mtx, cmap="Blues")

# Add annotations to the plot.
# Use white text on the dark half of the colour range and black on the light
# half, so every count stays readable (not just the single largest cell).
threshold = (conf_mtx.max() + conf_mtx.min()) / 2.0
for i in range(conf_mtx.shape[0]):
    for j in range(conf_mtx.shape[1]):
        color = "white" if conf_mtx[i, j] > threshold else "black"
        ax.text(j, i, str(conf_mtx[i, j]), ha="center", va="center", color=color)

# Name the classes on both axes (0 = IDC negative, 1 = IDC positive).
class_names = ["IDC (-)", "IDC (+)"]
ax.set_xticks(range(len(class_names)))
ax.set_xticklabels(class_names)
ax.set_yticks(range(len(class_names)))
ax.set_yticklabels(class_names)

# Add labels to the plot
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")

# Add a colorbar to the plot
cbar = ax.figure.colorbar(im, ax=ax)
cbar.ax.set_ylabel("Counts", rotation=-90, va="bottom")

plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Get predicted probabilities for X_test (one column per class)
y_pred_prob = model.predict(X_test)

# Compute ROC curve and area under the curve (AUC) for the positive class.
# Column 1 of the one-hot labels / softmax output is IDC(+). Using only that
# column gives the binary ROC whose axes really are the sensitivity and
# 1 - specificity of IDC detection; flattening both columns together would
# instead pool each sample twice (a micro-average), which is a different curve
# with a different AUC.
fpr, tpr, thresholds = roc_curve(y_test[:, 1], y_pred_prob[:, 1])
roc_auc = auc(fpr, tpr)

# Plot ROC curve, with the chance diagonal for reference
plt.plot(fpr, tpr, color='darkorange', label='ROC curve (AUC = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Predicted class probabilities for X_test.
y_pred = model.predict(X_test)

# Signed difference between predicted probabilities and the one-hot targets,
# element by element.
errors = np.subtract(y_pred, y_test)

# Histogram of the errors on the current axes.
error_ax = plt.gca()
error_ax.hist(errors, bins=50)
error_ax.set_xlabel('Prediction Errors')
error_ax.set_ylabel('Count')
error_ax.set_title('Distribution of Prediction Errors')
plt.show()