In [None]:
pip install imutils
pip install efficientnet

In [None]:
import numpy as np 
import cv2
import os
import shutil
import itertools
import imutils
import matplotlib.pyplot as plt

# from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
from plotly import tools

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input

# from tensorflow.keras.applications import EfficientNetB0, preprocess_input

from keras import layers
from keras.models import Model, Sequential
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping

init_notebook_mode(connected=True)

# Single seed for the notebook. It is passed to the directory generators below;
# seeding NumPy here as well makes the un-seeded augmentation preview repeatable.
RANDOM_SEED = 123
np.random.seed(RANDOM_SEED)

In [None]:
!mkdir TRAIN TEST VAL TRAIN/YES TRAIN/NO TEST/YES TEST/NO VAL/YES VAL/NO

In [None]:
import random

IMG_PATH = '../input/brain-mri-images-for-brain-tumor-detection/brain_tumor_dataset/'

# Copy every class folder into TEST (first 20 %), TRAIN (next 60 %) and VAL (last 20 %).
# os.listdir returns entries in arbitrary order, so the file list is sorted and then
# shuffled with a fixed seed: the split is reproducible and not tied to file naming.
split_rng = random.Random(RANDOM_SEED)

for CLASS in sorted(os.listdir(IMG_PATH)):
    class_dir = os.path.join(IMG_PATH, CLASS)
    # Ignore hidden entries and stray files sitting next to the class folders.
    if CLASS.startswith('.') or not os.path.isdir(class_dir):
        continue

    file_names = sorted(f for f in os.listdir(class_dir) if not f.startswith('.'))
    split_rng.shuffle(file_names)
    IMG_NUM = len(file_names)

    for (n, FILE_NAME) in enumerate(file_names):
        img = os.path.join(class_dir, FILE_NAME)

        if n < 0.2*IMG_NUM:
            split_name = 'TEST'
        elif n < 0.8*IMG_NUM:
            split_name = 'TRAIN'
        else:
            split_name = 'VAL'

        # Create the target folder on demand so this cell also works on its own.
        target_dir = os.path.join(split_name, CLASS.upper())
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(img, os.path.join(target_dir, FILE_NAME))


print('Directories are successfully made!')

In [None]:
import os
import cv2
import numpy as np

def load_data(dir_path, img_size=(100, 100)):
    """Load every image below `dir_path`, one sub-folder per class.

    Class folders are visited in sorted order and numbered 0, 1, ... in that
    order; hidden entries (names starting with '.') and non-directories are
    ignored and do not consume a class index.

    Args:
        dir_path: directory containing one sub-directory per class.
        img_size: kept for backward compatibility; images are NOT resized here
            (resizing happens later, after cropping).

    Returns:
        X: array of images as returned by cv2.imread. If the images differ in
           shape this is a 1-D object array holding one image per element.
        y: integer class index for every image in X.
        labels: dict mapping class index -> class folder name.
    """
    X = []
    y = []
    labels = {}

    # Filter before enumerating so a hidden folder cannot shift the class indices.
    class_names = sorted(
        name for name in os.listdir(dir_path)
        if not name.startswith('.') and os.path.isdir(os.path.join(dir_path, name))
    )

    skipped = 0
    for i, class_name in enumerate(class_names):
        labels[i] = class_name
        class_path = os.path.join(dir_path, class_name)
        for file in sorted(os.listdir(class_path)):
            if file.startswith('.'):
                continue
            img = cv2.imread(os.path.join(class_path, file))
            if img is None:
                # cv2.imread signals an unreadable file with None instead of raising.
                skipped += 1
                continue
            X.append(img)
            y.append(i)

    if len({img.shape for img in X}) <= 1:
        X_arr = np.array(X)
    else:
        # Differently sized images cannot be stacked; build the object array
        # explicitly because np.array() rejects ragged input on current NumPy.
        X_arr = np.empty(len(X), dtype=object)
        for k, img in enumerate(X):
            X_arr[k] = img
    y = np.array(y)

    print(f'{len(X_arr)} images loaded from {dir_path} directory.')
    if skipped:
        print(f'{skipped} unreadable files skipped in {dir_path}.')
    return X_arr, y, labels



def plot_confusion_matrix(cm, classes):
    """Draw `cm` as a green heat map with the count written in every cell.

    The raw matrix is also printed. Rows are labelled as the true class and
    columns as the predicted class.

    Args:
        cm: 2-D confusion matrix.
        classes: tick labels, used for both axes.
    """
    plt.figure(figsize=(5, 5))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Greens)
    plt.title('Confusion Matrix')
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    print(cm)
    # Cells above half of the maximum are dark, so their text is drawn in white.
    cutoff = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_color = 'white' if value > cutoff else 'black'
            plt.text(col, row, value, horizontalalignment='center', color=text_color)

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

In [None]:
# Folders produced by the split cell above.
TRAIN_DIR = 'TRAIN/'
TEST_DIR = 'TEST/'
VAL_DIR = 'VAL/'
# Target size used later for resizing and for the directory generators.
# load_data receives it but does not resize (its resize call is commented out).
IMG_SIZE = (224,224)

# use predefined function to load the image data into workspace
# `labels` (class index -> folder name) is taken from the training split only.
X_train, y_train, labels = load_data(TRAIN_DIR, IMG_SIZE)
X_test, y_test, _ = load_data(TEST_DIR, IMG_SIZE)
X_val, y_val, _ = load_data(VAL_DIR, IMG_SIZE)

In [None]:
import plotly.express as px
import pandas as pd

# Number of images per class in each split, shown as grouped bars.
sets = ['Train Set', 'Validation Set', 'Test Set']
classes = [0, 1]
counts = [[np.sum(y_train == c) for c in classes],
          [np.sum(y_val == c) for c in classes],
          [np.sum(y_test == c) for c in classes]]

# Use the class names as strings for the column labels. Integer labels would make
# plotly treat `Class` as a continuous variable, which ignores
# color_discrete_sequence and draws the bars on top of each other.
class_names = [str(labels.get(c, c)) for c in classes]

df = pd.DataFrame(counts, columns=class_names)
df['Set'] = sets
df_melted = df.melt(id_vars='Set', var_name='Class', value_name='Count')

fig = px.bar(df_melted, x='Set', y='Count', color='Class',
             barmode='group', title='Count of classes in each set',
             color_discrete_sequence=['#33cc33', '#ff3300'])
fig.show()


In [None]:
# Show up to ten raw training images per class in a 2 x 5 grid.
j = 5
i = int(10/j)

for index in range(len(labels)):
    # Indexing with argwhere adds an axis, so every entry wraps one image.
    imgs = X_train[np.argwhere(y_train == index)][:10]

    plt.figure(figsize=(15,5))
    for c, img in enumerate(imgs, start=1):
        plt.subplot(i, j, c)
        plt.imshow(img[0])
        plt.xticks([])
        plt.yticks([])
    plt.suptitle(f'Tumor: {labels[index]}')
    plt.show()

In [None]:
def crop_imgs(set_name, add_pixels_value=0):
    """Crop every image to the bounding box of its largest bright region.

    Each image is converted to grayscale, blurred, thresholded at 45 and cleaned
    with two erosions followed by two dilations. The largest external contour of
    that mask defines the crop box.

    Args:
        set_name: iterable of 3-channel images.
        add_pixels_value: margin in pixels added on every side of the box. The
            box is clamped at the top/left image border.

    Returns:
        np.ndarray with one cropped copy per input image, in input order. When
        the crops differ in shape this is a 1-D object array. An image in which
        no contour is found, or whose box is empty, is returned uncropped.
    """
    set_new = []
    for img in set_name:
        # NOTE(review): the notebook loads images with cv2.imread (BGR order), so
        # COLOR_BGR2GRAY looks like the intended flag; left unchanged - confirm.
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)

        # threshold the image, then perform a series of erosions +
        # dilations to remove any small regions of noise
        thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.erode(thresh, None, iterations=2)
        thresh = cv2.dilate(thresh, None, iterations=2)

        # find contours in thresholded image, then grab the largest one
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        if len(cnts) == 0:
            # Nothing above the threshold: max() would raise, keep the full image.
            set_new.append(img.copy())
            continue
        c = max(cnts, key=cv2.contourArea)

        # extreme x / y coordinates of the contour
        xs = c[:, :, 0]
        ys = c[:, :, 1]

        ADD_PIXELS = add_pixels_value
        # Clamp the start indices: a negative start would wrap around and give
        # an empty (or wrong) slice.
        top = max(int(ys.min()) - ADD_PIXELS, 0)
        left = max(int(xs.min()) - ADD_PIXELS, 0)
        bottom = int(ys.max()) + ADD_PIXELS
        right = int(xs.max()) + ADD_PIXELS

        new_img = img[top:bottom, left:right].copy()
        if new_img.size == 0:
            # Degenerate box (e.g. a one-pixel contour): fall back to the full image.
            new_img = img.copy()
        set_new.append(new_img)

    if len({im.shape for im in set_new}) <= 1:
        return np.array(set_new)
    # Crops of different sizes cannot be stacked; np.array() rejects ragged
    # input on current NumPy, so fill an object array element by element.
    cropped = np.empty(len(set_new), dtype=object)
    for k, im in enumerate(set_new):
        cropped[k] = im
    return cropped

# apply this for each set
# apply this for each set
# add_pixels_value is left at its default of 0, so no margin is added around the crop box.
X_train_crop = crop_imgs(set_name=X_train)
X_val_crop = crop_imgs(set_name=X_val)
X_test_crop = crop_imgs(set_name=X_test)

In [None]:
# Show up to ten cropped training images per class in a 2 x 5 grid.
j = 5
i = int(10/j)

for index in range(len(labels)):
    # Indexing with argwhere adds an axis, so every entry wraps one image.
    imgs = X_train_crop[np.argwhere(y_train == index)][:10]

    plt.figure(figsize=(15,5))
    for c, img in enumerate(imgs, start=1):
        plt.subplot(i, j, c)
        plt.imshow(img[0])
        plt.xticks([])
        plt.yticks([])
    plt.suptitle(f'Tumor: {labels[index]}')
    plt.show()

In [None]:
def save_new_images(x_set, y_set, folder_name):
    """Write every image to `<folder_name>NO/` or `<folder_name>YES/` as `<index>.jpg`.

    Args:
        x_set: images to save.
        y_set: class per image; 0 goes to NO/, anything else to YES/.
        folder_name: destination prefix, concatenated as-is (include the trailing slash).
    """
    for position, (img, imclass) in enumerate(zip(x_set, y_set)):
        class_dir = 'NO/' if imclass == 0 else 'YES/'
        cv2.imwrite(folder_name + class_dir + str(position) + '.jpg', img)

In [None]:
# saving new images to the folder
!mkdir TRAIN_CROP TEST_CROP VAL_CROP TRAIN_CROP/YES TRAIN_CROP/NO TEST_CROP/YES TEST_CROP/NO VAL_CROP/YES VAL_CROP/NO

save_new_images(X_train_crop, y_train, folder_name='TRAIN_CROP/')
save_new_images(X_val_crop, y_val, folder_name='VAL_CROP/')
save_new_images(X_test_crop, y_test, folder_name='TEST_CROP/')

In [None]:
def preprocess_imgs(set_name, img_size):
    """Resize every image to `img_size` (bicubic) and run `preprocess_input` on it.

    `preprocess_input` is whatever that global name is bound to when this
    function is called.

    Args:
        set_name: iterable of images.
        img_size: size passed to cv2.resize as `dsize`.

    Returns:
        np.ndarray holding the preprocessed images in input order.
    """
    return np.array([
        preprocess_input(
            cv2.resize(img, dsize=img_size, interpolation=cv2.INTER_CUBIC)
        )
        for img in set_name
    ])

# Resize the cropped images to IMG_SIZE and apply preprocess_input to each split.
# X_test_prep is what the evaluation cells below feed to model.predict.
X_train_prep = preprocess_imgs(set_name=X_train_crop, img_size=IMG_SIZE)
X_test_prep = preprocess_imgs(set_name=X_test_crop, img_size=IMG_SIZE)
X_val_prep = preprocess_imgs(set_name=X_val_crop, img_size=IMG_SIZE)

In [None]:
# set the parameters we want to change randomly
# This generator is only used for the augmentation preview in the next cell;
# training uses train_datagen, which is defined further down.
demo_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    rescale=1./255,
    shear_range=0.05,
    brightness_range=[0.1, 1.5],
    horizontal_flip=True,
    vertical_flip=True
)

In [None]:
# Start from an empty folder so a re-run does not mix old and new previews.
shutil.rmtree('preview', ignore_errors=True)
os.makedirs('preview', exist_ok=True)

# The image is converted to RGB first: the generator saves arrays as RGB files,
# and matplotlib displays RGB.
# NOTE(review): assumes X_train_crop holds BGR images from cv2.imread - confirm.
sample_rgb = cv2.cvtColor(X_train_crop[5], cv2.COLOR_BGR2RGB)
x = sample_rgb.reshape((1,) + sample_rgb.shape)

# flow() is an endless generator; stop after 21 batches (i = 0 .. 20).
for i, batch in enumerate(demo_datagen.flow(x, batch_size=1, save_to_dir='preview', save_prefix='aug_img', save_format='jpg')):
    if i >= 20:
        break

plt.figure(figsize=(10, 5))
plt.subplot(2, 5, 1)
plt.imshow(sample_rgb)
plt.title('Original Image')
plt.axis('off')

# Sorted listing gives a stable choice of the nine previews that are shown.
for i, img_file in enumerate(sorted(os.listdir('preview/'))[:9]):
    img = cv2.cvtColor(cv2.imread(os.path.join('preview', img_file)), cv2.COLOR_BGR2RGB)
    plt.subplot(2, 5, i + 2)
    plt.imshow(img)
    plt.axis('off')

plt.suptitle('Augmented Images')
plt.show()

In [None]:
# From here on the generators read the cropped copies written above.
TRAIN_DIR = 'TRAIN_CROP/'
VAL_DIR = 'VAL_CROP/'

# Random augmentation for training; validation images are only preprocessed.
augmentation = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    brightness_range=[0.5, 1.5],
    horizontal_flip=True,
    vertical_flip=True,
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by both directory iterators.
flow_options = dict(
    color_mode='rgb',
    target_size=IMG_SIZE,
    class_mode='binary',
    seed=RANDOM_SEED,
)

train_generator = train_datagen.flow_from_directory(TRAIN_DIR, batch_size=32, **flow_options)

validation_generator = test_datagen.flow_from_directory(VAL_DIR, batch_size=16, **flow_options)

# Basic CNN Model

In [None]:
# Small CNN trained from scratch: three conv/pool stages followed by a single
# sigmoid unit (binary tumor / no-tumor output).
NUM_CLASSES = 1
model = Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224,224,3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(NUM_CLASSES, activation='sigmoid'))

# All layers stay trainable. The convolutional layers start from random
# weights (no pretrained base), so freezing them would leave the final Dense
# layer learning on top of fixed random features.

# NOTE(review): recent Keras releases spell this argument `learning_rate`;
# `lr` is kept to match the rest of the notebook - confirm against the installed version.
model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(lr=1e-4),
    metrics=['accuracy']
)

model.summary()

In [None]:
# Define number of epochs
EPOCHS = 25

# Train the model
# Trains the basic CNN from train_generator and validates on validation_generator.
# NOTE(review): steps_per_epoch / validation_steps are fixed numbers rather than
# len(generator); whether the generators can supply that many batches per epoch
# depends on the Keras version - verify.
# NOTE(review): fit_generator is deprecated in newer Keras in favour of fit - confirm.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=40,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=25
)

In [None]:
# plot model performance
# Depending on the Keras version the accuracy metric is logged as 'acc' or
# 'accuracy'; accept either instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Set')
plt.plot(epochs_range, val_acc, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Set')
plt.plot(epochs_range, val_loss, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the basic CNN on the held-out test images.
predictions = model.predict(X_test_prep)
# The sigmoid output is thresholded at 0.5 to obtain the class index.
predictions = [1 if x>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
print('Test Accuracy = %.2f' % accuracy)

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Names must follow the class indices: load_data numbers the sorted class
# folders (NO -> 0, YES -> 1) and save_new_images writes class 0 to NO/.
target_names = ['no','yes']
cm = confusion_matrix(y_test, predictions)
plot_confusion_matrix(cm= cm, classes= target_names)
print(classification_report(y_test, predictions, target_names= target_names))

# VGG Net

In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input

# Convolutional VGG16 base (no classifier head), weights read from a local file.
vgg16_weight_path = '../input/keras-pretrained-models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model = VGG16(
    include_top=False,
    weights=vgg16_weight_path,
    input_shape=IMG_SIZE + (3,)
)

NUM_CLASSES = 1

# Base + flatten + dropout + one sigmoid unit.
vgg_model = Sequential([
    base_model,
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation='sigmoid'),
])

# Only the new head is trained; the VGG16 base (first layer of the stack) is frozen.
vgg_model.layers[0].trainable = False

vgg_model.compile(
    optimizer=RMSprop(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

vgg_model.summary()

In [None]:
EPOCHS = 25

# Trains the VGG16-based model from train_generator and validates on validation_generator.
# NOTE(review): steps_per_epoch / validation_steps are fixed numbers rather than
# len(generator); whether the generators can supply that many batches per epoch
# depends on the Keras version - verify.
# NOTE(review): fit_generator is deprecated in newer Keras in favour of fit - confirm.
history = vgg_model.fit_generator(
    train_generator,
    steps_per_epoch=50,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=25
)

In [None]:
# plot model performance
# Depending on the Keras version the accuracy metric is logged as 'acc' or
# 'accuracy'; accept either instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Set')
plt.plot(epochs_range, val_acc, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Set')
plt.plot(epochs_range, val_loss, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the VGG16-based model on the held-out test images.
predictions = vgg_model.predict(X_test_prep)
# The sigmoid output is thresholded at 0.5 to obtain the class index.
predictions = [1 if x>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
print('Test Accuracy = %.2f' % accuracy)

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Names must follow the class indices: load_data numbers the sorted class
# folders (NO -> 0, YES -> 1) and save_new_images writes class 0 to NO/.
target_names = ['no','yes']
cm = confusion_matrix(y_test, predictions)
plot_confusion_matrix(cm= cm, classes= target_names)
print(classification_report(y_test, predictions, target_names= target_names))

In [None]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import Model
import matplotlib.pyplot as plt
from numpy import expand_dims


# Full VGG16 with its default arguments. Separate names are used so the basic
# CNN stored in `model` is not overwritten by this visualisation cell.
vgg_full = VGG16()
# Expose the outputs of several intermediate layers (one index per entry in ixs).
ixs = [2, 5, 9, 13, 17]
outputs = [vgg_full.layers[i].output for i in ixs]
feature_model = Model(inputs=vgg_full.inputs, outputs=outputs)

# Take one validation image. X_val_prep was already passed through
# preprocess_input inside preprocess_imgs, so it must not be applied again.
img = img_to_array(X_val_prep[43])
# expand dimensions so that it represents a single 'sample'
img = expand_dims(img, axis=0)

# one feature-map tensor per selected layer
feature_maps = feature_model.predict(img)

# plot the first 64 channels of every selected layer in an 8x8 grid
square = 8
for fmap in feature_maps:
    # One figure per layer; the original opened a new 64x64-inch figure for every row.
    plt.figure(figsize=(16, 16))
    for ix in range(1, square * square + 1):
        # specify subplot and turn off the axis ticks
        ax = plt.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])

        # plot one channel with the viridis colour map
        plt.imshow(fmap[0, :, :, ix-1], cmap='viridis')

    # show the figure for this layer
    plt.show()

Feature maps from earlier layers tend to capture low-level features like edges, textures, or simple shapes, while feature maps from deeper layers capture higher-level features or complex patterns relevant to the task the network was trained on.



# ResNet

In [None]:
# ResNet
from keras.applications.resnet50 import ResNet50, preprocess_input

# Convolutional ResNet50 base (no classifier head), weights read from a local file.
resnet_weight_path = '../input/keras-pretrained-models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
resnet_base_model = ResNet50(
    include_top=False,
    weights=resnet_weight_path,
    input_shape=IMG_SIZE + (3,)
)

# Base + flatten + dropout + sigmoid head; NUM_CLASSES comes from an earlier cell.
resnet_model = Sequential([
    resnet_base_model,
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation='sigmoid'),
])

# Only the new head is trained; the ResNet50 base (first layer of the stack) is frozen.
resnet_model.layers[0].trainable = False

resnet_model.compile(
    optimizer=RMSprop(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

resnet_model.summary()

In [None]:
EPOCHS = 25
# Early-stopping callback; it is currently NOT passed to the training call below
# (the callbacks argument is commented out), so training always runs all epochs.
# NOTE(review): the monitored name 'val_acc' must match the metric key logged by
# the installed Keras version - verify before enabling.
es = EarlyStopping(
    monitor='val_acc', 
    mode='max',
    patience=6
)

# Trains the ResNet50-based model from train_generator and validates on validation_generator.
# NOTE(review): steps_per_epoch / validation_steps are fixed numbers rather than
# len(generator); whether the generators can supply that many batches per epoch
# depends on the Keras version - verify.
# NOTE(review): fit_generator is deprecated in newer Keras in favour of fit - confirm.
history = resnet_model.fit_generator(
    train_generator,
    steps_per_epoch=40,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=25,
#     callbacks=[es]
)

In [None]:
# plot model performance
# Depending on the Keras version the accuracy metric is logged as 'acc' or
# 'accuracy'; accept either instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Set')
plt.plot(epochs_range, val_acc, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Set')
plt.plot(epochs_range, val_loss, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the ResNet50-based model on the held-out test images.
predictions = resnet_model.predict(X_test_prep)
# The sigmoid output is thresholded at 0.5 to obtain the class index.
predictions = [1 if x>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
print('Test Accuracy = %.2f' % accuracy)

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Names must follow the class indices: load_data numbers the sorted class
# folders (NO -> 0, YES -> 1) and save_new_images writes class 0 to NO/.
target_names = ['no','yes']
cm = confusion_matrix(y_test, predictions)
plot_confusion_matrix(cm= cm, classes= target_names)
print(classification_report(y_test, predictions, target_names= target_names))

# Inception Net

In [None]:
# Only the model class is imported. Importing this module's preprocess_input
# as well would rebind the global name used by preprocess_imgs, while the
# generators and X_test_prep built earlier keep the preprocessing they were
# created with; nothing in this cell calls it.
# NOTE(review): the inputs fed to this model were therefore not prepared with
# InceptionV3's own preprocessing - confirm this is acceptable.
from keras.applications.inception_v3 import InceptionV3

# InceptionNet
# Convolutional InceptionV3 base (no classifier head), weights read from a local file.
inception_weight_path = '../input/keras-pretrained-models/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
inception_base_model = InceptionV3(
    weights=inception_weight_path,
    include_top=False,
    input_shape=IMG_SIZE + (3,)
)

# Base + global average pooling + dropout + sigmoid head; NUM_CLASSES comes
# from an earlier cell.
inception_model = Sequential()
inception_model.add(inception_base_model)
inception_model.add(layers.GlobalAveragePooling2D())
inception_model.add(layers.Dropout(0.5))
inception_model.add(layers.Dense(NUM_CLASSES, activation='sigmoid'))

# Only the new head is trained; the InceptionV3 base is frozen.
inception_model.layers[0].trainable = False

inception_model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(lr=1e-4),
    metrics=['accuracy']
)

inception_model.summary()

In [None]:
EPOCHS = 25

# Trains the InceptionV3-based model from train_generator and validates on validation_generator.
# NOTE(review): steps_per_epoch / validation_steps are fixed numbers rather than
# len(generator); whether the generators can supply that many batches per epoch
# depends on the Keras version - verify.
# NOTE(review): fit_generator is deprecated in newer Keras in favour of fit - confirm.
history = inception_model.fit_generator(
    train_generator,
    steps_per_epoch=40,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=25
)

In [None]:
# plot model performance
# Depending on the Keras version the accuracy metric is logged as 'acc' or
# 'accuracy'; accept either instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Set')
plt.plot(epochs_range, val_acc, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Set')
plt.plot(epochs_range, val_loss, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the InceptionV3-based model on the held-out test images.
predictions = inception_model.predict(X_test_prep)
# The sigmoid output is thresholded at 0.5 to obtain the class index.
predictions = [1 if x>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
print('Test Accuracy = %.2f' % accuracy)

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Names must follow the class indices: load_data numbers the sorted class
# folders (NO -> 0, YES -> 1) and save_new_images writes class 0 to NO/.
target_names = ['no','yes']
cm = confusion_matrix(y_test, predictions)
plot_confusion_matrix(cm= cm, classes= target_names)
print(classification_report(y_test, predictions, target_names= target_names))

# EfficientNet

In [None]:
import tensorflow as tf
from efficientnet.tfkeras import EfficientNetB3, preprocess_input

# Input geometry: 224 x 224 pixels, 3 channels.
img_size = (224, 224)
channels = 3
img_shape = img_size + (channels,)

# EfficientNetB3 base without its classifier; pooling='max' is requested from the base.
base_model = EfficientNetB3(
    weights="imagenet",
    include_top=False,
    input_shape=img_shape,
    pooling='max',
)

# Freeze every layer of the base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Head: batch norm, two dense/dropout stages, and a 2-way softmax output
# (trained with sparse categorical cross-entropy on integer class labels).
head_layers = [
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(2, activation='softmax'),
]
eff_model = tf.keras.Sequential([base_model] + head_layers)

optimizer = tf.keras.optimizers.Adam()
eff_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

eff_model.summary()


In [None]:
# Trains the EfficientNetB3-based model from train_generator and validates on validation_generator.
# NOTE(review): steps_per_epoch / validation_steps are fixed numbers rather than
# len(generator); whether the generators can supply that many batches per epoch
# depends on the Keras version - verify.
# NOTE(review): fit_generator is deprecated in newer Keras in favour of fit - confirm.
history = eff_model.fit_generator(
    train_generator,
    steps_per_epoch=40,
    epochs=25,
    validation_data=validation_generator,
    verbose=1,
    validation_steps=25
)

In [None]:
# plot model performance
# Depending on the Keras version the accuracy metric is logged as 'acc' or
# 'accuracy'; accept either instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

plt.figure(figsize=(15,5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Train Set')
plt.plot(epochs_range, val_acc, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Train Set')
plt.plot(epochs_range, val_loss, label='Val Set')
plt.legend(loc="best")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss')

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the EfficientNetB3-based model on the held-out test images.
predictions = eff_model.predict(X_test_prep)
# The model has a 2-way softmax output; column 1 is the score for class index 1.
predictions = [1 if x[1]>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
print('Test Accuracy = %.2f' % accuracy)

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Names must follow the class indices: load_data numbers the sorted class
# folders (NO -> 0, YES -> 1) and save_new_images writes class 0 to NO/.
target_names = ['no','yes']
cm = confusion_matrix(y_test, predictions)
plot_confusion_matrix(cm= cm, classes= target_names)
print(classification_report(y_test, predictions, target_names= target_names))