In [38]:
import os
import shutil
import glob
import json
import pickle
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import cv2
import skimage.io as io
from skimage.transform import resize
import scipy
from skimage.filters import sobel
from skimage.feature import graycomatrix , graycoprops
from skimage.measure import shannon_entropy
import lightgbm as lgbm
from imblearn.over_sampling import SMOTE
import tensorflow
from PIL import Image

from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler, RobustScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, cross_validate, RandomizedSearchCV, GridSearchCV
from sklearn.utils import shuffle
from sklearn.metrics import (confusion_matrix, classification_report, accuracy_score, f1_score, recall_score, precision_score,
                             auc, roc_curve, roc_auc_score, cohen_kappa_score, plot_confusion_matrix, plot_roc_curve,
                             plot_precision_recall_curve, precision_recall_fscore_support, precision_recall_curve)
from sklearn.utils import compute_class_weight
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC 
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator, save_img
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential, Model, load_model, model_from_json
from tensorflow.keras.layers import (Input, InputLayer, Conv2D, MaxPooling2D, Dense, Concatenate, BatchNormalization, Dropout, Activation, GlobalAveragePooling2D, InputSpec, Flatten, Concatenate)
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.applications.resnet_v2 import ResNet101V2, preprocess_input
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.metrics import Accuracy, AUC, Precision, Recall
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.applications import InceptionResNetV2
import warnings
warnings.filterwarnings("ignore")

In [39]:
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

In [None]:
# Install natsort into the running environment, then import its natural-order sort helper.
# NOTE(review): the install is unpinned and re-runs on every execution; consider moving it
# to a dedicated setup cell with a pinned version (`%pip install natsort==<version>`).
!pip install natsort
from natsort import natsorted

In [41]:
# Dataset locations, all relative to the shared input root.
# NOTE(review): the yes/no folders presumably hold tumour / no-tumour scans - confirm against the dataset.
_INPUT_ROOT = '..' + '/input'

yes_path = f'{_INPUT_ROOT}/brain-tumor-detection/yes'
no_path = f'{_INPUT_ROOT}/brain-tumor-detection/no'
merged_train = f'{_INPUT_ROOT}/yesno/yes-no'
train_set_path = f'{_INPUT_ROOT}/dataclasses'

test_set_path = f'{_INPUT_ROOT}/testyesno/test/test'

In [None]:
# Build the class-name <-> integer-label lookup tables from the entries found
# directly under the training directory.
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# listing is sorted: the label ids are then deterministic and follow the same
# alphanumeric class ordering that Keras' flow_from_directory uses.
classes = sorted(os.listdir(train_set_path))

# class name -> integer label
class2label = {class_name: label for label, class_name in enumerate(classes)}

# integer label -> class name (inverse of the mapping above)
label2class = {label: class_name for class_name, label in class2label.items()}

print(class2label)
print("--"*45)
print(label2class)

In [None]:
# Build the class-name <-> integer-label lookup tables from the entries found
# directly under the test directory.
#
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# listing is sorted to keep the label ids deterministic and aligned with the
# alphanumeric class ordering that Keras' flow_from_directory uses.
#
# NOTE(review): this rebinds `classes`, `class2label` and `label2class`; the
# later cells that walk `train_set_path` use these test-derived values, so the
# train and test directories must contain identically named class folders.
classes = sorted(os.listdir(test_set_path))

# class name -> integer label
class2label = {class_name: label for label, class_name in enumerate(classes)}

# integer label -> class name (inverse of the mapping above)
label2class = {label: class_name for class_name, label in class2label.items()}

print(class2label)
print("--"*45)
print(label2class)

In [44]:
# Raw directory listings (entry names only, in whatever order the OS returns them).
no_list, yes_list, training_set = (
    os.listdir(folder) for folder in (no_path, yes_path, merged_train)
)

test_set = os.listdir(test_set_path)

In [None]:
# Report how many entries each listing holds: merged training folder first, then test folder.
for listing in (training_set, test_set):
    print(len(listing))

In [46]:
# Natural-order sort of both listings; note that `test_set` is rebound to its sorted copy.
train_set, test_set = (natsorted(names) for names in (training_set, test_set))

In [47]:
def show_img(number, split_index=1500):
    """Display `number` randomly chosen images from `train_set` in a single row.

    Each image is picked by a random index into the module-level `train_set`
    list. Indices below `split_index` are loaded from `no_path` and captioned
    'No BT'; all other indices are loaded from `yes_path` and captioned 'Yes BT'.

    Parameters
    ----------
    number : int
        How many images (subplots) to draw.
    split_index : int, optional
        First index of `train_set` that is treated as a "yes" image.
        Defaults to 1500, the value that was previously hard-coded.
        NOTE(review): assumes the sorted listing puts every "no" file before
        every "yes" file - confirm against the dataset.

    Raises
    ------
    ValueError
        If `train_set` is empty (raised by `random.randrange`).
    """
    fig = plt.figure(figsize=(15, 15))

    for position in range(number):
        # randrange() excludes its upper bound and is tied to the real list
        # length, so the index is always valid (randint(0, 3000) could return
        # 3000 and overrun the list).
        image_num = random.randrange(len(train_set))

        if image_num < split_index:
            folder, caption = no_path, 'No BT'
        else:
            folder, caption = yes_path, 'Yes BT'

        fig.add_subplot(1, number, position + 1)
        plt.imshow(plt.imread(os.path.join(folder, train_set[image_num])))
        plt.xlabel(caption)

    # Layout only needs to be computed once, after every subplot exists.
    plt.tight_layout()

In [None]:
# Preview eight randomly selected images in a single row.
show_img(8)

In [None]:
# Count the files in every class folder of the train and test directories.
# Results are stored as {class name: file count} and echoed to stdout.
train_distribution = {}
test_distribution = {}

_splits = (
    ("TRAIN-SET DISTRIBUTION\n", train_set_path, train_distribution),
    ("\nVAL-SET DISTRIBUTION\n", test_set_path, test_distribution),
)

for heading, root_dir, counts in _splits:
    print(heading)
    for cat in classes:
        n_files = len(glob.glob(os.path.join(root_dir, cat, "*")))
        print(f"Number of {cat} Images: {n_files}")
        counts[cat] = n_files
        print('--'*20)

In [50]:
# Collect per-image metadata for every file under train_set_path/<class>/.
# Each list below receives exactly one entry per successfully read image, so
# they stay index-aligned and can be zipped into a DataFrame afterwards.
categories = []     # class folder name
image_name = []     # file name including extension
image_id = []       # file name without extension
image_format = []   # extension without the leading dot
labels = []         # integer label looked up in class2label
img_path = []       # path as returned by glob
img_height = []     # pixel height reported by OpenCV
img_width = []      # pixel width reported by OpenCV

for cat in classes:
    image_files = glob.glob(os.path.join(train_set_path, cat, "*"))
    for file_path in image_files:
        pixels = cv2.imread(file_path)
        if pixels is None:
            # cv2.imread signals an unreadable/non-image file by returning None
            # instead of raising; skip it rather than crash on `.shape`.
            print(f"Skipping unreadable image: {file_path}")
            continue

        h, w = pixels.shape[:2]

        # os.path helpers instead of split('/') / split('.'): independent of
        # the path separator and safe for names with zero or several dots.
        cls = os.path.basename(os.path.dirname(file_path))
        img = os.path.basename(file_path)
        img_id, extension = os.path.splitext(img)
        img_format = extension.lstrip('.')

        img_height.append(h)
        img_width.append(w)
        categories.append(cls)
        image_name.append(img)
        image_id.append(img_id)
        image_format.append(img_format)
        labels.append(class2label[cls])
        img_path.append(file_path)

In [51]:
# Sanity check: every metadata list collected above must have the same number of entries.
assert len({
    len(column)
    for column in (categories, image_name, image_id, image_format,
                   labels, img_path, img_height, img_width)
}) == 1

In [None]:
# Assemble the collected lists into a single frame, one row per image,
# then show a reproducible random sample of ten rows.
data = pd.DataFrame({
    'image_id': image_id,
    'image_name': image_name,
    'format': image_format,
    'Class': categories,
    'label': labels,
    'image_path': img_path,
    'height': img_height,
    'width': img_width,
})
data.sample(n=10, random_state=42)

In [None]:
# (rows, columns) of the metadata frame.
data.shape

In [None]:
# Number of rows per class name - a quick class-balance check.
data['Class'].value_counts()

In [None]:
# Total number of missing cells across the whole frame.
null_total = data.isnull().sum().sum()
if null_total != 0:
    print(f'{null_total} null values found in dataset')
else:
    print('No Null Values found in whole dataset')

# Number of rows that exactly repeat an earlier row.
duplicate_total = data.duplicated().sum()
if duplicate_total != 0:
    print(f'{duplicate_total} Duplicate values found in dataset')
else:
    print('No Duplicate Values found in dataset')

In [None]:
# Draw a 3x3 grid of sampled images, each titled with its class folder name,
# and save the figure to brainPlots.png.
sample_data = data.sample(12 , random_state = 42)
sample_imgs = sample_data['image_path'].to_list()

plt.figure(figsize = (12,12))
for i in range(9):
    plt.subplot(3 , 3 , i+1)
    # OpenCV loads colour images in BGR channel order while matplotlib
    # interprets arrays as RGB; convert so colours are not swapped.
    img = cv2.cvtColor(cv2.imread(sample_imgs[i]), cv2.COLOR_BGR2RGB)
    # Parent directory name == class name; os.path avoids a hard-coded '/'.
    plt.title(os.path.basename(os.path.dirname(sample_imgs[i])), fontsize = 12)
    plt.axis('off')
    plt.imshow(img)

plt.savefig("brainPlots.png")

In [None]:
# Describe the last image loaded into `img`: geometry, storage and value range.
divider = "==" * 20
height, width = img.shape[0], img.shape[1]

print(f'Shape of the image : {img.shape}')
print(f'Image Height: {height}')
print(f'Image Width: {width}')
# NOTE(review): ndim is the number of array axes, not the channel count.
print(f'Image Dimensions/Channels: {img.ndim}')
print(divider)
print(f'Image size: {img.size}')
print(f'Image Data Type: {img.dtype}')
print(divider)
print(f'Maximum RGB value in this image {img.max()}')
print(f'Minimum RGB value in this image {img.min()}')

In [58]:
# Per-image intensity statistics, computed over all pixels and channels of
# each image and attached to `data` as new columns.
mean_val, std_val, max_val, min_val, med_val = ([] for _ in range(5))

for row in range(len(data)):
    img_arr = img_to_array(load_img(data['image_path'][row]))

    for bucket, value in (
        (mean_val, img_arr.mean()),
        (std_val, img_arr.std()),
        (max_val, img_arr.max()),
        (min_val, img_arr.min()),
        (med_val, np.median(img_arr)),
    ):
        bucket.append(value)

for column, values in (
    ('mean_pixel', mean_val),
    ('std_dev', std_val),
    ('max_pixel', max_val),
    ('min_pixel', min_val),
    ('median', med_val),
):
    data[column] = values

# Signed distance of each image's mean from the dataset-wide average mean.
data['mean_variation'] = data['mean_pixel'].mean() - data['mean_pixel']

In [None]:
# Violin plot of the per-image mean pixel value, one violin per class;
# the figure is written to meanValue.png before being shown.
fig, ax = plt.subplots(figsize=(8, 5))
sns.violinplot(data=data, x='Class', y='mean_pixel', ax=ax)
sns.despine()
plt.xticks(rotation=340)
ax.set_title('Mean value distribution for all Classes')
fig.savefig("meanValue.png")
plt.show()

In [None]:
# Scatter of per-image mean vs. standard deviation, coloured by class.
axis_label_style = dict(fontsize=14, weight='bold')

plt.figure(figsize=(20, 8))
sns.set(style="ticks", font_scale=1)
ax = sns.scatterplot(x="mean_pixel", y="std_dev", hue='Class', alpha=0.8, data=data)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.xticks(rotation=0, fontsize=12)
ax.set_xlabel('Image Channel Colour Mean', **axis_label_style)
ax.set_ylabel('Image Channel Colour Standard Deviation', **axis_label_style)
plt.title('Mean and Standard Deviation of Image Samples', fontsize=16, weight='bold');

In [61]:
# Training hyper-parameters and image geometry.
BATCH_SIZE = 64
NUM_EPOCHS = 75

# Square input edge length; the two shapes below are derived from it.
IMG_SIZE = 256
TARGET_SIZE = (IMG_SIZE, IMG_SIZE)      # (height, width) handed to the generators
INPUT_SHAPE = TARGET_SIZE + (3,)        # same size plus three colour channels

In [62]:
# Training generator: rescale pixels to [0, 1] and apply random augmentation.
#
# `featurewise_std_normalization` / `zca_epsilon` were dropped from both
# generators: feature-wise statistics are only applied after calling
# `datagen.fit(...)`, which this notebook never does, so the option was a
# silent no-op; `zca_epsilon` only matters when ZCA whitening is enabled.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=16,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   brightness_range=[0.5,1.5],
                                   shear_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Evaluation generator: same rescaling as training, but NO random
# augmentation, so validation/test metrics are computed on the unmodified
# images and are reproducible from run to run.
test_datagen = ImageDataGenerator(rescale=1./255)
    

In [None]:
# Options shared by both directory iterators: resized RGB images, a single
# 0/1 label per image, BATCH_SIZE images per batch.
_flow_options = dict(
    target_size=TARGET_SIZE,
    color_mode="rgb",
    class_mode="binary",
    batch_size=BATCH_SIZE,
)

# Training batches are reshuffled; test batches keep a fixed order.
train_dataset = train_datagen.flow_from_directory(
    directory=train_set_path, shuffle=True, **_flow_options)

test_dataset = test_datagen.flow_from_directory(
    directory=test_set_path, shuffle=False, **_flow_options)

In [82]:
# Start every run with an empty checkpoint directory.
checkpoint_filepath = "./temp/"

# Only remove the directory when it exists: on a fresh kernel/environment it
# does not, and an unconditional rmtree would raise FileNotFoundError.
if os.path.isdir(checkpoint_filepath):
    shutil.rmtree(checkpoint_filepath)
os.makedirs(checkpoint_filepath, exist_ok=True)

In [83]:
# Optimiser and the metrics tracked during training/evaluation.
optimizer = Adam(learning_rate=0.0001, beta_1=0.95, beta_2=0.999, epsilon=1e-09)
metrics = ["accuracy", Recall(), Precision(), AUC()]

# Stop once val_loss has not improved for 5 epochs and roll the model back to
# the best epoch, so the model saved/evaluated afterwards is the best one
# rather than the last one.
early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1,
                           restore_best_weights=True)

# Keep only the best (lowest val_loss) model under checkpoint_filepath.
checkpoint = ModelCheckpoint(filepath=checkpoint_filepath, monitor="val_loss", verbose=1, save_best_only=True, mode="min")

# The LR schedule must react before early stopping does. With equal patience
# (5 and 5) the reduction fired on the very epoch training stopped and never
# influenced a single update; a shorter patience gives the lower learning
# rate a chance to help.
lr_reduction = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=2, verbose=1, mode="min")

callbacks_list = [early_stop, lr_reduction, checkpoint]

In [None]:
# Transfer-learning classifier: ImageNet-pretrained InceptionResNetV2 backbone
# (without its original classification top) followed by a small binary head.
base_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=INPUT_SHAPE)
x = base_model.output

# Pool the backbone's spatial feature map into one vector per image first, so
# the dense head runs once per image instead of once per spatial location
# (previously Dense/ReLU/Dropout were applied to the 4-D map and pooled last).
x = GlobalAveragePooling2D()(x)
x = Dense(500,  kernel_initializer='he_uniform')(x)
x = Activation('relu')(x)
x = Dropout(0.25)(x)

# Single sigmoid unit: predicted probability of the positive class.
output = Dense(1, activation='sigmoid')(x)

# Name the model after the backbone actually used (it was labelled "DenseNet").
model = Model(inputs=base_model.input, outputs=output, name="InceptionResNetV2_Network")

model.summary()

In [None]:
# Render the model graph, including tensor shapes, to finalModel.png.
plot_model(model, 'finalModel.png', show_shapes=True)

In [86]:
# Binary cross-entropy pairs with the single sigmoid output unit; the optimizer
# and metric list come from the callbacks/optimizer cell above.
model.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = metrics)

In [None]:
# Display the training iterator object (sanity check that it was created).
train_dataset

In [None]:
# Display the test iterator object (sanity check that it was created).
test_dataset

In [None]:
# Train the network.
# - `callbacks_list` is already a list, so it is passed directly rather than
#   wrapped in a second list.
# - `batch_size` is omitted: the iterators already yield batches of
#   BATCH_SIZE, and Keras documents that batch_size must not be specified for
#   generator / Sequence inputs.
# NOTE(review): `test_dataset` is used as validation data here, so it drives
# early stopping and checkpoint selection; the "test" metrics reported later
# are therefore optimistic. Consider carving out a separate validation split.
hist = model.fit(x=train_dataset,
                 epochs=NUM_EPOCHS,
                 callbacks=callbacks_list,
                 steps_per_epoch=len(train_dataset),
                 validation_data=test_dataset,
                 validation_steps=len(test_dataset))

In [None]:
# Persist the trained network three ways: the full model (HDF5), the
# architecture alone (JSON) and the weights alone (HDF5).
# NOTE(review): the file name says "MobileNet" although the backbone built in
# this notebook is InceptionResNetV2 - rename once downstream readers are updated.
model.save('MobileNet_Model.h5')

model_json = model.to_json()
with open("Model.json", mode="w") as architecture_file:
    architecture_file.write(model_json)

model.save_weights("Model_Weights.h5")
print("Saved model to disk")

In [None]:
# Names of every series Keras recorded per epoch, in recording order.
metricsEval = [*hist.history]
print(metricsEval , '\n')

# One row per epoch, one column per recorded series.
historyDF = pd.DataFrame.from_dict(hist.history)
historyDF.head()

In [None]:
# Three side-by-side panels: loss, accuracy and learning rate per epoch.
# Series are looked up by name rather than by position in the history dict,
# so the plot stays correct if metrics or callbacks are added or removed.
fig, axarr = plt.subplots(1,3, figsize=(21, 8), sharex=True)

sns.set(style="ticks", font_scale = 1)
sns.despine(top=True, right=True, left=False, bottom=False)

epochs = historyDF.index

ax = sns.lineplot(x = epochs, y = hist.history['loss'], ax=axarr[0], label="Training");
ax = sns.lineplot(x = epochs, y = hist.history['val_loss'], ax=axarr[0], label="Validation");
ax.set_ylabel('Loss')

ax = sns.lineplot(x = epochs, y = hist.history['accuracy'], ax=axarr[1], label="Training");
ax = sns.lineplot(x = epochs, y = hist.history['val_accuracy'], ax=axarr[1], label="Validation");
ax.set_ylabel('Accuracy')

# The learning rate is only recorded when an LR-scheduling callback ran.
# NOTE(review): 'lr' is the key expected from this TF2 setup; 'learning_rate'
# is accepted as a fallback for newer Keras - confirm on the installed version.
lr_key = next((key for key in ('lr', 'learning_rate') if key in hist.history), None)
if lr_key is not None:
    ax = sns.lineplot(x = epochs, y = hist.history[lr_key], ax=axarr[2]);
axarr[2].set_ylabel('Learning Rate')

axarr[0].set_title('Training and Validation Loss', fontsize=17)
axarr[1].set_title('Training and Validation Accuracy', fontsize=17)
axarr[2].set_title('LR during training', fontsize=17)

for ax in axarr:
    ax.set_xlabel('Epochs')

plt.suptitle('Training and Performance plots', fontsize=19, weight='bold');
fig.tight_layout(pad=3.0)
plt.show()

In [None]:
# Evaluate on the training iterator. The unpacking order follows the compile
# call: loss first, then accuracy, recall, precision and AUC.
# The AUC value is stored as `auc_score` so it does not overwrite the
# `sklearn.metrics.auc` function imported at the top of the notebook.
# NOTE(review): train_dataset applies random augmentation and shuffling, so
# these numbers describe augmented images and vary slightly between runs.
loss , acc , recall , precision , auc_score = model.evaluate(train_dataset)
print("\n---------------- Evaluation on Train DataSet ----------------\n")
print(f'Loss on Train set: {loss:.4f}')
print(f'Accuracy on Train set: {acc*100:.2f}%')
print(f'Recall on Train set: {recall:.2f}')
print(f'Precision on Train set: {precision:.2f}')
print(f'AUC on Train set: {auc_score:.2f}')

In [None]:
# Evaluate on the test iterator. The unpacking order follows the compile
# call: loss first, then accuracy, recall, precision and AUC.
# The AUC value is stored as `auc_score` so it does not overwrite the
# `sklearn.metrics.auc` function imported at the top of the notebook.
loss , acc , recall , precision , auc_score = model.evaluate(test_dataset)
print("\n---------------- Evaluation on Test DataSet ----------------\n")
print(f'Loss on Test set: {loss:.4f}')
print(f'Accuracy on Test set: {acc*100:.2f}%')
print(f'Recall on Test set: {recall:.2f}')
print(f'Precision on Test set: {precision:.2f}')
print(f'AUC on Test set: {auc_score:.2f}')