In [1]:
import glob
import cv2
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, GlobalAveragePooling2D, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
# hyperparameter setting
# These values are shared by later cells: the image generators use
# image_size and batch_size, the model input shape uses image_size, and the
# training call uses epochs.
image_size = 224  # images are resized to image_size x image_size pixels
batch_size = 32  # batch size of the training and validation generators
epochs = 10  # number of epochs requested from the training call

In [3]:
# input files
# Root folder of the dataset; every class folder is searched one level below it.
base_dir = "../input/brain-tumor-classification-mri"

# NOTE: glob.glob is called without recursive=True, so '**' behaves like '*'
# and matches exactly one intermediate directory.
glioma_tumor = glob.glob(f"{base_dir}/**/glioma_tumor/*")
meningioma_tumor = glob.glob(f"{base_dir}/**/meningioma_tumor/*")
pituitary_tumor = glob.glob(f"{base_dir}/**/pituitary_tumor/*")
normal = glob.glob(f"{base_dir}/**/no_tumor/*")

# Report how many files were found for each class.
for template, paths in (
    ("glioma_tumor : {}", glioma_tumor),
    ("meningioma_tumor : {}", meningioma_tumor),
    ("pituitary_tumor : {}", pituitary_tumor),
    ("normal : {}", normal),
):
    print(template.format(len(paths)))

In [4]:
def add_labels(data, label):
    """Pair every element of ``data`` with ``label``.

    Args:
        data: iterable of items (the notebook passes lists of file paths).
        label: value attached to every item.

    Returns:
        A new list of ``(item, label)`` tuples in the iteration order of ``data``.
    """
    return [(item, label) for item in data]

# Replace each list of paths with a list of (path, label) pairs.
# NOTE: the names are rebound in place, so re-running this cell without
# re-running the globbing cell would wrap the pairs a second time.
glioma_tumor = add_labels(data=glioma_tumor, label='glioma')
meningioma_tumor = add_labels(data=meningioma_tumor, label='meningioma')
pituitary_tumor = add_labels(data=pituitary_tumor, label='pituitary')
normal = add_labels(data=normal, label='normal')

In [5]:
def split_data(data, random_state=42):
    """Split ``data`` into train, validation and test subsets.

    15% of ``data`` is held out as the test subset; 15% of the remainder is
    then held out as the validation subset.

    Args:
        data: sequence of samples to split.
        random_state: seed forwarded to both ``train_test_split`` calls so the
            split is the same on every run. Pass ``None`` to get a different
            random split each time.

    Returns:
        ``[train_data, val_data, test_data]``.
    """
    train_data, test_data = train_test_split(
        data, test_size=0.15, random_state=random_state
    )
    train_data, val_data = train_test_split(
        train_data, test_size=0.15, random_state=random_state
    )
    return [train_data, val_data, test_data]

# Split each class separately; every result is a [train, val, test] list.
glioma_data = split_data(data=glioma_tumor)
meningioma_data = split_data(data=meningioma_tumor)
pituitary_data = split_data(data=pituitary_tumor)
normal_data = split_data(data=normal)

In [6]:
# Module-level accumulators filled by collect(); reset whenever this cell runs.
train_data, val_data, test_data = [], [], []


def collect(data):
    """Append one [train, val, test] triple to the module-level sample lists.

    Args:
        data: indexable with three iterables; ``data[0]`` goes to
            ``train_data``, ``data[1]`` to ``val_data`` and ``data[2]`` to
            ``test_data``.
    """
    # The lists are mutated in place, so no ``global`` rebinding is needed.
    train_data.extend(data[0])
    val_data.extend(data[1])
    test_data.extend(data[2])

# Pool the per-class splits into the shared train / val / test lists.
for class_split in (glioma_data, meningioma_data, pituitary_data, normal_data):
    collect(class_split)

# One two-column frame per subset: 'image' and 'label'.
train_df, val_df, test_df = (
    pd.DataFrame(samples, columns=['image', 'label'])
    for samples in (train_data, val_data, test_data)
)

In [7]:
# Training generator: rescales pixel values by 1/255 and applies light random
# augmentation (rotation, zoom, shear, shifts, horizontal flip).
train_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Batches of grayscale images read from the paths in train_df, shuffled, with
# categorical labels.
train_set = train_gen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image',
    y_col='label',
    color_mode='grayscale',
    class_mode='categorical',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
)

# rescaling (validation)
# No augmentation here: validation images are only rescaled by 1/255.
val_rescaled = ImageDataGenerator(rescale=1.0 / 255)

val_set = val_rescaled.flow_from_dataframe(
    dataframe=val_df,
    x_col='image',
    y_col='label',
    color_mode='grayscale',
    class_mode='categorical',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
)

# rescaling (test)
# No augmentation here: test images are only rescaled by 1/255.
test_rescaled = ImageDataGenerator(rescale=1.0 / 255)

# One image per batch and no shuffling, so the iteration order stays fixed.
test_set = test_rescaled.flow_from_dataframe(
    dataframe=test_df,
    x_col='image',
    y_col='label',
    color_mode='grayscale',
    class_mode='categorical',
    target_size=(image_size, image_size),
    batch_size=1,
    shuffle=False,
)

In [8]:
# build the model
# NOTE: a MobileNet backbone (input_shape=(image_size, image_size, 3),
# include_top=False, weights='imagenet') was left commented out here and is
# not used; the network below is defined from scratch for 1-channel input.

model = Sequential()

# First block: 5x5 convolution on the grayscale input, then 2x2 max-pooling.
model.add(Conv2D(32, (5, 5), padding='same', activation='relu',
                 input_shape=(image_size, image_size, 1)))
model.add(MaxPool2D((2, 2)))

# Three further blocks: 3x3 convolution, 2x2 max-pooling, 20% dropout.
for n_filters in (64, 128, 256):
    model.add(Conv2D(n_filters, (3, 3), padding='same', activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Dropout(0.2))

# Classifier head: flatten, one hidden dense layer, 4-way softmax output.
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(4, activation='softmax'))

In [9]:
# model configuration
# Adam is configured through `learning_rate`; the older `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Stop when val_loss has not improved for 8 epochs and roll back to the best
# weights seen. With epochs = 10 this can only trigger in the last epochs.
es = EarlyStopping(monitor='val_loss',
                   patience=8,
                   restore_best_weights=True)

model.summary()

In [10]:
# Train on the augmented generator and validate on val_set after each epoch.
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from current Keras releases.
history = model.fit(train_set,
                    steps_per_epoch=len(train_set),
                    epochs=epochs,
                    validation_data=val_set,
                    validation_steps=len(val_set),
                    callbacks=[es])

In [12]:
# Predicted class index per test image. Model.predict_classes has been removed
# from tf.keras; argmax over the softmax output is the equivalent.
probabilities = model.predict(test_set)
predictions = np.argmax(probabilities, axis=-1)

# Take the display names from the generator's own label -> index mapping
# instead of hard-coding them: flow_from_dataframe orders the labels
# alphabetically (glioma, meningioma, normal, pituitary), so a hand-written
# list can silently mislabel rows and columns of the matrix.
# Assumes train_set saw the same label set, so the model's output indices
# follow the same mapping.
class_names = [
    name.capitalize()
    for name, _ in sorted(test_set.class_indices.items(), key=lambda item: item[1])
]

# Fixing `labels` keeps the matrix square even if a class is never predicted
# or is absent from the test split.
cm = confusion_matrix(test_set.classes,
                      predictions,
                      labels=list(range(len(class_names))))

plot_confusion_matrix(conf_mat = cm,
                      figsize=(8,7),
                      class_names = class_names,
                      show_normed=True)

In [13]:
# Accuracy = correctly classified samples (matrix diagonal) / all samples.
# The sample count comes from the matrix itself rather than len(test_set),
# which counts batches and only equals the sample count when batch_size is 1.
test_accuracy = np.trace(cm) / cm.sum()
print("Test Accuracy: {}%".format(round(test_accuracy * 100, 3)))