In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os
# List every file available under the Kaggle input directory, one full path per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# **Import Libraries**

In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.models import Model
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

import cv2
import matplotlib.pyplot as plt
from collections import Counter

from sklearn.utils import class_weight
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# **Data Preprocessing**

In [None]:
# All three splits live under one dataset root; only the last path component differs.
dataset_root = "../input/mri-image-based-brain-tumor-classification/MRI Image Dataset for Brain Tumor"
train_dir = f"{dataset_root}/Training"
# NOTE(review): validation reads the "Testing" folder while the held-out test
# set reads the "Validation" folder — confirm this mapping is intentional.
val_dir = f"{dataset_root}/Testing"
test_dir = f"{dataset_root}/Validation"

# Image size requested from the generators and the train/validation batch size.
img_width = 224
img_height = 224
batch_size = 32

def data_loader(color_mode='rgb'):
    """Build the training, validation and test directory iterators.

    Training images are multiplied by 1/255 and augmented (rotation up to
    45 degrees, horizontal and vertical flips, brightness factor in
    [0.5, 1.5]). Validation and test images are only multiplied by 1/255.

    Args:
        color_mode: Forwarded to ``flow_from_directory`` for all three
            iterators (default ``'rgb'``).

    Returns:
        Tuple ``(train_data, validation_data, test_data)``. The training and
        validation iterators are shuffled (seed 42) and yield categorical
        labels in batches of ``batch_size``. The test iterator yields
        unlabeled batches of one image in a fixed order, so its outputs line
        up with ``test_data.filenames`` / ``test_data.classes``.
    """
    # Keras documents target_size as (height, width); pass it in that order so
    # a non-square configuration would not silently transpose the images.
    target_size = (img_height, img_width)

    def _flow(generator, directory, **overrides):
        """Create a directory iterator with the shared defaults plus overrides."""
        options = dict(
            directory=directory,
            target_size=target_size,
            color_mode=color_mode,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=True,
            seed=42,
        )
        options.update(overrides)
        return generator.flow_from_directory(**options)

    train_gen = ImageDataGenerator(rescale=1./255, rotation_range=45,
                                   horizontal_flip=True, vertical_flip=True,
                                   brightness_range=[0.5, 1.5])
    train_data = _flow(train_gen, train_dir)

    val_gen = ImageDataGenerator(rescale=1./255)
    validation_data = _flow(val_gen, val_dir)

    # One image per batch, no labels and no shuffling: predictions come back
    # in the same order as the iterator's file list.
    test_gen = ImageDataGenerator(rescale=1./255)
    test_data = _flow(test_gen, test_dir, batch_size=1, class_mode=None,
                      shuffle=False, seed=None)

    return train_data, validation_data, test_data

# Build the three iterators once with the default colour mode ('rgb'); the
# cells below reuse train_set, val_set and test_set.
train_set, val_set, test_set = data_loader()

# **Data Visualization and Exploration**

In [None]:
def data_exploration(dir_path, train_data):
    """Show sample images, their pixel histograms and the class balance.

    Three figures are produced:
      1. one randomly chosen image from every folder under ``dir_path`` that
         contains files, titled with the folder name;
      2. the pixel-value histogram of each of those same images;
      3. a bar chart of the number of training samples per class.

    Args:
        dir_path: Root directory that is walked for sample images.
        train_data: Directory iterator exposing ``class_indices`` (mapping of
            class name to index) and ``classes`` (per-sample class index).
    """
    # Collect (folder name, path of one random file) for every folder that has files.
    samples = []
    for folder, subfolders, files in os.walk(dir_path):
        subfolders.sort()  # make the traversal (and therefore panel) order deterministic
        if files:
            # Use the containing folder's own name as the title instead of a
            # fixed position in the split path, so any directory depth works.
            label = os.path.basename(os.path.normpath(folder))
            samples.append((label, os.path.join(folder, np.random.choice(files))))

    # Two panels per row; rows grow with the number of folders (2x2 for four classes).
    n_cols = 2
    n_rows = max(1, -(-len(samples) // n_cols))
    fig_size = (8, 4 * n_rows)

    plt.figure(figsize=fig_size)
    for position, (label, file_path) in enumerate(samples, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.title(label, fontsize=14)
        image = cv2.resize(plt.imread(file_path), (224, 224))
        plt.imshow(image, cmap='gray')
        plt.xticks([])
        plt.yticks([])
    plt.suptitle('\nImage representation per class', fontsize=18, fontweight='bold')
    plt.tight_layout(pad=4)
    plt.show()

    plt.figure(figsize=fig_size)
    for position, (label, file_path) in enumerate(samples, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.title(label, fontsize=14)
        pixels = cv2.imread(file_path)
        plt.hist(pixels.ravel(), bins=256, range=(0, 255))
        plt.xticks(fontsize=8)
        plt.yticks(fontsize=8)
    plt.suptitle('\n\nPixels values distribution', fontsize=18, fontweight='bold')
    plt.tight_layout(pad=2)
    plt.show()

    # Look counts up by class index so every bar is explicitly tied to its
    # class name rather than relying on the Counter's insertion order.
    class_names = list(train_data.class_indices)
    samples_per_index = Counter(train_data.classes)
    counts = [samples_per_index[index] for index in train_data.class_indices.values()]

    plt.figure(figsize=(8, 8))
    plt.bar(class_names, counts, color=['pink', 'c', 'yellow', 'salmon'])
    # Print each count slightly above its bar.
    for position, count in enumerate(counts):
        plt.text(position, count + 25, str(count), fontsize=12, horizontalalignment='center')
    plt.ylabel('Counts per class\n', fontsize=13)
    plt.xlabel('\nClasses', fontsize=13)
    plt.title('\n\nBalance check\n\n', fontsize=18, fontweight='bold')
    plt.show()

# Explore the training split: sample images come from train_dir, class counts from train_set.
data_exploration(train_dir, train_set)

# **Balance the Data**

In [None]:
# Per-class loss weights computed with scikit-learn's 'balanced' mode from the
# training labels; the resulting {class index: weight} dict is passed to fit().
train_labels = train_set.classes
classes = np.unique(train_labels)
weights_balance = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=train_labels,
)
class_weights = {cls: weight for cls, weight in zip(classes, weights_balance)}
print(class_weights)

# **Callbacks**

In [None]:
def callbacks():
    """Create the training callbacks.

    Returns:
        Tuple ``(checkpoint, early)``:
          * ``checkpoint`` writes the model to ``best_vgg16.h5`` whenever
            ``val_loss`` reaches a new minimum;
          * ``early`` stops training after 10 epochs without a ``val_loss``
            improvement and restores the weights of the best epoch.
    """
    checkpoint = ModelCheckpoint(
        'best_vgg16.h5',
        monitor='val_loss',
        mode='min',
        verbose=1,
        save_best_only=True,
    )
    # restore_best_weights keeps the in-memory model consistent with the saved
    # checkpoint; without it the model used afterwards holds the weights of
    # the last epoch, i.e. `patience` epochs past the best one.
    early = EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=10,
        verbose=1,
        restore_best_weights=True,
    )

    return checkpoint, early

# (checkpoint, early-stopping) pair handed to fit() in the training cell.
call_callbacks = callbacks()

# **Model Configuration and Fine-tuning**


In [None]:
def transfer_learning(layer_num=19, num_classes=4):
    """Build a VGG16-based classifier with a new fully connected head.

    VGG16 is instantiated with its default arguments, its first ``layer_num``
    layers are frozen, and a new head (two blocks of Dense(4096, relu, L2) ->
    BatchNormalization -> Dropout(0.5), followed by a softmax layer) is
    attached. The model summary is printed as a side effect.

    Args:
        layer_num: Number of leading VGG16 layers to mark as non-trainable.
        num_classes: Number of units in the softmax output layer (default 4).

    Returns:
        The assembled, uncompiled Keras ``Model``.
    """
    base_model = VGG16()
    for layer in base_model.layers[:layer_num]:
        layer.trainable = False

    # Branch off at the fourth layer from the end — presumably the flatten
    # layer that precedes VGG16's stock dense head; verify with summary().
    head = base_model.layers[-4].output
    for block in (1, 2):
        head = Dense(4096, activation='relu', kernel_regularizer='l2', name=f'FC_{block}')(head)
        head = BatchNormalization(name=f'BN_{block}')(head)
        head = Dropout(rate=0.5, name=f'Dropout_{block}')(head)
    predictions = Dense(num_classes, activation='softmax', name='Predictions')(head)

    final_model = Model(inputs=base_model.input, outputs=predictions)
    final_model.summary()

    return final_model

# Build the model with the default freezing depth (layer_num=19); prints the summary.
training_model = transfer_learning()

# **Training the Model**

In [None]:
# Compile with Adam at a small learning rate, then train with the class
# weights and the checkpoint / early-stopping callbacks defined earlier.
opt = Adam(learning_rate=3e-5)
training_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
hist = training_model.fit(
    train_set,
    validation_data=val_set,
    epochs=150,
    callbacks=call_callbacks,
    class_weight=class_weights,
    verbose=1,
)

In [None]:
# Locate the epoch with the lowest validation loss (first one on ties) and
# report the validation accuracy recorded at that same epoch.
val_losses = hist.history['val_loss']
index_best_loss = min(range(len(val_losses)), key=val_losses.__getitem__)
best_val_loss = val_losses[index_best_loss]
best_val_acc = hist.history['val_accuracy'][index_best_loss]
print(f'The best validation params - loss: {best_val_loss}, accuracy: {best_val_acc*100}%')

# **Loss and Accuracy Plots**

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(hist.history['loss'], label='Train loss')
ax.plot(hist.history['val_loss'], label='Validation loss')
ax.set_title('Model loss\n', fontsize=18, fontweight='bold')
ax.set_ylabel('Loss', fontsize=15)
ax.set_xlabel('Epochs', fontsize=15)
ax.legend(fontsize=13, loc='upper right')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(hist.history['accuracy'], label='Train accuracy')
ax.plot(hist.history['val_accuracy'], label='Validation accuracy')
ax.set_title('Model accuracy\n', fontsize=18, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=15)
ax.set_xlabel('Epochs', fontsize=15)
ax.legend(fontsize=13, loc='lower right')
plt.show()

# **Prediction**

In [None]:
# Rewind the iterator so predictions start at the first test file.
test_set.reset()

filenames = test_set.filenames
# Number of batches needed to cover every test image. True division followed
# by ceil keeps a final partial batch (floor division would drop it), and the
# result is converted to int because `steps` is a batch count.
nb_samples = int(np.ceil(test_set.n / test_set.batch_size))


make_predictions = training_model.predict(test_set, steps=nb_samples)
# Highest-probability class index per image, then mapped back to class names.
predicted_classes = np.argmax(make_predictions, axis=1)
labels = {index: name for name, index in test_set.class_indices.items()}
predictions = [labels[k] for k in predicted_classes]

# The test iterator is not shuffled, so test_set.classes is compared
# element-wise with the predictions.
test_score = accuracy_score(test_set.classes, predicted_classes) * 100
print('The accuracy score of the test data on the training model is: %0.2f%% ' % (test_score))

# **Evaluate Model Performance**

In [None]:
# True class indices of the test images.
y_test = test_set.classes

# Class names, in the iteration order of class_indices, used to label the report.
target_names = [f'{class_name}' for class_name in test_set.class_indices]

conf_mat = confusion_matrix(y_test, predicted_classes)

test_report = classification_report(y_test, predicted_classes, target_names=target_names)
print(f'Classification report: \n \n {test_report}')

def confusion_matrix_visualization(cm, tick_labels, file_name):
    """Draw an annotated confusion-matrix heatmap, save it and show it.

    Every cell is annotated with its share of all samples (as a percentage)
    and, in parentheses on a second line, its raw count.

    Args:
        cm: Confusion matrix (2-D array-like of counts) of any size.
        tick_labels: Class names used for both axes, one per row/column.
        file_name: Path the figure is saved to before being shown.
    """
    cm = np.asarray(cm)

    plt.figure(figsize=(10, 10))

    flat_counts = cm.flatten()
    total = np.sum(cm)
    # Avoid a division by zero (NaN annotations) when the matrix holds no samples.
    shares = flat_counts / total if total else np.zeros(flat_counts.shape)

    class_percentages = ['{0:.2%}'.format(value) for value in shares]
    class_counts = ['{:.0f}'.format(value) for value in flat_counts]
    labels = [f'{v1}\n({v2})' for v1, v2 in zip(class_percentages, class_counts)]
    # Shape the annotations like the matrix itself so any class count works.
    labels = np.asarray(labels).reshape(cm.shape)

    # fmt='' because the annotations are already formatted strings.
    ax = sns.heatmap(cm, annot=labels, cmap='Blues', fmt='')
    ax.set_title('\n\nConfusion Matrix\n', fontsize=20, fontweight='bold')
    ax.set_xlabel('\n\nPredicted Labels', fontsize=16)
    ax.set_ylabel('True Labels\n\n ', fontsize=16)
    ax.xaxis.set_ticklabels(tick_labels, fontsize=14)
    ax.yaxis.set_ticklabels(tick_labels, fontsize=14)

    plt.savefig(file_name)
    plt.show()


# Plot the test-set confusion matrix and save it as confusion_matrix.png.
confusion_matrix_visualization(conf_mat, target_names, file_name='confusion_matrix.png')