In [None]:
!pip install np_utils

In [None]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt

from warnings import filterwarnings 
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score, roc_curve
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, MaxPooling2D
from keras import models

import tensorflow as tf
import os
import os.path
from pathlib import Path
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.utils import to_categorical 
from sklearn.model_selection import train_test_split
from keras import regularizers
from keras.optimizers import RMSprop, Adam

import glob
from PIL import Image

# Ignoring unnecessary warnings
filterwarnings("ignore", category=DeprecationWarning)
filterwarnings("ignore", category=FutureWarning)
filterwarnings("ignore", category=UserWarning)

In [None]:
# Path process
## Train

No_Data_Path = Path("../input/brain-tumor-detection/no")
Yes_Data_Path = Path("../input/brain-tumor-detection/yes")

In [None]:
No_JPG_Path = list(No_Data_Path.glob(r"*.jpg"))
Yes_JPG_Path = list(Yes_Data_Path.glob(r"*.jpg"))

In [None]:
print(No_JPG_Path[0:5])
print("_____"*20)
print(Yes_JPG_Path[0:5])

In [None]:
Yes_No_List = []

for No_JPG in No_JPG_Path:
    Yes_No_List.append(No_JPG)

for Yes_JPG in Yes_JPG_Path:
    Yes_No_List.append(Yes_JPG)

In [None]:
print(Yes_No_List[1])

In [None]:
JPG_Labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],Yes_No_List))

In [None]:
len(JPG_Labels)

In [None]:
print(JPG_Labels[2995:2999])

In [None]:
print("NO COUNTING: ", JPG_Labels.count("no"))
print("YES_COUNTING: ", JPG_Labels.count("yes"))

In [None]:
JPG_Path_Series = pd.Series(Yes_No_List, name="JPG").astype(str)
JPG_Category_Series = pd.Series(JPG_Labels, name="TUMOR_CATEGORY")

In [None]:
Main_Train_Data = pd.concat([JPG_Path_Series, JPG_Category_Series], axis=1)

In [None]:
print(Main_Train_Data.head(-1))

In [None]:
# Test 

Prediction_Path = Path("../input/brain-tumor-detection/pred")
Test_JPG_Path = list(Prediction_Path.glob(r"*.jpg"))
print(Test_JPG_Path[0:5])

In [None]:
Test_JPG_Labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],Test_JPG_Path))
print(Test_JPG_Labels[0:5])

In [None]:
Test_JPG_Path_Series = pd.Series(Test_JPG_Path, name="JPG").astype(str)
Test_JPG_Labels_Series = pd.Series(Test_JPG_Labels, name="TUMOR_CATEGORY")

In [None]:
Test_Data = pd.concat([Test_JPG_Path_Series, Test_JPG_Labels_Series], axis=1)

In [None]:
print(Test_Data.head())

In [None]:
# Shuffling

# A fixed seed keeps the row order - and therefore the previews, the
# train/test split and the generator subsets built from it - identical
# across kernel restarts. 42 matches the seed used for train_test_split.
Main_Train_Data = Main_Train_Data.sample(frac=1, random_state=42).reset_index(drop=True)
print(Main_Train_Data.head(-1))

In [None]:
# visualization
# Show a single image together with its label
image_num = 2997
figure = plt.figure(figsize=(5,5))
preview_path = Main_Train_Data.at[image_num, "JPG"]
preview_label = Main_Train_Data.at[image_num, "TUMOR_CATEGORY"]
plt.imshow(plt.imread(preview_path))
plt.title(preview_label)

In [None]:
# 5x5 grid with the first 25 rows of the frame, ticks hidden
fig, axes = plt.subplots(5, 5,
                         figsize=(10, 10),
                         subplot_kw=dict(xticks=[], yticks=[]))

for i, ax in enumerate(axes.flat):
    row_path = Main_Train_Data.at[i, "JPG"]
    row_label = Main_Train_Data.at[i, "TUMOR_CATEGORY"]
    ax.imshow(plt.imread(row_path))
    ax.set_title(row_label)

plt.tight_layout()
plt.show()

In [None]:
# Model Training Data

train_data, test_data = train_test_split(Main_Train_Data, train_size=0.9, random_state=42)

In [None]:
print(train_data.shape, test_data.shape)

In [None]:
# Image Data Generator Without Diversification

Generator_Basic = ImageDataGenerator(rescale=1./255,
                                    validation_split=0.1)

In [None]:
Train_Set = Generator_Basic.flow_from_dataframe(dataframe=train_data, 
                                               x_col ="JPG",
                                               y_col = "TUMOR_CATEGORY",
                                               color_mode = "grayscale",
                                               class_mode = "categorical",
                                               subset = "training",
                                               batch_size = 20,
                                               target_size = (200, 200))

In [None]:
Validation_Set = Generator_Basic.flow_from_dataframe(dataframe=train_data,
                                              x_col = "JPG",
                                              y_col = "TUMOR_CATEGORY", 
                                              color_mode = "grayscale",
                                              class_mode = "categorical",
                                              subset = "validation",                            
                                              batch_size = 20,
                                              target_size = (200, 200)) 

In [None]:
Test_Set = Generator_Basic.flow_from_dataframe(dataframe=test_data,
                                              x_col = "JPG",
                                              y_col = "TUMOR_CATEGORY", 
                                              color_mode = "grayscale",
                                              class_mode = "categorical",
                                              batch_size = 20,
                                              target_size = (200, 200)) 

In [None]:
# checking 
for data_batch, label_batch in Train_Set:
    print("DATA SHAPE :", data_batch.shape)
    print("LABEL SHAPE :", label_batch.shape)
    break

In [None]:
for data_batch, label_batch in Validation_Set:
    print("DATA SHAPE :", data_batch.shape)
    print("LABEL SHAPE :", label_batch.shape)
    break
    
for data_batch, label_batch in Test_Set:
    print("DATA SHAPE :", data_batch.shape)
    print("LABEL SHAPE :", label_batch.shape)
    break

In [None]:
print(Train_Set.class_indices)
print(Train_Set.classes[0:5])
print(Train_Set.image_shape)

In [None]:
print(Validation_Set.class_indices)
print(Validation_Set.classes[0:5])
print(Validation_Set.image_shape)

In [None]:
print(Test_Set.class_indices)
print(Test_Set.classes[0:5])
print(Test_Set.image_shape)

In [None]:
# CNN Model for non-diversification


Model = Sequential()
Model.add(Conv2D(32, (5,5), activation="relu", input_shape=(200, 200, 1)))
Model.add(MaxPool2D((2,2)))
Model.add(Dropout(0.2))
#
Model.add(Conv2D(64, (3,3), activation="relu"))
Model.add(MaxPool2D((2,2)))
Model.add(Dropout(0.2))
#
Model.add(Conv2D(128, (3,3), activation="relu"))
Model.add(MaxPool2D((2,2)))
Model.add(Dropout(0.2))
#
Model.add(Conv2D(256, (3,3), activation="relu"))
Model.add(MaxPool2D((2,2)))
Model.add(Dropout(0.2))
# 
Model.add(Flatten())
Model.add(Dropout(0.5))
Model.add(Dense(512, activation="relu"))
Model.add(Dense(2, activation="softmax"))

In [None]:
Model.summary()

In [None]:
Model.compile(optimizer=RMSprop(learning_rate=0.001),loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Model Fitting
ANN_Model = Model.fit(Train_Set, validation_data=Validation_Set, epochs=30, steps_per_epoch=120)

In [None]:
Model.summary()

In [None]:
# Checking model with Graphs
# Per-epoch metric lists recorded by Model.fit
HistoryDict = ANN_Model.history

losses, val_losses, acc, val_acc = (
    HistoryDict[metric_key]
    for metric_key in ("loss", "val_loss", "accuracy", "val_accuracy")
)
# 1-based epoch numbers for the x axis of the plots
epochs = range(1, len(val_losses)+1)

In [None]:
# Training vs. validation accuracy per epoch. Each curve carries a label so
# that plt.legend() has entries to show (without labels the legend is empty).
plt.plot(ANN_Model.history["accuracy"], label="ACCURACY")
plt.plot(ANN_Model.history["val_accuracy"], label="VALIDATION ACCURACY")
plt.xlabel("EPOCH")
plt.ylabel("ACCURACY")
plt.legend()
plt.show()

In [None]:
plt.plot(epochs, losses, "k-", label="LOSS")
plt.plot(epochs, val_losses, "ro", label = "LOSS VALIDATION")
plt.title("LOSS & LOSS VAL")
plt.xlabel("EPOH")
plt.ylabel("LOSS & LOSS VAL")
plt.legend()
plt.show()

In [None]:
plt.plot(epochs, acc, "k-", label="ACCURACY")
plt.plot(epochs, val_acc, "ro", label = "VALIDATION ACCURACY")
plt.title("TRAINING AND VALIDATION ACCURACY")
plt.xlabel("EPOH")
plt.ylabel("TRAINING AND VALIDATIN ACCURACY")
plt.legend()
plt.show()

In [None]:
Dict_Summary = pd.DataFrame(ANN_Model.history)
Dict_Summary.plot()

In [None]:
# Prediction Score on Divided Data
# Model_Results[0] is printed as the loss, Model_Results[1] as the accuracy
Model_Results = Model.evaluate(Test_Set, verbose=False)
for _caption, _position in (("LOSS: ", 0), ("ACCURACY: ", 1)):
    print(_caption + "%.4f" % Model_Results[_position])

In [None]:
# Prediction Process
Main_Data_Prediction = pd.DataFrame({"JPG":Test_JPG_Path_Series})
print(Main_Data_Prediction.head())

In [None]:
Main_Test_Generator = ImageDataGenerator(rescale=1./255)

In [None]:
Main_Test_Set = Main_Test_Generator.flow_from_dataframe(dataframe=Main_Data_Prediction,
                                                       x_col="JPG",
                                                       y_col=None,
                                                       color_mode="grayscale",
                                                       class_mode=None,
                                                       bath_size=20,
                                                       target_size=(200,200))

In [None]:
Model_Test_Prediction = Model.predict(Main_Test_Set)


In [None]:
Model_Test_Prediction = Model_Test_Prediction.argmax(axis=-1)

In [None]:
print(Model_Test_Prediction)

In [None]:
# Translate predicted class indices into display labels.
# The index -> class mapping is read from the training generator rather than
# hard-coded: the previous code labelled index 1 as 'NO', although index 1 is
# the "yes" (tumour) class.
_index_to_class = {index: name for name, index in Train_Set.class_indices.items()}
Last_Prediction = [
    "TUMOR" if _index_to_class[int(class_index)] == "yes" else "NO"
    for class_index in Model_Test_Prediction
]
print(Last_Prediction)

In [None]:
# 5x5 grid: the first 25 prediction images, each titled with its predicted label
fig, axes = plt.subplots(5, 5,
                         figsize=(20,20),
                         subplot_kw=dict(xticks=[], yticks=[]))

for i, ax in enumerate(axes.flat):
    shown_image = plt.imread(Main_Data_Prediction.at[i, "JPG"])
    ax.imshow(shown_image)
    ax.set_title(f"PREDICTION: {Last_Prediction[i]}")
plt.tight_layout()
plt.show()

In [None]:
#image data generator with diversification

Data_Generator_Div = ImageDataGenerator(rescale=1./255, brightness_range=[0.3, 0.9],
                                       rotation_range=30,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       shear_range=0.2,
                                       horizontal_flip=True,
                                       fill_mode="nearest",
                                       validation_split=0.1)

In [None]:
Train_Set_Div = Data_Generator_Div.flow_from_dataframe(dataframe=train_data,
                                                      x_col="JPG",
                                                      y_col="TUMOR_CATEGORY",
                                                      color_mode="grayscale",
                                                      class_mode="categorical",
                                                      subset="training",
                                                      batch_size=20,
                                                      target_size=(200,200))

In [None]:
Validation_Set_Div = Data_Generator_Div.flow_from_dataframe(dataframe=train_data,
                                                      x_col="JPG",
                                                      y_col="TUMOR_CATEGORY",
                                                      color_mode="grayscale",
                                                      class_mode="categorical",
                                                      subset="validation",
                                                      batch_size=20,
                                                      target_size=(200,200))

In [None]:
# Unlabelled prediction images for the second model.
# - Main_Data_Prediction only has a "JPG" column, so y_col is None rather
#   than a column name that does not exist in the frame.
# - Built from the rescale-only Main_Test_Generator: images fed to predict()
#   should not receive the random training augmentations.
# - shuffle=False keeps the outputs aligned with the rows of the frame.
Test_Set_Div = Main_Test_Generator.flow_from_dataframe(dataframe=Main_Data_Prediction,
                                                      x_col="JPG",
                                                      y_col=None,
                                                      color_mode="grayscale",
                                                      class_mode=None,
                                                      batch_size=20,
                                                      shuffle=False,
                                                      target_size=(200,200))

In [None]:
# CNN Model with diversification

Model_Two = Sequential()

Model_Two.add(Conv2D(32, (5,5), activation="relu", input_shape=(200, 200, 1)))
Model_Two.add(MaxPool2D((2,2)))
#
Model_Two.add(Conv2D(64, (3,3), activation="relu"))
Model_Two.add(MaxPool2D((2,2)))

#
Model_Two.add(Conv2D(128, (3,3), activation="relu"))
Model_Two.add(MaxPool2D((2,2)))
#
Model_Two.add(Conv2D(256, (3,3), activation="relu"))
Model_Two.add(MaxPool2D((2,2)))

# 
Model_Two.add(Flatten())
Model_Two.add(Dropout(0.5))
Model_Two.add(Dense(512, activation="relu"))
Model_Two.add(Dense(2, activation="softmax"))

In [None]:
Model_Two.compile(optimizer=RMSprop(learning_rate=0.001), loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Train the second model on the augmented flow. The batch size (20) is
# already fixed by Train_Set_Div, and `batch_size` must not be passed to
# fit() when the input is a generator, so it is not repeated here.
ANN_Model_Two = Model_Two.fit(Train_Set_Div, validation_data=Validation_Set_Div, epochs=20)

In [None]:
# New test custom data

new_test_path = Path("../input/test-ragib")

In [None]:
new_test_jpg = list(new_test_path.glob(r"*.jpg"))

In [None]:
print(new_test_jpg)

In [None]:
ragib_test_JPG_labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1],new_test_jpg))

In [None]:
print(ragib_test_JPG_labels)

In [None]:
Ragib_Test_JPG_Path_Series = pd.Series(new_test_jpg, name="JPG").astype(str)
Ragib_Test_JPG_Labels_Series = pd.Series(ragib_test_JPG_labels, name="TUMOR_CATEGORY")

In [None]:
Test_Ragib = pd.concat([Ragib_Test_JPG_Path_Series, Ragib_Test_JPG_Labels_Series], axis=1)

In [None]:
print(Test_Ragib)

In [None]:
figure = plt.figure(figsize=(5,5))
plt.imshow(plt.imread(Test_Ragib["JPG"][0]))

In [None]:
Test_Set_2 = Generator_Basic.flow_from_dataframe(dataframe=Test_Ragib,
                                              x_col = "JPG",
                                              y_col = "TUMOR_CATEGORY", 
                                              color_mode = "grayscale",
                                              class_mode = "categorical",
                                              target_size = (200, 200)) 

In [None]:
for data_batch, label_batch in Test_Set_2:
    print("data shape: ",data_batch.shape)
    print("label shape: ",label_batch.shape)
    break

In [None]:
print(Test_Set_2.class_indices)
print(Test_Set_2.classes)
print(Test_Set_2.image_shape)

In [None]:
Ragib_Data_Prediction = pd.DataFrame({"JPG": Ragib_Test_JPG_Path_Series})

In [None]:
print(Ragib_Data_Prediction)

In [None]:
Ragibs_Test_Set = Main_Test_Generator.flow_from_dataframe(dataframe=Ragib_Data_Prediction,
                                                         x_col="JPG",
                                                         y_col=None,
                                                         color_mode="grayscale",
                                                         class_mode=None,
                                                         target_size=(200,200))

In [None]:
Ragib_Test_Prediction = Model.predict(Ragibs_Test_Set)

In [None]:
Ragib_Test_Prediction = Ragib_Test_Prediction.argmax(axis=-1)
print(Ragib_Test_Prediction)

In [None]:
#AWESOMEEEEEEEEEEEEEEEEE