In [1]:
import os
import cv2
import random
import numpy as np 
import pandas as pd 
import seaborn as sns
from tqdm import tqdm

# Scikit-Learn ≥0.20 is required
import sklearn
# Compare the version numerically: a plain string comparison is lexicographic
# (e.g. "0.9" >= "0.20" is True), so it can accept releases that are too old.
assert tuple(int(part) for part in sklearn.__version__.split(".")[:2]) >= (0, 20)

# To plot pretty figures
%matplotlib inline
import matplotlib as mlp
import matplotlib.pyplot as plt

KeyboardInterrupt: 

## Data Loading

In [None]:
# Class folder names, in the order the folders are read.
labels = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']
TRAIN_PATH = '../input/brain-tumor-classification-mri/Training'
TEST_PATH = '../input/brain-tumor-classification-mri/Testing'
new_size = (255, 255)  # size every image is resized to before training

# Integer id assigned to each class folder name.
class_map = {
    'no_tumor': 0,
    'glioma_tumor': 1,
    'pituitary_tumor': 2,
    'meningioma_tumor': 3
}


def load_split(split_path, class_names=labels, size=new_size, label_ids=class_map):
    """Load one dataset split as normalised grayscale images plus integer labels.

    Parameters
    ----------
    split_path : str
        Directory that contains one sub-folder per class.
    class_names : sequence of str
        Names of the class sub-folders to read.
    size : tuple of int
        Target size handed to ``cv2.resize``.
    label_ids : dict
        Maps a class folder name to its integer label.

    Returns
    -------
    images : np.ndarray
        float32 array with one resized grayscale image per sample, scaled to [0, 1].
    targets : np.ndarray
        Integer label of each image, in the same order as ``images``.

    Raises
    ------
    ValueError
        Raised by ``np.stack`` when no readable image was found.
    """
    images = []
    targets = []
    for class_name in class_names:
        class_dir = os.path.join(split_path, class_name)
        # Sorted so the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print("Skipping unreadable image : ", file_path)
                continue
            img = cv2.resize(img, size)
            # cv2.imread yields BGR channel order, so convert with BGR2GRAY.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # float32 halves the memory of the default float64 division result.
            images.append(img.astype(np.float32) / 255)
            targets.append(label_ids[class_name])
    return np.stack(images), np.array(targets)


train_img, train_labels = load_split(TRAIN_PATH)

print("train_img shape : ", train_img.shape)
print("train_labels shape : ", train_labels.shape)

test_img, test_labels = load_split(TEST_PATH)

print("test_img shape : ", test_img.shape)
print("test_labels shape : ", test_labels.shape)

# Pre-processing

In [None]:
# Append the single-channel axis that Conv2D's input_shape=(255, 255, 1) expects.
# -1 lets NumPy infer the number of samples instead of hard-coding the dataset
# size, so the cell keeps working if the number of loaded images changes.
train_img = train_img.reshape(-1, 255, 255, 1)
test_img = test_img.reshape(-1, 255, 255, 1)

# Convolutional Neural Network

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, f1_score
from sklearn.model_selection import cross_validate, cross_val_score

In [None]:
import keras 
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras import backend as K 

### Create the model

In [None]:
# Architecture 1: three convolution + average-pooling stages, then a softmax
# layer with one output per tumour class.
model = Sequential([
    Conv2D(25, kernel_size=5, strides=1, activation='relu', input_shape=(255, 255, 1)),
    AveragePooling2D(pool_size=10, strides=2),

    Conv2D(10, kernel_size=5, strides=1, activation='relu'),
    AveragePooling2D(pool_size=5, strides=2),

    Conv2D(8, kernel_size=2, strides=1, activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),

    # Classification head: flatten the feature maps and score the 4 classes.
    Flatten(),
    Dense(4, activation='softmax'),
])

model.summary()


### Compile the model

In [None]:
# The labels are integer class ids, hence the sparse categorical cross-entropy.
# NOTE(review): Keras' Adadelta default learning rate is reportedly very small
# (0.001); that may make 10 epochs too few to learn anything - confirm.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

### Train the model

In [None]:
# Train for 10 epochs.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation scores are computed on the same images used for the final evaluation.
model.fit(
    train_img,
    train_labels,
    epochs=10,
    validation_data=(test_img, test_labels),
)

### Evaluate the model

In [None]:
# Loss and accuracy on the test images (verbose=0 silences the progress output).
score = model.evaluate(test_img, test_labels, verbose=0)

for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

# Per-class probabilities for every test image, then the most likely class.
y_prob = model.predict(test_img)
y_pred = y_prob.argmax(axis=1)
print(y_pred)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 table.
print(classification_report(test_labels, y_pred, zero_division=0))

# Support-weighted summary metrics; zero_division=0 scores classes that are
# never predicted as 0 instead of emitting a warning.
precision = precision_score(test_labels, y_pred, average="weighted", zero_division=0)
recall = recall_score(test_labels, y_pred, average="weighted", zero_division=0)
f1 = f1_score(test_labels, y_pred, average="weighted", zero_division=0)
# One-vs-rest ROC AUC computed from the predicted class probabilities.
roc = roc_auc_score(test_labels, y_prob, average="weighted", multi_class="ovr")

# Report every summary metric (previously only the ROC AUC was printed).
print("precision = ", precision)
print("recall = ", recall)
print("f1 = ", f1)
print("roc = ", roc)

Even worse than a random classifier: the model never predicts class 0 or class 1, although it does predict class 2 when it should. Somehow the least frequent class is the one predicted most often.

## Architecture 2

In [None]:
# Architecture 2: 3x3 kernels in the first two convolutions and two small
# hidden dense layers in front of the softmax output.
model = Sequential([
    Conv2D(25, kernel_size=3, strides=1, activation='relu', input_shape=(255, 255, 1)),
    AveragePooling2D(pool_size=5, strides=2),

    Conv2D(10, kernel_size=3, strides=1, activation='relu'),
    AveragePooling2D(pool_size=5, strides=2),

    Conv2D(8, kernel_size=2, strides=1, activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),

    # Classification head.
    Flatten(),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(4, activation='softmax'),
])

model.summary()

In [None]:
# Integer class ids as targets, hence the sparse categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
# Train for 10 epochs; the test split is also used as the validation data.
model.fit(
    train_img,
    train_labels,
    epochs=10,
    validation_data=(test_img, test_labels),
)

In [None]:
# Loss and accuracy on the test images (verbose=0 silences the progress output).
score = model.evaluate(test_img, test_labels, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Per-class probabilities for every test image, then the most likely class.
y_prob = model.predict(test_img)
y_pred = np.argmax(y_prob, axis=1)
print(y_pred)

# Per-class precision / recall / F1 table.
print(classification_report(test_labels, y_pred, zero_division=0))

# Support-weighted summary metrics; zero_division=0 scores classes that are
# never predicted as 0 instead of emitting a warning.
precision = precision_score(test_labels, y_pred, average="weighted", zero_division=0)
recall = recall_score(test_labels, y_pred, average="weighted", zero_division=0)
f1 = f1_score(test_labels, y_pred, average="weighted", zero_division=0)
# One-vs-rest ROC AUC computed from the predicted class probabilities.
roc = roc_auc_score(test_labels, y_prob, average="weighted", multi_class="ovr")

# Report every summary metric (previously only the ROC AUC was printed).
print("precision = ", precision)
print("recall = ", recall)
print("f1 = ", f1)
print("roc = ", roc)

## Architecture 3

In [None]:
# Architecture 3: four convolution + average-pooling stages with more filters,
# followed by wider hidden dense layers and the softmax output.
model = Sequential([
    Conv2D(35, kernel_size=3, strides=1, activation='relu', input_shape=(255, 255, 1)),
    AveragePooling2D(pool_size=3, strides=1),

    Conv2D(20, kernel_size=3, strides=1, activation='relu'),
    AveragePooling2D(pool_size=3, strides=1),

    Conv2D(10, kernel_size=2, strides=1, activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),

    Conv2D(8, kernel_size=2, strides=1, activation='relu'),
    AveragePooling2D(pool_size=2, strides=2),

    # Classification head.
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(4, activation='softmax'),
])

model.summary()

In [None]:
# This architecture is trained with Adam instead of Adadelta.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train for 5 epochs; the test split is also used as the validation data.
model.fit(
    train_img,
    train_labels,
    epochs=5,
    validation_data=(test_img, test_labels),
)

In [None]:
# Loss and accuracy on the test images (verbose=0 silences the progress output).
score = model.evaluate(test_img, test_labels, verbose=0)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Per-class probabilities for every test image, then the most likely class.
y_prob = model.predict(test_img)
y_pred = np.argmax(y_prob, axis=1)
print(y_pred)

# Per-class precision / recall / F1 table.
print(classification_report(test_labels, y_pred, zero_division=0))

# Support-weighted summary metrics; zero_division=0 scores classes that are
# never predicted as 0 instead of emitting a warning.
precision = precision_score(test_labels, y_pred, average="weighted", zero_division=0)
recall = recall_score(test_labels, y_pred, average="weighted", zero_division=0)
f1 = f1_score(test_labels, y_pred, average="weighted", zero_division=0)
# One-vs-rest ROC AUC computed from the predicted class probabilities.
roc = roc_auc_score(test_labels, y_prob, average="weighted", multi_class="ovr")

# Report every summary metric (previously only the ROC AUC was printed).
print("precision = ", precision)
print("recall = ", recall)
print("f1 = ", f1)
print("roc = ", roc)