Author: Matthew Viafora

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for name in names_in_folder:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/brain-tumor/Brain Tumor.csv
/kaggle/input/brain-tumor/bt_dataset_t3.csv
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image3202.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1153.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1465.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image688.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image233.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image3561.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1166.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1975.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1263.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image920.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1629.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1701.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image695.jpg
/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor/Image1273.jpg
/kaggle/input/brain-tu

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.io import imread
from sklearn.model_selection import train_test_split
import keras
from keras import Sequential
from keras.applications import MobileNetV2
from keras.layers import Dense
from keras.preprocessing import image

**Put first two columns into a dataframe:**

In [3]:
# Load only the first two CSV columns (displayed below as "Image" and "Class").
csv_path = '/kaggle/input/brain-tumor/Brain Tumor.csv'
df = pd.read_csv(csv_path, usecols=[0, 1])
df.head()

Unnamed: 0,Image,Class
0,Image1,0
1,Image2,0
2,Image3,1
3,Image4,1
4,Image5,0


**Dataset Insight:**

Check for null values:

In [4]:
# Count missing values per column.
df.isnull().sum()

Image    0
Class    0
dtype: int64

Check the class distribution to detect any imbalance:

In [5]:
# Number of rows for each distinct value of the Class column.
df['Class'].value_counts()

0    2079
1    1683
Name: Class, dtype: int64

Clean and Prepare Data:

In [6]:
# Collect the full path of every entry in the image directory.
base_path = "/kaggle/input/brain-tumor/Brain Tumor/Brain Tumor"
path_list = [os.path.join(base_path, entry) for entry in os.listdir(base_path)]

In [7]:
# Map each file's base name (extension stripped) to its full path.
path_dict = {}
for full_path in path_list:
    stem, _ext = os.path.splitext(os.path.basename(full_path))
    path_dict[stem] = full_path

# Look up every value of the Image column; names without a matching file map to None.
df['Paths'] = df['Image'].map(path_dict.get)

In [8]:
# Preview the frame, which now includes the Paths column.
df.head()

Unnamed: 0,Image,Class,Paths
0,Image1,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...
1,Image2,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...
2,Image3,1,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...
3,Image4,1,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...
4,Image5,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...


Load all images and resize them to 224×224:

In [9]:
# Import the module rather than `from PIL.Image import open`, which would shadow
# Python's built-in open() for every cell that runs afterwards.
from PIL import Image as PILImage


def _load_resized_pixels(path, size=(224, 224)):
    """Read one image file and return its pixels as a NumPy array.

    Parameters
    ----------
    path : str
        Location of the image file on disk.
    size : tuple of int, optional
        Target (width, height) passed to PIL's ``resize``; defaults to (224, 224).

    Returns
    -------
    numpy.ndarray
        Pixel data of the resized image, always with three colour channels.
    """
    # The context manager closes the underlying file as soon as the pixels are read.
    with PILImage.open(path) as img:
        # convert("RGB") guarantees three channels even for a grayscale file,
        # so every array has the same shape when the images are stacked later.
        return np.asarray(img.convert("RGB").resize(size))


df["Pixels"] = df["Paths"].map(_load_resized_pixels)

In [10]:
# Preview the frame, which now includes the Pixels column.
df.head()

Unnamed: 0,Image,Class,Paths,Pixels
0,Image1,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
1,Image2,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
2,Image3,1,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
3,Image4,1,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
4,Image5,0,/kaggle/input/brain-tumor/Brain Tumor/Brain Tu...,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."


**Preprocessing**

In [11]:
# Convert every stored pixel array to float32 and apply MobileNetV2's input
# preprocessing. Iterating over the column itself visits rows in positional
# order, so the result lines up with the frame's rows regardless of the index
# labels (the previous df["Pixels"][i] lookup was label-based and only worked
# with a default 0..n-1 index).
image_list = [
    keras.applications.mobilenet_v2.preprocess_input(
        image.img_to_array(pixels.astype(np.float32))
    )
    for pixels in df["Pixels"]
]

# Convert image list to single array
input_x = np.array(image_list)

In [12]:
# Shape of the stacked image array, followed by a column/dtype summary of the frame.
print(input_x.shape)
df.info()

(3762, 224, 224, 3)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3762 entries, 0 to 3761
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Image   3762 non-null   object
 1   Class   3762 non-null   int64 
 2   Paths   3762 non-null   object
 3   Pixels  3762 non-null   object
dtypes: int64(1), object(3)
memory usage: 117.7+ KB


In [13]:
# Pull the Class column out as a NumPy array of labels and display it.
label_y = np.array(df["Class"])
label_y

array([0, 0, 1, ..., 0, 0, 0])

**Split Dataset into training set (80%) and testing set (20%)**

In [14]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    input_x,
    label_y,
    test_size=0.2,
    random_state=0,
)

In [15]:
# Print the shape of each split, in the order: train/test inputs, then train/test labels.
for split_array in (x_train, x_test, y_train, y_test):
    print(split_array.shape)

(3009, 224, 224, 3)
(753, 224, 224, 3)
(3009,)
(753,)


**Build MobileNetV2 model**

In [16]:
# Binary classifier: a single sigmoid unit on top of a MobileNetV2 backbone.
num_classes = 1

# ImageNet-pretrained feature extractor without its classification head.
backbone = MobileNetV2(input_shape=(224, 224, 3), weights="imagenet", include_top=False)
backbone.trainable = False  # freeze the backbone so only the layers added below are trained

model = Sequential()
model.add(backbone)
model.add(keras.layers.GlobalAveragePooling2D())
model.add(Dense(num_classes, activation='sigmoid', name='preds'))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [17]:
# Print the layer-by-layer summary, including trainable vs. non-trainable parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Functi (None, 7, 7, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
preds (Dense)                (None, 1)                 1281      
Total params: 2,259,265
Trainable params: 1,281
Non-trainable params: 2,257,984
_________________________________________________________________


In [18]:
# Compile for binary classification with plain SGD.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected outright by newer Keras releases.
model.compile(
    loss=keras.losses.binary_crossentropy,
    optimizer=keras.optimizers.SGD(learning_rate=0.1),
    metrics=['accuracy'],
)

In [19]:
# Train for 50 epochs, reporting metrics on (x_test, y_test) after every epoch.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fa46c125790>

In [20]:
# Round-trip the trained model through an HDF5 file on disk.
model.save("model_brain.h5")
pretrained_cnn = keras.models.load_model('./model_brain.h5')

# Score the reloaded model on (x_test, y_test) -- the same arrays that were passed
# as validation data during training. The result holds the loss followed by accuracy.
eval_score = pretrained_cnn.evaluate(x_test, y_test)
eval_loss, eval_accuracy = eval_score
print('Eval loss:', eval_loss)
print('Eval accuracy:', eval_accuracy)

Eval loss: 0.2913912236690521
Eval accuracy: 0.9096945524215698
