In [1]:
# This R environment comes with many helpful analytics packages installed
# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats
# For example, here's a helpful package to load
library(keras)
library(magrittr)
library(reticulate)
library(tidyverse) # metapackage of all tidyverse packages
library(e1071)
library(caTools)
library(caret)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

list.files(path = "../input")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.1     [32m✔[39m [34mtibble   [39m 3.1.8
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mextract()[39m   masks [34mmagrittr[39m::extract()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m    masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m       masks [34mstats[39m::lag()
[31m✖[39m [34mpurrr[39m::[32mset_names()[39m masks [34mmagrittr[39m::set_names()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to b

# Data Preparation and Organisation

In [2]:
set.seed(1212)

In [3]:
train_data_glioma <- file.path("/kaggle/input/brain-tumor-classification-mri/Training/glioma_tumor")
train_data_meningioma <- file.path("/kaggle/input/brain-tumor-classification-mri/Training/meningioma_tumor")
train_data_pituitary <- file.path("/kaggle/input/brain-tumor-classification-mri/Training/pituitary_tumor")
train_data_notmr <- file.path("/kaggle/input/brain-tumor-classification-mri/Training/no_tumor");

**Volume of data in each class**

In [4]:
print(paste("Glioma :: ",length(list.files(train_data_glioma))))
print(paste("Meningioma :: ",length(list.files(train_data_meningioma))))
print(paste("No_tumour :: ",length(list.files(train_data_notmr))))
print(paste("Pituitary :: ",length(list.files(train_data_pituitary))))

[1] "Glioma ::  826"
[1] "Meningioma ::  822"
[1] "No_tumour ::  395"
[1] "Pituitary ::  827"


**Creating various working directories**

In [5]:
train_dir<-'./train';dir.create(train_dir)
test_dir<-'./test';dir.create(test_dir)

In [6]:
train_dir_glioma<-file.path(train_dir,"glioma");dir.create(train_dir_glioma)
train_dir_meningioma<-file.path(train_dir,"meningioma");dir.create(train_dir_meningioma)
train_dir_notmr<-file.path(train_dir,"notmr");dir.create(train_dir_notmr)
train_dir_pituitary<-file.path(train_dir,"pituitary");dir.create(train_dir_pituitary)

test_dir_glioma<-file.path(test_dir,"glioma");dir.create(test_dir_glioma)
test_dir_meningioma<-file.path(test_dir,"meningioma");dir.create(test_dir_meningioma)
test_dir_notmr<-file.path(test_dir,"notmr");dir.create(test_dir_notmr)
test_dir_pituitary<-file.path(test_dir,"pituitary");dir.create(test_dir_pituitary)

In [7]:
gliomavector <- c(list.files(train_data_glioma))
meningiomavector <- c(list.files(train_data_meningioma))
notmrvector <- c(list.files(train_data_notmr))
pituitaryvector <- c(list.files(train_data_pituitary))

# all other dir 600->train and rest test except for notmr -> 300 train and rest test

file.copy(file.path(train_data_glioma,gliomavector[1:600]),train_dir_glioma)
file.copy(file.path(train_data_meningioma,meningiomavector[1:600]),train_dir_meningioma)
file.copy(file.path(train_data_notmr,notmrvector[1:300]),train_dir_notmr)
file.copy(file.path(train_data_pituitary,pituitaryvector[1:600]),train_dir_pituitary)

#filling the test_dir 
file.copy(file.path(train_data_glioma,gliomavector[601:length(gliomavector)]),test_dir_glioma)
file.copy(file.path(train_data_meningioma,meningiomavector[601:length(meningiomavector)]),test_dir_meningioma)
file.copy(file.path(train_data_notmr,notmrvector[301:length(notmrvector)]),test_dir_notmr)
file.copy(file.path(train_data_pituitary,pituitaryvector[601:length(pituitaryvector)]),test_dir_pituitary)


In [8]:
#printing the len of the train and test data
print("Train Dataset :: ")
print(paste("Glioma train :: ",length(list.files(train_dir_glioma))))
print(paste("Meningioma train :: ",length(list.files(train_dir_meningioma))))
print(paste("Notmr train :: ",length(list.files(train_dir_notmr))))
print(paste("Pituitary train :: ",length(list.files(train_dir_pituitary))))

print("----------")
print("Test Dataset")
print(paste("Glioma test :: ",length(list.files(test_dir_glioma))))
print(paste("Meningioma test :: ",length(list.files(test_dir_meningioma))))
print(paste("Notmr test :: ",length(list.files(test_dir_notmr))))
print(paste("Pituitary test :: ",length(list.files(test_dir_pituitary))))

[1] "Train Dataset :: "
[1] "Glioma train ::  600"
[1] "Meningioma train ::  600"
[1] "Notmr train ::  300"
[1] "Pituitary train ::  600"
[1] "----------"
[1] "Test Dataset"
[1] "Glioma test ::  226"
[1] "Meningioma test ::  222"
[1] "Notmr test ::  95"
[1] "Pituitary test ::  227"


# Test and Train Data Splitting

In [9]:

train_datagen<-image_data_generator(rescale=1/255,
                                   zoom_range=0.2,
                                   height_shift_range=0.2,
                                   width_shift_range=0.2,
                                   fill_mode='nearest')

test_datagen<-image_data_generator(rescale = 1/255)

train_generator<-flow_images_from_directory(directory=train_dir,
                                           generator=train_datagen,
                                           class_mode="categorical",
                                           target_size=c(224,224),
                                           batch_size=32,
                                           seed = 123)

test_generator<-flow_images_from_directory(directory=test_dir,
                                           generator=test_datagen,
                                           class_mode="categorical",
                                           target_size=c(224,224),
                                           batch_size=32,
                                           shuffle = FALSE,
                                           seed = 123)

In [10]:
test_generator$classes

# Inception Resnet V2

In [46]:
train_datagen = image_data_generator(
  rescale = 1/255,
  rotation_range = 5,
  width_shift_range = 0.1,
  height_shift_range = 0.05,
  shear_range = 0.1,
  zoom_range = 0.15,
  horizontal_flip = TRUE,
  vertical_flip = FALSE,
  fill_mode = "reflect"
)

test_datagen = image_data_generator(rescale = 1/255)

training_batch_size = 32

train_generator = flow_images_from_directory(
    directory = train_dir,
    generator = train_datagen,
    class_mode = "categorical",
    target_size = c(224, 224),
    batch_size = 20
)

test_generator = flow_images_from_directory(
    directory = test_dir,
    generator = test_datagen,
    class_mode = "categorical",
    target_size = c(224, 224),
    batch_size = 10,
    shuffle=FALSE,
)

In [47]:
conv_base_irv2 <- application_inception_resnet_v2(
    weights = "../input/keras-pretrained-models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top = FALSE,
    input_shape = c(224, 224, 3)
)

resnet_model <- keras_model_sequential() %>% 
    conv_base_irv2 %>% 
    layer_global_average_pooling_2d()%>%
    layer_flatten() %>%
    layer_dense(units=256, activation="relu") %>%
    layer_batch_normalization() %>%
    layer_dense(units=256, activation="relu") %>%
    layer_batch_normalization() %>%
    layer_dense(units=256, activation="relu") %>%
    layer_batch_normalization() %>%
    layer_dense(units=4, activation="softmax")

freeze_weights(conv_base_irv2)

resnet_model %>% compile(
    loss = "categorical_crossentropy",
    optimizer_adam(learning_rate = 1e-2),
    metrics = c("accuracy")
)

training_step_size = ceiling(length(list.files(train_dir, recursive = T)) / training_batch_size)

summary(resnet_model)

Model: "sequential_6"
________________________________________________________________________________
Layer (type)                        Output Shape                    Param #     
inception_resnet_v2 (Functional)    (None, 5, 5, 1536)              54336736    
________________________________________________________________________________
global_average_pooling2d_6 (GlobalA (None, 1536)                    0           
________________________________________________________________________________
flatten_2 (Flatten)                 (None, 1536)                    0           
________________________________________________________________________________
dense_18 (Dense)                    (None, 256)                     393472      
________________________________________________________________________________
batch_normalization_311 (BatchNorma (None, 256)                     1024        
________________________________________________________________________________
dense_

In [48]:
history<-resnet_model %>% fit(
    train_generator,
    steps_per_epoch=20,
    epoch=20,
    validation_data=test_generator,
    validation_step=10
)

In [49]:
table(test_generator$classes)


  0   1   2   3 
226 222  95 227 

## Summary

In [50]:
max_class <- function(vec) { return(which.max(vec)-1) }


preds <- predict(resnet_model, test_generator)
pred_classes <- apply(preds, 1, max_class)

table(pred_classes)

pred_classes
  0   1   2   3 
254 272  68 176 

In [51]:
cf <- table(test_generator$classes, pred_classes)
confusionMatrix(cf)

Confusion Matrix and Statistics

   pred_classes
      0   1   2   3
  0 194  28   1   3
  1  43 154  10  15
  2   2  29  57   7
  3  15  61   0 151

Overall Statistics
                                         
               Accuracy : 0.7221         
                 95% CI : (0.689, 0.7535)
    No Information Rate : 0.3532         
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.6156         
                                         
 Mcnemar's Test P-Value : 3.505e-10      

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3
Sensitivity            0.7638   0.5662  0.83824   0.8580
Specificity            0.9380   0.8635  0.94587   0.8721
Pos Pred Value         0.8584   0.6937  0.60000   0.6652
Neg Pred Value         0.8897   0.7847  0.98370   0.9540
Prevalence             0.3299   0.3532  0.08831   0.2286
Detection Rate         0.2519   0.2000  0.07403   0.1961
Detection Prevalence   0.2935  