# Brain Tumor Detection Using VGG19
Источник модели - https://www.kaggle.com/code/sivantm/brain-tumor-detection-using-vgg19-and-resnet/notebook

Скачать датасет - https://www.kaggle.com/datasets/navoneel/brain-mri-images-for-brain-tumor-detection?resource=download

In [98]:
import os.path
from typing import Tuple

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

import keras
from keras import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import VGG19
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense


class NNExample:
    def __init__(self, image_size, batch_size,
                 epoch=2, weight_path=None, **kwargs):

        self.epoch: int = epoch
        self.batch_size: int = batch_size
        self.image_size: Tuple[int] = image_size

        self.weights_path: str = weight_path

        self.color_mode = kwargs.get('color_mode', 'rgb')
        self.history = None

        self.train_data_gen = ImageDataGenerator(
            rescale=1. / 255,
            horizontal_flip=2, 
            vertical_flip=2)
        self.val_data_gen = ImageDataGenerator(rescale=1. / 255)

    def construct_model(self):
        model = VGG19(
          input_shape = (self.image_size[0], self.image_size[1], 3),
          include_top = False,
          weights = 'imagenet'
        )

        for layers in model.layers:
          layers.trainable = False

        x = Flatten()(model.output)
        x = Dropout(0.4)(x)
        x = Dense(1, activation = "sigmoid")(x)

        model = keras.Model(model.input, x)
        model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = "accuracy")
        return model

    def create_data_iterator(self, generator, folder_path):
        sub_folders = os.listdir(folder_path)
        iterator = generator.flow_from_directory(
            folder_path,
            target_size=self.image_size,
            color_mode=self.color_mode,
            class_mode='binary' if len(sub_folders) == 2 else 'categorical',
            batch_size=self.batch_size,
            shuffle=True
        )
        return iterator

    def get_callbacks(self) -> list:
        from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

        early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=3)
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                                    patience=2, verbose=1, factor=0.3, min_lr=0.00001)

        return [early_stop, learning_rate_reduction]

    def fit(self, train_folder, val_folder=None):
        # Data preparation and increase the amount of data set
        gen_train = self.create_data_iterator(self.train_data_gen, train_folder)
        gen_val = None
        if val_folder:
            gen_val = self.create_data_iterator(self.val_data_gen, val_folder)

        model = self.construct_model()

        weights = compute_class_weight(class_weight='balanced', classes=np.unique(gen_train.classes),
                                       y=gen_train.classes)
        class_weight = dict(zip(np.unique(gen_train.classes), weights))

        callbacks_list = self.get_callbacks()
        try:
            steps_per_epoch = len(gen_train.filepaths) // self.batch_size
            validation_steps = len(gen_val.filepaths) // self.batch_size
            self.history = model.fit(
                gen_train,
                validation_data=gen_val,
                epochs=self.epoch,
                class_weight=class_weight,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps,
                callbacks=callbacks_list
            )
        except Exception as e:
            raise Exception(f'Fitting error: {e}')

        if self.weights_path is None:
            self.weights_path = f'weights_{hash(gen_train)}_{self.epoch}.h5'

        model.save(self.weights_path)

        return self

    def evaluate(self, test_folder, predict_column: str = 'predict', proba_column='proba'):
        if self.weights_path is None or not os.path.exists(self.weights_path):
            raise Exception(f'Weights were not found by path: {self.weights_path}')

        model = keras.models.load_model(self.weights_path)

        iterator = self.val_data_gen.flow_from_directory(
            test_folder,
            target_size=self.image_size,
            color_mode=self.color_mode,
            class_mode=None,
            batch_size=self.batch_size,
            shuffle=False
        )

        N = len(iterator.classes)
        proba = model.predict(iterator, batch_size=self.batch_size, steps=N // self.batch_size + 1)

        if proba.shape[0] > N:
            # in case of odd number N / self.batch_size
            proba = proba[:N]

        if proba.shape[1] == 1:
            proba_ext = np.concatenate((1 - proba, proba), axis=1)
        else:
            proba_ext = proba
          
        class_map = dict(map(lambda v: (v[1], v[0]), iterator.class_indices.items()))
        predict = list(map(lambda v: class_map[v], np.argmax(proba_ext, axis=1)))

        # Считается обратная вероятность (необходима для некоторых методов оценки)
        proba = np.vstack((1 - proba, proba)).T

        return {predict_column: predict, proba_column: proba}


In [None]:
!unzip brain-mri-images-for-brain-tumor-detection.zip  && rm brain-mri-images-for-brain-tumor-detection.zip

Archive:  brain-mri-images-for-brain-tumor-detection.zip
  inflating: brain_tumor_dataset/no/1 no.jpeg  
  inflating: brain_tumor_dataset/no/10 no.jpg  
  inflating: brain_tumor_dataset/no/11 no.jpg  
  inflating: brain_tumor_dataset/no/12 no.jpg  
  inflating: brain_tumor_dataset/no/13 no.jpg  
  inflating: brain_tumor_dataset/no/14 no.jpg  
  inflating: brain_tumor_dataset/no/15 no.jpg  
  inflating: brain_tumor_dataset/no/17 no.jpg  
  inflating: brain_tumor_dataset/no/18 no.jpg  
  inflating: brain_tumor_dataset/no/19 no.jpg  
  inflating: brain_tumor_dataset/no/2 no.jpeg  
  inflating: brain_tumor_dataset/no/20 no.jpg  
  inflating: brain_tumor_dataset/no/21 no.jpg  
  inflating: brain_tumor_dataset/no/22 no.jpg  
  inflating: brain_tumor_dataset/no/23 no.jpg  
  inflating: brain_tumor_dataset/no/24 no.jpg  
  inflating: brain_tumor_dataset/no/25 no.jpg  
  inflating: brain_tumor_dataset/no/26 no.jpg  
  inflating: brain_tumor_dataset/no/27 no.jpg  
  inflating: brain_tumor_datase

In [None]:
!pip install split-folders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [None]:
import splitfolders
splitfolders.ratio('brain_tumor_dataset', output="output", seed=1337, ratio=(.8, 0.1,0.1)) 

Copying files: 253 files [00:00, 2233.57 files/s]


In [99]:
model = NNExample((224,224), 16, epoch=2)
model.fit("output/train", "output/val")

Found 202 images belonging to 2 classes.
Found 24 images belonging to 2 classes.
Epoch 1/2
Epoch 2/2


<__main__.NNExample at 0x7f7d0d0b4e10>

In [100]:
model.evaluate("output/test")

Found 27 images belonging to 2 classes.


{'predict': ['no',
  'no',
  'no',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes',
  'yes',
  'yes',
  'yes',
  'yes',
  'yes',
  'yes',
  'no',
  'yes',
  'yes',
  'yes',
  'yes',
  'no',
  'yes',
  'yes'],
 'proba': array([[0.8775116 , 0.96902156, 0.68115103, 0.10853159, 0.96456504,
         0.9841113 , 0.84471273, 0.9727123 , 0.9511342 , 0.9452441 ,
         0.952281  , 0.78384817, 0.03264159, 0.2111988 , 0.11742222,
         0.44140506, 0.30950856, 0.32341135, 0.07849282, 0.85770977,
         0.11218548, 0.05545378, 0.31509215, 0.11887336, 0.945148  ,
         0.10977006, 0.06427348, 0.12248835, 0.03097841, 0.31884897,
         0.8914684 , 0.03543496, 0.01588869, 0.15528724, 0.02728775,
         0.04886577, 0.0547559 , 0.047719  , 0.2161518 , 0.9673584 ,
         0.7888012 , 0.8825778 , 0.55859494, 0.69049144, 0.67658865,
         0.9215072 , 0.1422902 , 0.8878145 , 0.9445462 , 0.68490785,
         0.88112664, 0.05485201, 0.89022994, 0.9357265 ]], 