# Brain Tumor MRI Dataset


### What is a brain tumor?

A brain tumor is a collection, or mass, of abnormal cells in your brain. Your skull, which encloses your brain, is very rigid. Any growth inside such a restricted space can cause problems. Brain tumors can be cancerous (malignant) or noncancerous (benign). When benign or malignant tumors grow, they can cause the pressure inside your skull to increase. This can cause brain damage, and it can be life-threatening.


### The importance of the subject

Early detection and classification of brain tumors is an important research domain in the field of medical imaging: it helps in selecting the most suitable treatment method and can therefore save patients' lives.


### About Dataset

The dataset contains 3064 T1-weighted contrast-enhanced images from 233 patients with three kinds of brain tumor:

*   meningioma (708 slices)
*   glioma (1426 slices)
*   pituitary tumor (930 slices)

Each image has dimensions 512 x 512 x 1; these are black-and-white (grayscale) images and thus have a single channel.

Download link: https://figshare.com/articles/dataset/brain_tumor_dataset/1512427



### Author

Jun Cheng
School of Biomedical Engineering
Southern Medical University, Guangzhou, China
Email: chengjun583@qq.com

## Download Dataset

In [None]:
# Unzip dataset
from zipfile import ZipFile
import os

# Extract images
import h5py
import numpy as np
import matplotlib.image as mpimg
from tqdm import tqdm

# Download images
import requests
import argparse

# Organize images
import shutil

In [None]:
def clear_screen():
    """Clear the console using the command that matches the host OS."""
    import platform
    # Windows uses `cls`; every other platform is given `clear`.
    command = 'cls' if platform.system() == 'Windows' else 'clear'
    os.system(command)

def make_folder(target_folder):
    """Creates the folder (and any missing parent folders) if it does not exist.

    Parameters:
        target_folder(str): path of the folder which needs to be created.

    Returns: None

    Raises:
        FileExistsError: if target_folder exists but is not a directory.
    """
    if not os.path.isdir(target_folder):
        print(f'Creating {target_folder} folder')
        # makedirs also creates missing parents (os.mkdir fails on nested
        # paths); exist_ok tolerates the folder appearing after the check.
        os.makedirs(target_folder, exist_ok=True)

def check_if_file_exits(file):
    """ Reports whether the finished-download marker for a file is on disk.

    The marker name is built by inserting '_done' before the extension,
    where the extension is taken to be the last three characters of the
    name (e.g. 'part_1.zip' -> 'part_1_done.zip').

    Parameters:
        file(str): Path of the file as it is named while downloading.

    Returns:
        bool: True if the '_done' file exists, False otherwise.
    """
    suffix = file[-3:]
    stem = file[:-4]
    done_path = stem + '_done.' + suffix
    return os.path.isfile(done_path)

In [None]:
def download_file(url, path):
    """ Download the file in url to the path specified.

    A completed download is renamed so that '_done' sits before the
    extension (the extension is taken to be the last three characters of
    path). If that marker file already exists the download is skipped; a
    leftover file at path is treated as a partial download and removed.

    Parameters:
        url(str): URL of the file to be downloaded.
        path(str): Destination where the downloaded file will be saved.

    Returns: None

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Check if file already exists.
    if check_if_file_exits(path):
        print(f'Already existing file {path}')
        return

    # Deleting the partial downloaded file.
    if os.path.isfile(path):
        print(f'Deleted existing partial file {path}')
        os.remove(path)

    chunk_size = 1024
    # The context manager releases the streamed connection when done.
    with requests.get(url, stream=True) as response:
        # Fail before anything is written, so an error page is never saved
        # and marked as a finished download.
        response.raise_for_status()

        # Content-Length may be absent (e.g. chunked transfer); tqdm then
        # shows a running byte count without a total.
        content_length = response.headers.get('Content-Length')
        total_size = int(content_length) if content_length is not None else None

        with open(path, "wb") as handle, tqdm(unit="B", total=total_size) as pbar:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    handle.write(chunk)
                    pbar.update(len(chunk))

    # Marking the file as downloaded.
    extension = path[-3:]
    os.rename(path, path[:-4]+'_done.'+extension)


def main():
    """Download the dataset README and the zip parts into the 'dataset' folder.

    Parts are numbered from 1 in list order. A KeyboardInterrupt during a
    part download stops the loop instead of propagating.
    """
    # Download URLs of the dataset zip parts.
    part_urls = [
        'https://ndownloader.figshare.com/files/3381290',
        'https://ndownloader.figshare.com/files/3381296',
        'https://ndownloader.figshare.com/files/3381293',
        'https://ndownloader.figshare.com/files/3381302',
    ]

    # Download URL of the dataset README.
    readme_url = 'https://ndownloader.figshare.com/files/7953679'

    target_folder = 'dataset'
    part_prefix = 'brain_tumor_dataset_part_'

    clear_screen()
    make_folder(target_folder)

    print('\n\tDownloading dataset README.txt')
    download_file(readme_url, os.path.join(target_folder, 'README.TXT'))

    print('\n\tStarting download process\n')
    for part_number, url in enumerate(part_urls, start=1):
        try:
            path = os.path.join(
                target_folder, f'{part_prefix}{part_number}.zip')
            print(f'\t\tDownloading :  {path}')
            download_file(url, path)
        except KeyboardInterrupt:
            print('\t\t\n\nDownload stopped')
            break

main()


	Downloading dataset README.txt


100%|██████████| 1627/1627 [00:00<00:00, 210777.51B/s]



	Starting download process

		Downloading :  dataset/brain_tumor_dataset_part_1.zip


100%|██████████| 214401279/214401279 [00:09<00:00, 23645903.61B/s]


		Downloading :  dataset/brain_tumor_dataset_part_2.zip


100%|██████████| 217848429/217848429 [00:08<00:00, 25494023.09B/s]


		Downloading :  dataset/brain_tumor_dataset_part_3.zip


100%|██████████| 215563856/215563856 [00:10<00:00, 21021238.88B/s]


		Downloading :  dataset/brain_tumor_dataset_part_4.zip


100%|██████████| 231679762/231679762 [00:11<00:00, 20883250.77B/s]


## Unzip Dataset

In [None]:
def unzip_file(source_name, destination):
    """ Extracts every member of a zip archive into a folder.

    Parameters:
        source_name(str): Full path of the zip archive to read.
        destination(str): Full folder path that receives the extracted contents.

    Returns: None
    """
    archive = ZipFile(source_name, 'r')
    with archive:
        # Progress messages are printed while the archive is still open.
        print(f'\tExtracting files of {source_name}')
        archive.extractall(destination)
        print(f'\tDone with {source_name}')

def main():
    """Unzip the four downloaded dataset parts into dataset/mat_dataset."""
    # Clears the screen.
    clear_screen()

    # Folder that receives the extracted files; created if missing.
    destination = os.path.join('dataset', 'mat_dataset')
    make_folder(destination)

    # Archives carry the '_done' marker added once their download finished.
    for part_number in range(1, 5):
        archive_name = f'brain_tumor_dataset_part_{part_number}_done.zip'
        unzip_file(os.path.join('dataset', archive_name), destination)

main()

Creating dataset/mat_dataset folder
	Extracting files of dataset/brain_tumor_dataset_part_1_done.zip
	Done with dataset/brain_tumor_dataset_part_1_done.zip
	Extracting files of dataset/brain_tumor_dataset_part_2_done.zip
	Done with dataset/brain_tumor_dataset_part_2_done.zip
	Extracting files of dataset/brain_tumor_dataset_part_3_done.zip
	Done with dataset/brain_tumor_dataset_part_3_done.zip
	Extracting files of dataset/brain_tumor_dataset_part_4_done.zip
	Done with dataset/brain_tumor_dataset_part_4_done.zip


## Extract images

In [None]:
def get_image_data(filename, path):
    """ Reads the mat image file and returns the image & mask array.

    Parameters:
        filename(str): Name of the file without the extension.
        path(str): Path where the filename is located.

    Returns:
        data(dict): A dictionary with the image & mask numpy array.
                    'image': The numpy array read from 'cjdata/image'.
                    'mask' : The numpy array read from 'cjdata/tumorMask'.
    """
    path = os.path.join(path, filename+'.mat')
    data = dict()
    # Open inside a context manager so the HDF5 handle is closed after each
    # file; np.array copies the datasets into memory before the file closes.
    with h5py.File(path, 'r') as file:
        data['image'] = np.array(file.get('cjdata/image'))
        data['mask'] = np.array(file.get('cjdata/tumorMask'))
    return data


def save_image_data(filename, path, data):
    """ Writes the image and its mask as grayscale png files.

    The image is saved as '<filename>.png' and the mask as
    '<filename>_mask.png', both inside path.

    Parameters:
        filename(str): Name of the file without the extension.
        path(str): Folder where the png files are written.
        data(dict): A dictionary with the image & mask numpy array.
                    'image': The numpy array for image.
                    'mask' : The numpy array for the above image mask.

    Returns: None
    """
    # (dictionary key, file-name suffix) pairs: image first, then mask.
    outputs = (('image', '.png'), ('mask', '_mask.png'))
    for key, suffix in outputs:
        target = os.path.join(path, filename + suffix)
        mpimg.imsave(target, data[key], cmap='gray', format='png')


def main():
    """Convert every .mat file of the dataset to png image and mask files.

    Source files are named 1..3064; the png files are saved under a
    zero-based name (source number minus one).
    """
    # Total number of images
    total_images = 3064

    # Where the .mat files are read from and where the pngs are written.
    mat_folder = os.path.join('dataset', 'mat_dataset')
    png_folder = os.path.join('dataset', 'png_dataset')

    clear_screen()

    # Create the output folder when it is missing.
    make_folder(png_folder)

    print(f'Starting to save images in {png_folder}')

    for number in tqdm(range(1, total_images+1)):
        image_data = get_image_data(str(number), mat_folder)
        save_image_data(str(number - 1), png_folder, image_data)

main()

Creating dataset/png_dataset folder
Starting to save images in dataset/png_dataset


100%|██████████| 3064/3064 [05:54<00:00,  8.65it/s]


## Organize dataset into images and labels


In [None]:
# Source folder with the extracted pngs and the two destination folders
input_folder = '/content/dataset/png_dataset'
output_images_folder = '/content/dataset/imagenes'
output_labels_folder = '/content/dataset/labels'

# Create the output folders if they do not exist
os.makedirs(output_images_folder, exist_ok=True)
os.makedirs(output_labels_folder, exist_ok=True)

# Files currently in the input folder
files = os.listdir(input_folder)

# Move each png: masks go to the labels folder, everything else to images
for file in files:
    if not file.endswith('.png'):
        continue
    source_path = os.path.join(input_folder, file)
    if '_mask.png' in file:
        # Masks lose the '_mask' marker so a label shares its image's file name
        file_name_without_mask = file.replace('_mask.png', '.png')
        target_path = os.path.join(output_labels_folder, file_name_without_mask)
    else:
        target_path = os.path.join(output_images_folder, file)
    shutil.move(source_path, target_path)

## Compress folders to download the dataset

In [None]:
# Staging folder that gathers the folders to be compressed
output_dataset_folder = '/content/fmri_dataset'

# Base name of the output ZIP file (make_archive appends '.zip')
output_zip_file = '/content/fmri_dataset'

# Start from a clean staging folder: remove it if it already exists
shutil.rmtree(output_dataset_folder, ignore_errors=True)

# Copy the images and labels folders into the staging folder
shutil.copytree(output_images_folder, f'{output_dataset_folder}/imagenes')
shutil.copytree(output_labels_folder, f'{output_dataset_folder}/labels')

# Compress the staging folder into a ZIP file
shutil.make_archive(output_zip_file, 'zip', output_dataset_folder)

'/content/fmri_dataset.zip'