# Image Preprocessing

The brain tumor MRI dataset used in this notebook was compiled and uploaded to Kaggle by **[Masoud Nickparvar](https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset)** from the following sources: *figshare*, *SARTAJ dataset*, and *Br35H*.

There are two functions:  `process_image` and `perform_processing`.

- `process_image` performs a series of operations on a single image (grayscale conversion, resizing, contrast enhancement, denoising, sharpening, and normalization) to prepare it for deep learning.

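- `perform_processing` applies the `process_image` function to every `.jpg` image found in the directory `input_dir/data/classes`, which is built from the arguments `input_dir`, `data` (the dataset split), and `classes` (the class folder).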





In [None]:
import os
import time
import glob
from PIL import Image
from tqdm.auto import tqdm
import numpy as np
from skimage import io, color, exposure, filters, restoration, util
from skimage.transform import resize
from skimage.filters import unsharp_mask

In [None]:
def process_image(img, target_size=(256, 256)):
    """Turn an image into an enhanced 8-bit grayscale array.

    The steps run in this order: grayscale conversion, anti-aliased
    resize, CLAHE contrast enhancement, wavelet denoising, unsharp
    masking, and min-max normalization.

    Args:
        img: Image as a NumPy array. Either 2-D (already grayscale) or
            3-D with 3 (RGB) or 4 (RGBA) channels on the last axis.
        target_size: Output shape ``(rows, cols)`` handed to
            ``skimage.transform.resize``.

    Returns:
        A 2-D ``uint8`` array of shape ``target_size``. If the image is
        completely flat after sharpening, an all-zero array is returned.
    """
    if img.ndim == 2:
        # Already single-channel: only convert to float, instead of
        # expanding to RGB and collapsing back to grayscale.
        img_gray = util.img_as_float(img)
    else:
        if img.shape[-1] == 4:
            # rgb2gray only accepts 3 channels, so blend the alpha
            # channel away first (rgba2rgb uses a white background
            # by default).
            img = color.rgba2rgb(img)
        img_gray = color.rgb2gray(img)

    img_resized = resize(img_gray, target_size, anti_aliasing=True)

    # Contrast Enhancement (CLAHE)
    img_enhanced = exposure.equalize_adapthist(img_resized)

    # Wavelet denoising with soft thresholding
    img_denoised = restoration.denoise_wavelet(
        img_enhanced,
        sigma=0.1,
        mode='soft',
        rescale_sigma=True,
    )

    # Unsharp Masking
    img_sharpened = unsharp_mask(
        img_denoised,
        radius=1,
        amount=1
    )

    # Min-max normalization to [0, 1]. A constant image has zero range,
    # which would divide by zero and fill the result with NaN.
    lowest = np.min(img_sharpened)
    value_range = np.max(img_sharpened) - lowest
    if value_range == 0:
        return np.zeros(img_sharpened.shape, dtype=np.uint8)
    img_normalized = (img_sharpened - lowest) / value_range

    return util.img_as_ubyte(img_normalized)  # 8-bit format

In [None]:
# Dataset splits; perform_processing uses one of these as the folder
# directly below `input_dir`.
data = [
    'Training',
    'Testing',
]

# Class folders; perform_processing uses one of these as the folder
# directly below the split folder.
classes = [
    'glioma',
    'notumor',
    'meningioma',
    'pituitary',
]

# Root folder of the dataset -- replace with the real location.
input_dir = "/path/to/your/dataset/"


def perform_processing(input_dir: str, data: str, classes: str,
                       output_dir: "str | None" = None):
    """Run ``process_image`` on every ``.jpg`` file of one class folder.

    Images are read from ``<input_dir>/<data>/<classes>/``, converted to
    RGB, and processed at a target size of 256x256.

    Args:
        input_dir: Root directory of the dataset.
        data: Name of the split folder below ``input_dir``.
        classes: Name of the class folder below the split folder.
        output_dir: Optional root directory for the results. When given,
            each processed image is saved under
            ``<output_dir>/<data>/<classes>/`` with its original file
            name. When ``None`` (the default) nothing is written.

    Returns:
        A short status message naming the processed class and split.
    """
    source_dir = os.path.join(input_dir, data, classes)
    # glob already returns paths that include source_dir, so they are
    # used as-is; joining them onto the directory again would double the
    # prefix for relative paths.
    image_paths = sorted(glob.glob(os.path.join(source_dir, "*.jpg")))

    target_dir = None
    if output_dir is not None:
        target_dir = os.path.join(output_dir, data, classes)
        os.makedirs(target_dir, exist_ok=True)

    for image_path in tqdm(image_paths, desc="Processing images"):
        # The context manager releases the file handle as soon as the
        # pixel data has been copied into the array.
        with Image.open(image_path) as source_image:
            img = np.array(source_image.convert('RGB'))

        processed = process_image(img, target_size=(256, 256))

        if target_dir is not None:
            destination = os.path.join(target_dir, os.path.basename(image_path))
            Image.fromarray(processed).save(destination)

    return f"Processed {classes} for the {data} dataset"

In [None]:
# Process the 'pituitary' class of the 'Testing' split.
perform_processing(
    input_dir=input_dir,
    data=data[1],
    classes=classes[3],
)

Processing images:   0%|          | 0/300 [00:00<?, ?it/s]

'Processed pituitary for the Testing dataset'