In [1]:
import os
import matplotlib.pyplot as plt
import cv2
import random
import time
import achintya_toolkit as ach
from tqdm import tqdm
import shutil

import tensorflow as tf
import numpy as np

In [2]:
parent_directory = "Brain Tumor Classification 2D"

for i in os.listdir(parent_directory):
    print(f'{i} ---> {len(os.listdir(os.path.join(parent_directory, i)))}')

Astrocytoma ---> 354
Glioblastoma ---> 1462
Gliomas ---> 2474
Meningioma ---> 1280
Neurocytoma ---> 388


In [3]:
ach.preprocess_images(parent_directory, remove=False)

Processing Astrocytoma...
Processing Glioblastoma...
Processing Gliomas...
Processing Meningioma...
Processing Neurocytoma...
--------------------------------------------------------------------------------
Remaining image in Astrocytoma are 1058
Remaining image in Glioblastoma are 1464
Remaining image in Gliomas are 2474
Remaining image in Meningioma are 1280
Remaining image in Neurocytoma are 538


In [4]:
#Remove bad paths

import glob
import os

img_paths = glob.glob(os.path.join(parent_directory,'*/*.*')) # assuming you point to the directory containing the label folders.

for image_path in img_paths:
    try:
        img_bytes = tf.io.read_file(image_path)
        decoded_img = tf.io.decode_image(img_bytes)
    except tf.errors.InvalidArgumentError as e:
        print(f"Found bad path {image_path}...{e}. Removing...")
        #os.remove(image_path)

Found bad path Brain Tumor Classification 2D\Astrocytoma\Astrocytoma_T1CE_000.tif...{{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: . Removing...
Found bad path Brain Tumor Classification 2D\Astrocytoma\Astrocytoma_T1CE_000_mask.tif...{{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: . Removing...
Found bad path Brain Tumor Classification 2D\Astrocytoma\Astrocytoma_T1CE_001.tif...{{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] name: . Removing...
Found bad path Brain Tumor Classification 2D\Astrocytoma\Astrocytoma_T1CE_001_mask.tif...{{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/t

In [11]:
images = []
class_dir = r"Brain Tumor Classification 2D\Glioblastoma"

for i in os.listdir(class_dir):

    if "mask" not in i: 
        images.append(i)
        
len(images)

732

In [13]:
img_name = random.choice(images)
img_path = os.path.join(class_dir, img_name)
mask_path = os.path.join(class_dir, img_name[:-4] + "_mask" + img_name[-4:])

img = cv2.imread(img_path)
mask = cv2.imread(mask_path)

print(img_name)
print(mask_path)
print(img.shape)
print(mask.shape)

masked_image = cv2.addWeighted(img, 0.2, mask, 0.8, 0)
display_image = cv2.hconcat([img, masked_image])

cv2.imshow("astrocytoma", display_image)
cv2.waitKey(0)
cv2.destroyAllWindows() 

#Error shows that some masks have different name formatting

Glioblastoma_flair_182.tif
Brain Tumor Classification 2D\Glioblastoma\Glioblastoma_flair_182_mask.tif
(240, 240, 3)


AttributeError: 'NoneType' object has no attribute 'shape'

In [18]:
img = cv2.imread(r"Brain Tumor Classification 2D\Glioblastoma\Glioblastoma_flair_000mask_.tif")

for i in np.array(img):
    for j in i:
        if(j[0] != 0):
            print(j)
            break

[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]
[255 255 255]


### Setting up directory structure and preprocessing

In [3]:
def make_class_folder(main_path, class_name):

    if not os.path.exists(os.path.join(main_path, class_name)):
        os.mkdir(os.path.join(main_path, class_name))

In [6]:
parent_directory = "Brain Tumor Classification 2D"
dataset_directory = "Dataset"
dataset_directory_images = os.path.join(dataset_directory, "images")
dataset_directory_masks = os.path.join(dataset_directory, "masks")

paths = [dataset_directory, dataset_directory_images, dataset_directory_masks]

for path in paths: 
    if not os.path.exists(path):
        os.mkdir(path)


for main_folder in os.listdir(dataset_directory):
    for class_name in os.listdir(parent_directory):
        make_class_folder(os.path.join(dataset_directory, main_folder), class_name)


In [9]:
for class_name in tqdm(os.listdir(parent_directory)): 
    print(f"Processing {class_name}")

    for image in os.listdir(os.path.join(parent_directory, class_name)):
        if "mask" in image:
            continue
        else:
            file_name, extension = os.path.splitext(image)
            print(f"Found {image}")

            mask_name = file_name + "_mask"
            alt_mask_name = file_name + "mask_"

            if os.path.exists(os.path.join(parent_directory, class_name, mask_name + extension)):
                print(f"Found {mask_name}")
            elif os.path.exists(os.path.join(parent_directory, class_name, alt_mask_name + extension)):
                print(f"Found {alt_mask_name}")
                print("Renaming...")
                os.rename(os.path.join(parent_directory, class_name, alt_mask_name + extension), os.path.join(parent_directory, class_name, mask_name + extension))
            else:
                print(f"No mask found for {image}")
                continue

            img_path = os.path.join(parent_directory, class_name, image)
            mask_path = os.path.join(parent_directory, class_name, mask_name + extension)

            img = cv2.imread(img_path)
            mask = cv2.imread(mask_path)

            if img.shape == mask.shape:
                print("shape matched")
                print("Moving files...")
                shutil.move(os.path.join(parent_directory, class_name, image), os.path.join(dataset_directory_images, class_name))
                shutil.move(os.path.join(parent_directory, class_name, mask_name + extension), os.path.join(dataset_directory_masks, class_name))
            else:
                print("Shape Mismatch")

100%|██████████| 5/5 [00:00<00:00, 32.01it/s]

Processing Astrocytoma
Processing Glioblastoma
Processing Gliomas
Processing Meningioma
Processing Neurocytoma
Found 1.tif
No mask found for 1.tif
Found 106.tif
No mask found for 106.tif
Found 109.tif
No mask found for 109.tif
Found 11.tif
No mask found for 11.tif
Found 111.tif
No mask found for 111.tif
Found 112.tif
No mask found for 112.tif
Found 114.tif
No mask found for 114.tif
Found 116.tif
No mask found for 116.tif
Found 117.tif
No mask found for 117.tif
Found 119.tif
No mask found for 119.tif
Found 12.tif
No mask found for 12.tif
Found 120.tif
No mask found for 120.tif
Found 121.tif
No mask found for 121.tif
Found 122.tif
No mask found for 122.tif
Found 125.tif
No mask found for 125.tif
Found 13.tif
No mask found for 13.tif
Found 131.tif
No mask found for 131.tif
Found 132.tif
No mask found for 132.tif
Found 133.tif
No mask found for 133.tif
Found 137.tif
No mask found for 137.tif
Found 138.tif
No mask found for 138.tif
Found 139.tif
No mask found for 139.tif
Found 141.tif
No ma




In [14]:
#Additional processing needed for "Neurocytoma due to different naming"

neurocytoma_directory = r"Brain Tumor Classification 2D\Neurocytoma"
class_name = "Neurocytoma"

for image in os.listdir(neurocytoma_directory):

    if "mask" in image:
        continue
    else:
        file_name, extension = os.path.splitext(image)
        print(f"Found {image}")

        mask_name = "T2_" + file_name + "_mask"
        req_mask_name = file_name + "_mask"

        if os.path.exists(os.path.join(neurocytoma_directory, mask_name + extension)):
            print(f"Found {mask_name}")
            print("Renaming...")
            os.rename(os.path.join(neurocytoma_directory, mask_name + extension), os.path.join(neurocytoma_directory, req_mask_name + extension))
        else:
            print(f"No mask found for {image}")
            continue

        img_path = os.path.join(neurocytoma_directory, image)
        mask_path = os.path.join(neurocytoma_directory, req_mask_name + extension)

        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path)

        if img.shape == mask.shape:
            print("shape matched")
            print("Moving files...")
            shutil.move(os.path.join(neurocytoma_directory, image), os.path.join(dataset_directory_images, class_name))
            shutil.move(os.path.join(neurocytoma_directory, req_mask_name + extension), os.path.join(dataset_directory_masks, class_name))
        else:
            print("Shape Mismatch")

Found 1.tif
Found T2_1_mask
Renaming...
Shape Mismatch
Found 106.tif
No mask found for 106.tif
Found 109.tif
No mask found for 109.tif
Found 11.tif
No mask found for 11.tif
Found 111.tif
No mask found for 111.tif
Found 112.tif
No mask found for 112.tif
Found 114.tif
No mask found for 114.tif
Found 116.tif
No mask found for 116.tif
Found 117.tif
No mask found for 117.tif
Found 119.tif
No mask found for 119.tif
Found 12.tif
Found T2_12_mask
Renaming...
Shape Mismatch
Found 120.tif
No mask found for 120.tif
Found 121.tif
No mask found for 121.tif
Found 122.tif
No mask found for 122.tif
Found 125.tif
No mask found for 125.tif
Found 13.tif
No mask found for 13.tif
Found 131.tif
No mask found for 131.tif
Found 132.tif
No mask found for 132.tif
Found 133.tif
No mask found for 133.tif
Found 137.tif
No mask found for 137.tif
Found 138.tif
No mask found for 138.tif
Found 139.tif
No mask found for 139.tif
Found 141.tif
No mask found for 141.tif
Found 143.tif
No mask found for 143.tif
Found 145.ti

In [21]:
print("The following is the final image count per class")

import time

for class_name in tqdm(os.listdir(os.path.join(dataset_directory_images))):
    print(f"{class_name} ------ {len(os.listdir(os.path.join(dataset_directory_images, class_name)))}")

The following is the final image count per class


100%|██████████| 5/5 [00:00<00:00, 793.26it/s]

Astrocytoma ------ 529
Glioblastoma ------ 732
Gliomas ------ 1237
Meningioma ------ 640
Neurocytoma ------ 176



