In [1]:
# type: ignore
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

### Important Functions

In [2]:
def list_image_paths(dirs: list[str]):
    """Collect the paths of all entries found directly inside the given directories.

    Args:
        dirs: Directories to scan (not recursive).

    Returns:
        A single sorted list of ``'<directory>/<entry>'`` strings covering every
        directory in ``dirs``. Every entry reported by ``os.listdir`` is included
        regardless of its type, except the macOS ``.DS_Store`` metadata file.
    """
    collected = []

    for directory in dirs:
        collected += [
            f'{directory}/{entry}'
            for entry in os.listdir(directory)
            if entry != '.DS_Store'  # Finder metadata, never an image
        ]

    collected.sort()
    return collected

In [3]:
def are_identical_images(image_path_1: str, image_path_2: str) -> bool:
    """Tell whether two image files decode to exactly the same pixels.

    Args:
        image_path_1: Path of the first image.
        image_path_2: Path of the second image.

    Returns:
        True when both files could be read and have the same shape and the
        same value at every pixel; False otherwise (including when either
        file cannot be decoded). A message is printed for identical pairs.
    """
    img1 = cv2.imread(image_path_1)
    img2 = cv2.imread(image_path_2)

    # cv2.imread signals an unreadable/missing file by returning None instead
    # of raising; an image we cannot read can never be proven identical.
    if img1 is None or img2 is None:
        return False

    # Exact element-wise comparison (also False when shapes differ).
    # cv2.subtract must not be used here: it saturates at 0 for uint8, so any
    # image that is darker-or-equal everywhere would look "identical".
    if np.array_equal(img1, img2):
        print(f'{image_path_1} == {image_path_2}')
        return True

    return False

In [4]:
def remove_identical_images(image_paths: list[str]):
    """Delete duplicate image files, keeping the first occurrence of each image.

    Each path is compared (via ``are_identical_images``) against the paths
    already accepted as unique. A path whose image matches an accepted one is
    deleted from disk and dropped from the list.

    Args:
        image_paths: Paths to deduplicate. The list is updated in place.

    Returns:
        The same ``image_paths`` list object, now holding only the surviving
        paths in their original relative order.
    """
    unique_paths = []
    deleted_paths = set()

    # Iterate over a snapshot: the original list is rewritten only at the end,
    # so no element is skipped because of removals during iteration.
    for candidate in list(image_paths):
        if candidate in deleted_paths:
            # Same path listed twice and its file is already gone.
            continue

        is_duplicate = any(
            kept != candidate and are_identical_images(kept, candidate)
            for kept in unique_paths
        )

        if is_duplicate:
            os.remove(candidate)
            deleted_paths.add(candidate)
        else:
            unique_paths.append(candidate)

    image_paths[:] = unique_paths
    return image_paths

In [5]:
def rename_images(image_paths: list[str], initial_idx = 0, prefix = "", suffix = ""):
    """Rename files to zero-padded sequential numbers, keeping directory and extension.

    The i-th path becomes ``<prefix><number><suffix><ext>`` in its own
    directory, where ``number`` is ``initial_idx + i`` left-padded with zeros
    to the width of ``initial_idx + len(image_paths)``.

    Renaming happens in two phases (source -> temporary name -> final name) so
    that a file whose new name equals the current name of another file in
    ``image_paths`` does not overwrite it.

    Args:
        image_paths: Files to rename, in the order they should be numbered.
        initial_idx: Number given to the first file.
        prefix: Text placed before the number.
        suffix: Text placed between the number and the extension.

    Raises:
        FileNotFoundError: If a source file does not exist.
        FileExistsError: If a target name is already taken by a file that is
            not itself being renamed. Nothing is renamed in that case.
    """
    import uuid

    n = initial_idx + len(image_paths)
    width = len(str(n))

    # Work out every (source, target) pair before touching the filesystem.
    plan = []
    for offset, image_path in enumerate(image_paths):
        directory, filename = os.path.split(image_path)
        ext = os.path.splitext(filename)[1]
        number = str(initial_idx + offset).rjust(width, '0')
        plan.append((image_path, os.path.join(directory, prefix + number + suffix + ext)))

    # Identify source files by (device, inode) so that differently spelled
    # paths to the same file are still recognised as the same file.
    source_ids = set()
    for src, _ in plan:
        info = os.stat(src)
        source_ids.add((info.st_dev, info.st_ino))

    def _file_id(path):
        info = os.stat(path)
        return (info.st_dev, info.st_ino)

    # Refuse to clobber files that are not part of this renaming batch.
    for _, dst in plan:
        if os.path.exists(dst) and _file_id(dst) not in source_ids:
            raise FileExistsError(f'{dst} already exists and is not one of the files being renamed')

    # Phase 1: park every file that actually changes name under a unique
    # temporary name in its own directory, freeing all final names.
    staged = []
    for src, dst in plan:
        if os.path.exists(dst) and _file_id(dst) == _file_id(src):
            continue  # already has its final name
        directory = os.path.dirname(src)
        ext = os.path.splitext(src)[1]
        tmp = os.path.join(directory, f'.rename-{uuid.uuid4().hex}{ext}')
        os.rename(src, tmp)
        staged.append((tmp, dst))

    # Phase 2: move the parked files to their final names.
    for tmp, dst in staged:
        os.rename(tmp, dst)

In [6]:
def move_images(source_image_paths: list[str], destination_dir: str):
    """Move files into ``destination_dir``, keeping their file names.

    Args:
        source_image_paths: Paths of the files to move.
        destination_dir: Directory that receives the files. It is created
            (including missing parents) if it does not exist yet.
    """
    import shutil

    os.makedirs(destination_dir, exist_ok=True)

    for image_path in source_image_paths:
        filename = os.path.basename(image_path)
        destination_path = os.path.join(destination_dir, filename)
        # shutil.move renames when possible and falls back to copy + delete,
        # so it also works when the destination is on another filesystem.
        shutil.move(image_path, destination_path)

In [7]:
def refine_image_names(dirs: list[str]):
    """Renumber the files of each directory independently, starting from 0.

    Args:
        dirs: Directories to process. Each one is listed with
            ``list_image_paths`` and its entries are passed to
            ``rename_images`` with ``initial_idx=0``.
    """
    for directory in dirs:
        paths_in_directory = list_image_paths([directory])
        rename_images(paths_in_directory, initial_idx=0)

In [8]:
# Shared augmentation pipeline used by generate_images.
datagen = ImageDataGenerator(
    # geometric jitter
    rotation_range=40,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # mirroring along both axes
    horizontal_flip=True,
    vertical_flip=True,
    # how pixels uncovered by a transform are filled
    fill_mode='nearest',
)

In [9]:
def generate_images(image_paths: list[str], n_images_per_image = 5):
    """Write augmented variants of each image next to the original file.

    For every path the image is loaded, wrapped into a batch of one and fed
    through the module-level ``datagen``; each batch drawn from the generator
    is saved as a JPEG into the directory of the source image.

    Args:
        image_paths: Images to augment.
        n_images_per_image: Exact number of augmented images to produce per
            source image. ``0`` produces none.
    """
    for image_path in image_paths:
        pixels = np.array(load_img(image_path))
        batch_of_one = pixels[np.newaxis, ...]  # the generator expects a batch axis

        # An empty save_to_dir would silently disable saving, so a bare file
        # name is treated as living in the current directory.
        output_dir = os.path.split(image_path)[0] or '.'

        batches = iter(datagen.flow(
            batch_of_one,
            batch_size=1,
            save_to_dir=output_dir,
            save_format='jpg',
        ))

        # The generator is endless and saves one file per batch it yields, so
        # pull exactly the requested number of batches and no more.
        for _ in range(n_images_per_image):
            next(batches)

In [10]:
def resize_images(image_paths: list[str], size: tuple[int, int]):
    """Resize each image to ``size`` and overwrite the original file.

    Args:
        image_paths: Images to resize in place.
        size: Target size, passed unchanged to the loaded image's ``resize``.
    """
    for path in image_paths:
        resized = load_img(path).resize(size)
        resized.save(path)  # same path: the original file is replaced

In [11]:
def replace_image_ext_with_txt(file_path: str):
    """Return ``file_path`` with its final extension swapped for ``.txt``.

    A path without an extension simply gets ``.txt`` appended.
    """
    stem, _old_ext = os.path.splitext(file_path)
    return f'{stem}.txt'

In [12]:
def add_text_file_with(text: str, file_path: str):
    """Write ``text`` to ``file_path`` as UTF-8, replacing any existing content.

    Args:
        text: Content of the file.
        file_path: File to create or overwrite.
    """
    with open(file_path, mode='w', encoding='utf-8') as out_file:
        out_file.write(text)

---

### Augment Images

In [None]:
# generate_images(
#     image_paths = list_image_paths(['./Data/test/pituitary_tumor'])[123:130],
#     n_images_per_image = 4
# )

### Refine Image Names

In [None]:
# refine_image_names([
#     './Data/test/pituitary_tumor',
# ])

### Resize Images

In [None]:
# resize_images(
#     image_paths = list_image_paths(['./Data/test/pituitary_tumor']),
#     size = (256, 256)
# )

### Generate Placeholder Text Files

In [None]:
# One path list per class, each pooling that class's train and test folders.
glioma_tumor_image_paths = list_image_paths(
    ['./Data/train/glioma_tumor', './Data/test/glioma_tumor']
)
meningioma_tumor_image_paths = list_image_paths(
    ['./Data/train/meningioma_tumor', './Data/test/meningioma_tumor']
)
pituitary_tumor_image_paths = list_image_paths(
    ['./Data/train/pituitary_tumor', './Data/test/pituitary_tumor']
)
no_tumor_image_paths = list_image_paths(
    ['./Data/train/no_tumor', './Data/test/no_tumor']
)

# for image_path in glioma_tumor_image_paths:
#     add_text_file_with("Brain Tumor Report: This image shows that the patient has a glioma tumor in the brain.", replace_image_ext_with_txt(image_path))

# for image_path in meningioma_tumor_image_paths:
#     add_text_file_with("Brain Tumor Report: This image shows that the patient has a meningioma tumor in the brain.", replace_image_ext_with_txt(image_path))

# for image_path in pituitary_tumor_image_paths:
#     add_text_file_with("Brain Tumor Report: This image shows that the patient has a pituitary tumor in the brain.", replace_image_ext_with_txt(image_path))

# for image_path in no_tumor_image_paths:
#     add_text_file_with("Brain Tumor Report: This image shows that the patient has no tumor in the brain.", replace_image_ext_with_txt(image_path))