In [None]:
# All imports for the notebook, grouped: standard library first, then third-party.
import os
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
from tqdm import tqdm

# Attach Google Drive, then make the MRI data directory the working directory
# so that later relative paths resolve against it.
drive.mount("/content/drive")
os.chdir("/content/drive/MyDrive/Colab Notebooks/Data Science Group Project/data/processed/mri")
print(os.getcwd())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/1X35NgpoRZLTS0jivDsMTksGG26Hr7xU5/data/processed/mri


In [None]:
def show_image(img):
  """Draw a single image with a grayscale colormap and hidden axes.

  Args:
    img: image data handed straight to ``Axes.imshow``.
  """
  _, ax = plt.subplots(figsize=(6, 6))
  # "gray" is the canonical colormap name; the "grey" spelling is only an
  # alias in newer Matplotlib releases and raises ValueError on older ones.
  ax.imshow(img, cmap="gray")
  ax.axis("off")

In [None]:
class Preprocessing:
  """Resize and denoise every image below a raw dataset root.

  The folder layout found under ``raw_dataset_path`` is mirrored under
  ``processed_dataset_path`` and each processed image keeps its file name.
  """

  def __init__(self, raw_dataset_path, processed_dataset_path, image_size):
    """Record the paths/size and discover the folders that contain files.

    Args:
      raw_dataset_path: root directory that is searched recursively.
      processed_dataset_path: root directory the results are written under.
      image_size: target size, passed to ``cv2.resize`` as its ``dsize``
        argument (OpenCV orders this as (width, height)).
    """
    self.image_size = image_size
    self.raw_dataset_path = raw_dataset_path
    self.processed_dataset_path = processed_dataset_path
    # Keep only directories that directly hold at least one file.
    self.source_folders = [
      folder
      for folder in Path(raw_dataset_path).rglob("*")
      if folder.is_dir() and any(entry.is_file() for entry in folder.iterdir())
    ]

  def make_directory(self, source):
    """Create (if needed) and return the output folder that mirrors ``source``.

    Args:
      source: a folder located under ``raw_dataset_path``.

    Returns:
      The ``Path`` of the matching folder under ``processed_dataset_path``.
    """
    processed_folder = Path(self.processed_dataset_path) / source.relative_to(self.raw_dataset_path)
    processed_folder.mkdir(parents=True, exist_ok=True)
    return processed_folder

  @staticmethod
  def _to_uint8(img):
    """Convert an image array to uint8, saturating instead of wrapping.

    uint8 input is returned untouched. An array whose maximum is <= 1.0 is
    treated as a normalised image and scaled by 255. Everything is clipped to
    [0, 255] before the cast, because a bare ``astype(np.uint8)`` wraps
    out-of-range values (e.g. uint16 300 -> 44).
    """
    if img.dtype == np.uint8:
      return img
    # ``img.size`` guard: max() of an empty array raises ValueError.
    if img.size and img.max() <= 1.0:
      img = img * 255.0
    return np.clip(img, 0, 255).astype(np.uint8)

  @staticmethod
  def denoise_img(img, h=10):
    """Denoise an image with OpenCV's non-local-means filter.

    Args:
      img: image array; non-uint8 input is converted to uint8 first.
      h: value forwarded as ``h`` to ``cv2.fastNlMeansDenoising``
        (defaults to the previously hard-coded 10).

    Returns:
      The result of ``cv2.fastNlMeansDenoising``.
    """
    return cv2.fastNlMeansDenoising(Preprocessing._to_uint8(img), h=h)

  def preprocess_image(self, img):
    """Resize ``img`` to ``self.image_size`` (bilinear), then denoise it."""
    img = cv2.resize(img, self.image_size, interpolation=cv2.INTER_LINEAR)

    img = self.denoise_img(img)

    return img

  def preprocess_images(self):
    """Process every file in every discovered source folder and save the result.

    Files that ``cv2.imread`` cannot decode are reported and skipped; writes
    that ``cv2.imwrite`` reports as failed are reported as well.
    """
    if not self.source_folders:
      # Most likely a wrong path or working directory; make the no-op visible.
      print(f"Warning: no folders containing files found under {self.raw_dataset_path}")

    for source in self.source_folders:
      processed_folder = self.make_directory(source)

      print(f"Processing: {source}")
      print(f"Outputting to : {processed_folder}")

      # Materialise the listing so tqdm knows the total, and drop
      # sub-directories, which cv2.imread cannot read as images.
      image_paths = sorted(p for p in source.glob("*") if p.is_file())

      for img in tqdm(image_paths):
        image = cv2.imread(str(img), cv2.IMREAD_GRAYSCALE)

        if image is None:
          print(f"Warning: could not read {img}, skipping")
          continue

        processed_img = self.preprocess_image(image)

        output_path = processed_folder / img.name
        # imwrite signals failure through its boolean return value.
        if not cv2.imwrite(str(output_path), processed_img):
          print(f"Warning: could not write {output_path}")

In [None]:
# Input/output roots are relative paths, so they resolve against the current
# working directory.
raw, processed = "raw", "processed_nlmd_224"
IMAGE_SIZE = (224, 224)

# Build the pipeline, then run it over every discovered folder.
preprocessor = Preprocessing(
  raw_dataset_path=raw,
  processed_dataset_path=processed,
  image_size=IMAGE_SIZE,
)
preprocessor.preprocess_images()

Processing: raw/train/glioma
Outputting to : processed_nlmd_224/train/glioma


1278it [02:40,  7.96it/s]


Processing: raw/train/meningioma
Outputting to : processed_nlmd_224/train/meningioma


1197it [02:31,  7.92it/s]


Processing: raw/train/pituitary
Outputting to : processed_nlmd_224/train/pituitary


706it [01:30,  7.80it/s]


Processing: raw/val/glioma
Outputting to : processed_nlmd_224/val/glioma


365it [00:44,  8.13it/s]


Processing: raw/val/meningioma
Outputting to : processed_nlmd_224/val/meningioma


342it [00:40,  8.46it/s]


Processing: raw/val/pituitary
Outputting to : processed_nlmd_224/val/pituitary


201it [00:25,  7.96it/s]


Processing: raw/test/glioma
Outputting to : processed_nlmd_224/test/glioma


183it [00:21,  8.68it/s]


Processing: raw/test/meningioma
Outputting to : processed_nlmd_224/test/meningioma


171it [00:21,  8.06it/s]


Processing: raw/test/pituitary
Outputting to : processed_nlmd_224/test/pituitary


102it [00:12,  8.17it/s]
