In [1]:
from google.colab import drive
import os

# Mount Google Drive so the project data is reachable from the Colab VM.
drive.mount("/content/drive")

# Work from the MRI data folder: the dataset folder names configured further
# down ("raw", "processed_*", ...) are relative paths resolved against it.
MRI_DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/Data Science Group Project/data/processed/mri"
os.chdir(MRI_DATA_DIR)
print(os.getcwd())
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
import torch
import numpy as np
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1X35NgpoRZLTS0jivDsMTksGG26Hr7xU5/data/processed/mri


In [3]:
# --- Input folders (relative to the current working directory) -------------
raw = "raw"
# Two pre-processed variants of the same images; going by the folder names
# these are presumably NLM-denoised and CLAHE-enhanced copies -- confirm.
processed1, processed2 = "processed_nlmd_224", "processed_clahe_224"

# --- Dataset parameters -----------------------------------------------------
# Target size handed to cv2.resize for every image.
IMAGE_SIZE = (224, 224)
# Class folder names; the position in this list becomes the integer label.
classes = ["glioma", "meningioma", "pituitary"]

# --- Output -----------------------------------------------------------------
# Name of the folder the <split>.pt files are written to (a folder name, not
# a transform object -- the pipelines are train_transform / val_test_transform).
transform = "transform_v4"

In [4]:
# Each pipeline is called with three aligned views of one scan:
# image (raw), image0 and image1 (the two pre-processed variants).
# Registering image0/image1 as extra 'image' targets makes Albumentations
# apply the same sampled rotation/flip to all three views.
_PAIRED_IMAGE_TARGETS = {'image0': 'image', 'image1': 'image'}

# Training: rotation limited to 10 degrees (reflect-padded borders) and a
# horizontal flip, each applied with probability 0.5, then tensor conversion.
_train_steps = [
    A.Rotate(limit=10, border_mode=cv2.BORDER_REFLECT, p=0.5),
    A.HorizontalFlip(p=0.5),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps, additional_targets=dict(_PAIRED_IMAGE_TARGETS))

# Validation / test: no augmentation, tensor conversion only.
val_test_transform = A.Compose([ToTensorV2()], additional_targets=dict(_PAIRED_IMAGE_TARGETS))

def zscore_norm_tensor(x):
    """Standardise a tensor over its dims 1 and 2.

    The input is cast to float and divided by 255, then for every index along
    dim 0 the mean over dims 1 and 2 is subtracted and the result is divided
    by the population (biased) standard deviation plus 1e-8.  The epsilon
    keeps a constant slice from dividing by zero; such a slice comes out as
    all zeros.

    Args:
        x: tensor with at least three dims; the caller in this notebook passes
            single images after ToTensorV2 (presumably channel-first).

    Returns:
        Float tensor of the same shape as ``x``.
    """
    scaled = x.float() / 255.0
    centre = scaled.mean(dim=[1, 2], keepdim=True)
    spread = scaled.std(dim=[1, 2], keepdim=True, unbiased=False)
    centred = scaled - centre
    return centred / (spread + 1e-8)

In [5]:
class Transforming:
    """Turn one dataset split into a single saved tensor file.

    For the split named ``apply_to`` (e.g. "train"), every image file in the
    class folders ``<raw_dataset_path>/**/<apply_to>/<class>/`` is loaded
    together with the two files of the same name under
    ``<processed_dataset_path_N>/<apply_to>/<class>/``.  The three views are
    resized, passed jointly through ``transform``, z-score normalised and
    finally stacked and written to ``<save_to>/<apply_to>.pt``.

    Note that ``transform`` runs exactly once per image, so any random
    augmentation it contains is baked into the saved file rather than being
    re-sampled each epoch.

    Args:
        raw_dataset_path: root folder searched recursively for class folders.
        processed_dataset_path_1: root of the first pre-processed copy.
        processed_dataset_path_2: root of the second pre-processed copy.
        image_size: size tuple handed to ``cv2.resize`` (OpenCV reads it as
            ``(width, height)``).
        transform: callable accepting ``image=``, ``image0=``, ``image1=`` and
            returning a mapping with the same keys (an Albumentations
            ``Compose`` with two additional image targets in this notebook).
        apply_to: name of the split folder, also used as the output file stem.
        classes: class folder names; list position becomes the integer label.
        save_to: output directory (created if missing).
    """

    def __init__(self, raw_dataset_path, processed_dataset_path_1, processed_dataset_path_2, image_size, transform, apply_to, classes, save_to):
        self.raw_dataset_path = Path(raw_dataset_path)
        self.processed_dataset_path_1 = Path(processed_dataset_path_1)
        self.processed_dataset_path_2 = Path(processed_dataset_path_2)
        self.image_size = image_size
        self.transform = transform
        self.apply_to = apply_to
        self.save_to = Path(save_to)
        self.class_to_idx = {cls: i for i, cls in enumerate(classes)}
        # Class folders = directories directly inside a folder named
        # `apply_to` that contain at least one file.  Sorted because rglob
        # order depends on the filesystem and would otherwise make the sample
        # order of the saved file irreproducible.
        self.source_folders = sorted(
            f
            for f in self.raw_dataset_path.rglob("*")
            if f.is_dir()
            and f.parent.name == self.apply_to
            and any(p.is_file() for p in f.iterdir())
        )

    def _load_image(self, path):
        """Read ``path`` as grayscale, resize it and append a channel axis.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing/undecodable file by returning None
            # instead of raising; fail here with the offending path rather
            # than with an opaque error inside cv2.resize.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.resize(img, self.image_size)
        # (H, W) -> (H, W, 1) so the transform sees an explicit channel dim.
        return img[..., None]

    def transform_images(self):
        """Process every image of the split and save the stacked tensors.

        Writes the tuple ``(raw_imgs, proc_imgs_1, proc_imgs_2, labels)`` to
        ``<save_to>/<apply_to>.pt`` via ``torch.save``; ``labels`` is a
        ``torch.long`` tensor of class indices aligned with the image tensors.

        Raises:
            RuntimeError: if no class folder was found for the split.
            KeyError: if a class folder name is not listed in ``classes``.
            FileNotFoundError: if a raw image or one of its two pre-processed
                counterparts cannot be read.
        """
        if not self.source_folders:
            raise RuntimeError(
                f"No non-empty class folders found for split '{self.apply_to}' "
                f"under '{self.raw_dataset_path}'"
            )

        # Validate labels before the (slow) image loop so an unexpected folder
        # fails immediately instead of part-way through a long run.
        unknown = [s.name for s in self.source_folders if s.name not in self.class_to_idx]
        if unknown:
            raise KeyError(
                f"Class folders {unknown} are not in the configured classes "
                f"{list(self.class_to_idx)}"
            )

        raw_imgs, proc_imgs_1, proc_imgs_2, labels = [], [], [], []

        for source in self.source_folders:
            label = self.class_to_idx[source.name]
            # "<split>/<class>" -- the layout expected under both processed roots.
            rel_dir = source.relative_to(source.parent.parent)
            # Files only (skips e.g. checkpoint sub-folders), in a stable order.
            img_files = sorted(p for p in source.iterdir() if p.is_file())

            for img_file in tqdm(img_files, desc=f"Processing {source.name}"):
                raw_img = self._load_image(img_file)
                proc_img_1 = self._load_image(self.processed_dataset_path_1 / rel_dir / img_file.name)
                proc_img_2 = self._load_image(self.processed_dataset_path_2 / rel_dir / img_file.name)

                # One call so all three views go through the same transform invocation.
                augmented = self.transform(image=raw_img, image0=proc_img_1, image1=proc_img_2)

                # Z-score normalisation is always applied, independently per view.
                raw_imgs.append(zscore_norm_tensor(augmented['image']))
                proc_imgs_1.append(zscore_norm_tensor(augmented['image0']))
                proc_imgs_2.append(zscore_norm_tensor(augmented['image1']))
                labels.append(label)

        raw_imgs = torch.stack(raw_imgs)
        proc_imgs_1 = torch.stack(proc_imgs_1)
        proc_imgs_2 = torch.stack(proc_imgs_2)
        labels = torch.tensor(labels, dtype=torch.long)

        self.save_to.mkdir(parents=True, exist_ok=True)

        torch.save((raw_imgs, proc_imgs_1, proc_imgs_2, labels), self.save_to / f"{self.apply_to}.pt")

In [6]:
# Training split: uses the augmenting pipeline; output goes to <transform>/train.pt.
transforming_train = Transforming(
    raw_dataset_path=raw,
    processed_dataset_path_1=processed1,
    processed_dataset_path_2=processed2,
    image_size=IMAGE_SIZE,
    transform=train_transform,
    apply_to="train",
    classes=classes,
    save_to=transform,
)
transforming_train.transform_images()

Processing glioma: 100%|██████████| 1278/1278 [14:56<00:00,  1.43it/s]
Processing meningioma: 100%|██████████| 1197/1197 [13:33<00:00,  1.47it/s]
Processing pituitary: 100%|██████████| 706/706 [08:27<00:00,  1.39it/s]


In [7]:
# Validation split: tensor conversion only (no augmentation); output goes to <transform>/val.pt.
transforming_val = Transforming(
    raw_dataset_path=raw,
    processed_dataset_path_1=processed1,
    processed_dataset_path_2=processed2,
    image_size=IMAGE_SIZE,
    transform=val_test_transform,
    apply_to="val",
    classes=classes,
    save_to=transform,
)
transforming_val.transform_images()

Processing glioma: 100%|██████████| 365/365 [04:01<00:00,  1.51it/s]
Processing meningioma: 100%|██████████| 342/342 [03:53<00:00,  1.47it/s]
Processing pituitary: 100%|██████████| 201/201 [02:24<00:00,  1.39it/s]


In [8]:
# Test split: tensor conversion only (no augmentation); output goes to <transform>/test.pt.
transforming_test = Transforming(
    raw_dataset_path=raw,
    processed_dataset_path_1=processed1,
    processed_dataset_path_2=processed2,
    image_size=IMAGE_SIZE,
    transform=val_test_transform,
    apply_to="test",
    classes=classes,
    save_to=transform,
)
transforming_test.transform_images()

Processing glioma: 100%|██████████| 183/183 [02:01<00:00,  1.51it/s]
Processing meningioma: 100%|██████████| 171/171 [01:48<00:00,  1.58it/s]
Processing pituitary: 100%|██████████| 102/102 [01:12<00:00,  1.41it/s]
