In [325]:
# Mount Google Drive so the dataset folders are reachable from the Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

import cv2
import torch
from pathlib import Path
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from tqdm import tqdm

import os

# Work from the MRI data directory: the relative paths used later in this notebook
# ("raw", "processed_nlmd_224", "transform_v1") are resolved against it.
# NOTE(review): this is an absolute, user-specific Drive path; consider moving it
# into a single configurable DATA_DIR constant.
os.chdir("/content/drive/MyDrive/Colab Notebooks/Data Science Group Project/data/processed/mri")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [326]:
# Target size handed to cv2.resize for every image (square, so the
# width/height order does not matter here).
image_size = (224, 224)

# Training pipeline: light geometric augmentation followed by conversion to a
# torch tensor. Registering "image0" as an additional image target lets a
# second image be passed through the same pipeline call as `image`.
train_transform = A.Compose(
    [
        # Random rotation within +/-10 degrees, reflecting pixels at the border.
        A.Rotate(limit=10, border_mode=cv2.BORDER_REFLECT, p=0.5),
        # Random left-right mirror.
        A.HorizontalFlip(p=0.5),
        ToTensorV2(),
    ],
    additional_targets={"image0": "image"},
)

# Validation / test pipeline: no augmentation, only the tensor conversion.
val_test_transform = A.Compose(
    [ToTensorV2()],
    additional_targets={"image0": "image"},
)


def zscore_norm_tensor(x, eps=1e-8):
    """Scale an image tensor by 1/255 and z-score it over its spatial dimensions.

    Statistics are computed over the last two dimensions, so the function
    accepts a single-channel-first image ``(C, H, W)`` (the layout used in this
    notebook), a plain ``(H, W)`` image, or a batch ``(N, C, H, W)``; every
    leading index (channel and/or sample) is normalised independently. For
    ``(C, H, W)`` input this is identical to reducing over dims ``[1, 2]``.

    Args:
        x: ``torch.Tensor`` with at least two dimensions. It is cast to float
            and divided by 255 before the statistics are computed.
        eps: Small constant added to the standard deviation so that a constant
            image yields zeros instead of a division by zero.

    Returns:
        A float tensor with the same shape as ``x``.
    """
    x = x.float() / 255.0
    # Negative indices address the (H, W) axes regardless of how many leading
    # dimensions (channel, batch) precede them.
    spatial_dims = (-2, -1)
    mean = x.mean(dim=spatial_dims, keepdim=True)
    # Population standard deviation (no Bessel correction).
    std = x.std(dim=spatial_dims, keepdim=True, unbiased=False)
    return (x - mean) / (std + eps)

In [348]:
class Transforming:
    """Build one dataset split of paired raw / pre-processed grayscale images.

    For every image found under ``<raw_dataset_path>/**/<apply_to>/<class>/``
    the file with the same name is looked up under
    ``<processed_dataset_path>/<apply_to>/<class>/``. Both images are resized,
    passed together through ``transform``, z-score normalised, and finally all
    samples are stacked and saved to ``<save_to>/<apply_to>.pt`` as the tuple
    ``(raw_imgs, proc_imgs, labels)``.

    Args:
        raw_dataset_path: Root directory that is searched recursively for
            class folders whose parent directory is named ``apply_to``.
        processed_dataset_path: Root directory of the pre-processed images,
            laid out as ``<apply_to>/<class>/<file name>``.
        image_size: ``dsize`` passed to ``cv2.resize`` (OpenCV interprets it as
            ``(width, height)``).
        transform: Callable invoked as ``transform(image=..., image0=...)``
            that returns a mapping with ``'image'`` and ``'image0'`` entries.
        apply_to: Name of the split directory (e.g. ``"train"``); also used as
            the stem of the output file.
        classes: Class folder names; the position in this sequence is the
            integer label stored for that class.
        save_to: Directory the ``.pt`` file is written to (created on demand).

    NOTE(review): ``transform`` is applied exactly once per image and the
    result is saved, so any random augmentation in it is frozen into the file.
    """

    def __init__(self, raw_dataset_path, processed_dataset_path, image_size, transform, apply_to, classes, save_to):
        self.raw_dataset_path = Path(raw_dataset_path)
        self.processed_dataset_path = Path(processed_dataset_path)
        self.image_size = image_size
        self.transform = transform
        self.apply_to = apply_to
        self.save_to = Path(save_to)
        self.class_to_idx = {cls: i for i, cls in enumerate(classes)}
        # Class folders of the requested split that contain at least one file.
        # Sorted because directory iteration order is filesystem-dependent and
        # would otherwise make the sample order of the saved tensors vary.
        self.source_folders = sorted(
            folder
            for folder in self.raw_dataset_path.rglob("*")
            if folder.is_dir()
            and folder.parent.name == self.apply_to
            and any(entry.is_file() for entry in folder.iterdir())
        )

    @staticmethod
    def _read_grayscale(path):
        """Read ``path`` as a single-channel image, failing loudly if it cannot be decoded."""
        img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image (missing or not a decodable image file): {path}")
        return img

    def transform_images(self):
        """Process every image of the split and save the stacked tensors.

        Raises:
            ValueError: If no non-empty class folder was found for the split.
            KeyError: If a class folder is not listed in ``classes``.
            OSError: If a raw image or its processed counterpart cannot be read.
        """
        if not self.source_folders:
            raise ValueError(
                f"No non-empty class folders found for split '{self.apply_to}' "
                f"under '{self.raw_dataset_path}'"
            )

        # Fail before the (slow) image loop rather than part-way through it.
        unknown = [folder.name for folder in self.source_folders if folder.name not in self.class_to_idx]
        if unknown:
            raise KeyError(f"Class folders not listed in `classes`: {unknown}")

        raw_imgs, proc_imgs, labels = [], [], []

        for source in self.source_folders:
            label = self.class_to_idx[source.name]
            # "<split>/<class>" - mirrors the raw layout inside the processed root.
            split_and_class = source.relative_to(source.parent.parent)
            # Files only (sub-directories are not images), in a stable order.
            img_files = sorted(entry for entry in source.iterdir() if entry.is_file())

            for img_file in tqdm(img_files, desc=f"Processing {source.name}"):
                raw_img = self._read_grayscale(img_file)
                proc_img = self._read_grayscale(self.processed_dataset_path / split_and_class / img_file.name)

                raw_img = cv2.resize(raw_img, self.image_size)
                proc_img = cv2.resize(proc_img, self.image_size)

                # Add a trailing channel dim: (H, W) -> (H, W, 1)
                raw_img = raw_img[..., None]
                proc_img = proc_img[..., None]

                # One call for both images so they go through the same pipeline invocation.
                augmented = self.transform(image=raw_img, image0=proc_img)
                raw_tensor = augmented['image']
                proc_tensor = augmented['image0']

                # Per-image z-score normalization (always applied).
                raw_tensor = zscore_norm_tensor(raw_tensor)
                proc_tensor = zscore_norm_tensor(proc_tensor)

                raw_imgs.append(raw_tensor)
                proc_imgs.append(proc_tensor)
                labels.append(label)

        raw_imgs = torch.stack(raw_imgs)
        proc_imgs = torch.stack(proc_imgs)
        labels = torch.tensor(labels, dtype=torch.long)

        self.save_to.mkdir(parents=True, exist_ok=True)

        torch.save((raw_imgs, proc_imgs, labels), self.save_to / f"{self.apply_to}.pt")

In [349]:
# Class folder names; the position in this list is the integer label
# assigned to that class (glioma -> 0, meningioma -> 1, pituitary -> 2).
classes = [
    "glioma",
    "meningioma",
    "pituitary",
]

In [350]:
# Training split: uses the augmenting pipeline and writes transform_v1/train.pt.
transforming_train = Transforming(
    raw_dataset_path="raw",
    processed_dataset_path="processed_nlmd_224",
    image_size=image_size,
    transform=train_transform,
    apply_to="train",
    classes=classes,
    save_to="transform_v1",
)
transforming_train.transform_images()

Processing glioma: 100%|██████████| 1278/1278 [00:35<00:00, 35.55it/s]
Processing meningioma: 100%|██████████| 1197/1197 [00:27<00:00, 42.91it/s]
Processing pituitary: 100%|██████████| 706/706 [00:16<00:00, 43.17it/s]


In [351]:
# Validation split: tensor conversion only (no augmentation); writes transform_v1/val.pt.
transforming_val = Transforming(
    raw_dataset_path="raw",
    processed_dataset_path="processed_nlmd_224",
    image_size=image_size,
    transform=val_test_transform,
    apply_to="val",
    classes=classes,
    save_to="transform_v1",
)
transforming_val.transform_images()

Processing glioma: 100%|██████████| 365/365 [00:07<00:00, 51.39it/s]
Processing meningioma: 100%|██████████| 342/342 [00:06<00:00, 51.28it/s]
Processing pituitary: 100%|██████████| 201/201 [00:03<00:00, 52.47it/s]


In [352]:
# Test split: tensor conversion only (no augmentation); writes transform_v1/test.pt.
transforming_test = Transforming(
    raw_dataset_path="raw",
    processed_dataset_path="processed_nlmd_224",
    image_size=image_size,
    transform=val_test_transform,
    apply_to="test",
    classes=classes,
    save_to="transform_v1",
)
transforming_test.transform_images()

Processing glioma: 100%|██████████| 183/183 [00:03<00:00, 53.08it/s]
Processing meningioma: 100%|██████████| 171/171 [00:04<00:00, 34.50it/s]
Processing pituitary: 100%|██████████| 102/102 [00:01<00:00, 51.98it/s]
