<a href="https://colab.research.google.com/github/Dylan-Geraci/neuroimaging-tumor-detector/blob/main/notebooks/01_explore_and_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [2]:
import pandas as pd
import numpy as np
from google.colab import drive
from torchvision import datasets, transforms
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import Dataset, DataLoader, Subset

Set Training Data Path

In [11]:
# Mount Google Drive so the dataset stored there is reachable from the Colab VM.
# The mount point is named once and reused below, so the mount call and the
# data path cannot drift apart.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

# Root folder of the training images (later handed to datasets.ImageFolder).
train_path = f"{DRIVE_MOUNT_POINT}/MyDrive/neuro-imaging/data/Training"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Define Transformations

In [12]:
# Preprocessing settings shared by both pipelines. Naming them once keeps the
# training and validation transforms from silently diverging.
IMAGE_SIZE = (224, 224)
# Per-channel normalisation statistics (these match the widely used ImageNet
# values; presumably chosen for a pretrained backbone -- confirm).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: same preprocessing as validation plus a random horizontal
# flip as the only augmentation.
train_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # replicate grey into 3 channels
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Validation pipeline: deterministic, no augmentation.
val_tfms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

In [13]:
# Index the image folder once, without a transform: each split attaches its
# own transform later when it wraps this dataset.
base = datasets.ImageFolder(train_path)

# One integer class label per sample, in the same order as base.samples.
y = np.asarray(base.targets)

In [14]:
# A single stratified split that holds out 15% of the samples for validation;
# the fixed random_state keeps the partition the same across runs.
sss = StratifiedShuffleSplit(
    n_splits=1,
    test_size=0.15,
    random_state=42,
)

# The splitter stratifies on y; the feature argument is only a placeholder
# with one entry per sample.
placeholder_features = np.zeros(len(y))
train_idx, val_idx = next(sss.split(placeholder_features, y))

In [15]:
class TransformSubset(Dataset):
    """Subset of an image dataset that applies its own transform.

    Several instances can share one underlying dataset while each uses a
    different set of indices and a different preprocessing pipeline.

    Args:
        base_ds: Object exposing ``samples`` (a sequence of ``(path, label)``
            pairs) and ``loader`` (a callable turning a path into an image).
        indices: Positions into ``base_ds.samples`` that make up this subset.
        transform: Optional callable applied to every loaded image. When it is
            ``None`` the image is returned exactly as the loader produced it.
    """

    def __init__(self, base_ds, indices, transform=None):
        self.base = base_ds
        self.indices = indices
        self.transform = transform

    def __len__(self):
        """Return the number of samples in this subset."""
        return len(self.indices)

    def __getitem__(self, i):
        """Load the ``i``-th sample of the subset and return ``(image, label)``."""
        path, label = self.base.samples[self.indices[i]]
        img = self.base.loader(path)  # PIL image with ImageFolder's default loader
        # Compare against None instead of relying on truthiness: a callable
        # transform that happens to evaluate falsy must still be applied.
        if self.transform is not None:
            img = self.transform(img)
        return img, label

# Both splits wrap the same underlying dataset; only the index set and the
# transform pipeline differ.
train_ds = TransformSubset(base_ds=base, indices=train_idx, transform=train_tfms)
val_ds = TransformSubset(base_ds=base, indices=val_idx, transform=val_tfms)

In [16]:
# Settings common to both loaders.
loader_kwargs = dict(batch_size=32, num_workers=2, pin_memory=True)

# Only the training loader shuffles; the validation loader keeps a fixed order.
train_dl = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_dl = DataLoader(val_ds, shuffle=False, **loader_kwargs)

In [17]:
# Class names reported by the ImageFolder dataset; printed to confirm the
# label set that was discovered under train_path.
classes = base.classes
print(classes)

['glioma', 'meningioma', 'notumor', 'pituitary']
