In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import cv2
from utils import get_project_root

In [2]:
# Class names; each one is also the name of a per-class sub-folder on disk.
labels = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']

# Integer class id for every label: simply its position in `labels`.
label_mapping = {name: index for index, name in enumerate(labels)}

# Side length (pixels) of the square images produced by the resize step.
image_size = 256

In [6]:
# Load the training split. Every decodable image under ../data/Training/<label>
# is resized to image_size x image_size, converted to grayscale and stored as a
# (1, image_size, image_size) array in X_train; y_train holds the matching
# integer class id from label_mapping.
X_train = []
y_train = []
unreadable_train = []  # paths cv2 could not decode, reported after the loop

for label in labels:
    path = os.path.join("../data/Training", label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the saved dataset) reproducible between runs.
    for filename in tqdm(sorted(os.listdir(path))):
        img_path = os.path.join(path, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an undecodable file by returning None.
            unreadable_train.append(img_path)
            continue
        img = cv2.resize(img, (image_size, image_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Add a leading axis of size 1 in front of the 2-D grayscale image.
        img = np.expand_dims(img, axis=0)
        X_train.append(img)
        y_train.append(label_mapping[label])

if unreadable_train:
    # Make dropped files visible instead of silently shrinking the dataset.
    print(f"Skipped {len(unreadable_train)} unreadable training file(s): {unreadable_train}")

100%|██████████| 826/826 [00:00<00:00, 1252.44it/s]
100%|██████████| 822/822 [00:00<00:00, 1418.78it/s]
100%|██████████| 395/395 [00:00<00:00, 1413.89it/s]
100%|██████████| 827/827 [00:00<00:00, 1229.90it/s]


In [15]:
# Load the validation split from <project root>/data/Testing/<label>, applying
# the same preprocessing as the training split: resize to
# image_size x image_size, convert to grayscale, add a leading axis of size 1.
X_val = []
y_val = []
unreadable_val = []  # paths cv2 could not decode, reported after the loop

# `get_project_root` is imported from utils. os.path.join needs a path-like
# value, so call it when it is a function and use it directly otherwise.
# NOTE(review): confirm which of the two utils actually exports.
project_root = get_project_root() if callable(get_project_root) else get_project_root
val_subdir = "data/Testing"

for label in labels:
    path = os.path.join(project_root, val_subdir, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the saved dataset) reproducible between runs.
    for filename in tqdm(sorted(os.listdir(path))):
        img_path = os.path.join(path, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an undecodable file by returning None.
            unreadable_val.append(img_path)
            continue
        img = cv2.resize(img, (image_size, image_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = np.expand_dims(img, axis=0)
        X_val.append(img)
        y_val.append(label_mapping[label])

if unreadable_val:
    # Make dropped files visible instead of silently shrinking the dataset.
    print(f"Skipped {len(unreadable_val)} unreadable validation file(s): {unreadable_val}")

100%|██████████| 100/100 [00:00<00:00, 1090.57it/s]
100%|██████████| 115/115 [00:00<00:00, 1452.30it/s]
100%|██████████| 105/105 [00:00<00:00, 2362.68it/s]
100%|██████████| 74/74 [00:00<00:00, 538.59it/s]


In [16]:
# Turn the accumulated per-image lists and label lists into NumPy arrays,
# one split at a time.
X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)

In [12]:
# Dump the first training sample to tmp.npy in the current working directory.
first_train_sample = X_train[0]
np.save("tmp.npy", first_train_sample)

In [18]:
import torch
from torch.utils.data import TensorDataset, DataLoader


def _to_tensors(images, targets):
    """Convert an image array and a label array to (float32, int64) tensors."""
    image_tensor = torch.tensor(images, dtype=torch.float32)
    target_tensor = torch.tensor(targets, dtype=torch.long)
    return image_tensor, target_tensor


# Training split: tensors plus the dataset that pairs each image with its label.
X_train_tensor, y_train_tensor = _to_tensors(X_train, y_train)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Validation split, built the same way.
X_val_tensor, y_val_tensor = _to_tensors(X_val, y_val)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

In [10]:
from versioning import save_data, get_next_version

# Resolve the version once and reuse it, so the value printed here is
# guaranteed to be the exact version the datasets are saved under.
data_version = get_next_version(type="data")
print(data_version)

# Persist both splits under that version.
save_data(train_dataset, val_dataset, version=data_version)


v1.0.0
Dataset version v1.0.0 saved!
