In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

In [2]:
# Read the previously saved training samples back from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only point this at a file you produced yourself.
training_data_path = "./dataset/training_data.pickle"
with open(training_data_path, "rb") as f:
    training_data = pickle.load(f)

In [3]:
# Separate the (features, label) pairs into two parallel lists.
# The pairs are materialised once so the source is only iterated a single time.
sample_pairs = list(training_data)
features_list = [features for features, _ in sample_pairs]
labels_list = [label for _, label in sample_pairs]

In [4]:
class TumobrainorDataset(Dataset):
    """Dataset that yields several randomly augmented views of each image.

    Every item is a pair ``(views, targets)``:

    * ``views`` -- the ``num_augment`` results of applying
      ``random_transform`` to the same source image, stacked along a new
      leading dimension.
    * ``targets`` -- the image's one-hot label (``float32``, length
      ``num_classes``) repeated once per view, i.e. a tensor of shape
      ``(num_augment, num_classes)``.

    Args:
        images: Sized, indexable collection of images. Each element is passed
            to ``transforms.ToPILImage`` (presumably 3-channel image arrays,
            since the pipeline normalizes three channels -- TODO confirm).
        labels: Indexable collection of class ids aligned with ``images``.
            Ids are treated as 1-based: id ``k`` sets position ``k - 1``.
        num_augment: Number of augmented views generated per image.
        num_classes: Length of the one-hot label vector.

    Raises:
        ValueError: If ``num_augment`` or ``num_classes`` is smaller than 1.

    NOTE(review): the random augmentations are applied on every access, no
    matter which split the instance wraps -- confirm that this is intended
    for validation/test data.
    """

    def __init__(self, images, labels, num_augment=8, num_classes=4) -> None:
        # Fail early with a clear message instead of an obscure torch.stack /
        # torch.zeros error on the first item access.
        if num_augment < 1:
            raise ValueError(f"num_augment must be >= 1, got {num_augment}")
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        self.X = images
        self.y = labels
        self.num_augment = num_augment
        self.num_classes = num_classes
        # function for images transformations
        self.random_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(degrees=45),
            transforms.ColorJitter(
                brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            # Per-channel mean / std; these are the values torchvision
            # documents for its ImageNet-pretrained models.
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of source images (not the number of views)."""
        return len(self.X)

    def __getitem__(self, index):
        """Return ``(views, targets)`` for the image stored at ``index``."""
        image = self.X[index]
        # Each call of the pipeline is a separate pass through its random
        # transforms, producing one augmented view of the same image.
        views = [self.random_transform(image) for _ in range(self.num_augment)]

        # labels with one-hot encoding
        one_hot = torch.zeros(self.num_classes, dtype=torch.float32)
        # NOTE(review): ids are shifted by one, so an id of 0 becomes index -1
        # and silently marks the LAST class; ids above num_classes raise
        # IndexError. Verify the label range of the stored data.
        one_hot[int(self.y[index]) - 1] = 1

        # One label row per view, derived from num_augment so the two leading
        # dimensions can never get out of sync.
        targets = one_hot.repeat(self.num_augment, 1)

        return torch.stack(views), targets

In [5]:
# 70 % training, 15 % validation, 15 % testing.
# A fixed seed keeps the three subsets identical across kernel restarts;
# without it every re-run reshuffles the data, so samples that were held out
# for testing in one run can end up in the training set of the next.
SPLIT_SEED = 42

# Step 1: keep 70 % for training and set 30 % aside as a hold-out pool.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    features_list, labels_list,
    test_size=0.3, shuffle=True, random_state=SPLIT_SEED)

# Step 2: cut the hold-out pool in half -> 15 % validation, 15 % testing.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout, y_holdout,
    test_size=0.5, shuffle=True, random_state=SPLIT_SEED)

In [7]:
# Smoke test for the dataset class: push every validation batch through a
# loader and print the shapes of the stacked views and labels.
koten_set = TumobrainorDataset(X_valid, y_valid)
koten_loader = DataLoader(
    dataset=koten_set,
    batch_size=4,
    shuffle=True,
    pin_memory=True,
)
for X, y in koten_loader:
    print(X.shape, y.shape)

torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 224]) torch.Size([4, 8, 4])
torch.Size([4, 8, 3, 224, 22

In [8]:
# Wrap each of the three splits in the augmenting dataset class.
train_set, valid_set, test_set = (
    TumobrainorDataset(split_images, split_labels)
    for split_images, split_labels in (
        (X_train, y_train),
        (X_valid, y_valid),
        (X_test, y_test),
    )
)

In [9]:
# Build one loader per split; all three share the same settings.
# NOTE(review): shuffle=True and drop_last=True are also used for the
# validation and test loaders, so up to batch_size - 1 samples per split are
# skipped on each pass -- confirm this is intended for evaluation.
loader_options = dict(batch_size=4, shuffle=True, pin_memory=True, drop_last=True)
train_loader = DataLoader(train_set, **loader_options)
valid_loader = DataLoader(valid_set, **loader_options)
test_loader = DataLoader(test_set, **loader_options)

In [10]:
# Fetch a single batch from the test loader and display its shapes and labels.
first_batch = next(iter(test_loader))
X, y = first_batch
X.shape, y.shape, y

(torch.Size([4, 8, 3, 224, 224]),
 torch.Size([4, 8, 4]),
 tensor([[[0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 0., 0., 1.]],
 
         [[1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
 
         [[0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.]],
 
         [[0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.],
          [0., 1., 0., 0.]]]))

In [11]:
# Persist the three data loaders, one pickle file per split.
# NOTE(review): pickle stores classes by reference, so loading these files
# elsewhere presumably needs TumobrainorDataset to be defined under the same
# module path first -- verify in the consuming notebook.
loaders_by_path = {
    "./dataset/train_loader.pickle": train_loader,
    "./dataset/valid_loader.pickle": valid_loader,
    "./dataset/test_loader.pickle": test_loader,
}
for loader_path, loader in loaders_by_path.items():
    with open(loader_path, "wb") as f:
        pickle.dump(loader, f)