In [63]:
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/Basics/custom_dataset/custom_dataset.py
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision
import os
import pandas as pd
# from skimage import io
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import (
    Dataset,
    DataLoader,
)

In [64]:
class TumorDataset(Dataset):
    """Image dataset backed by a CSV of ids/labels and a directory of ``.tif`` files.

    The CSV must provide an ``id`` column (image file name without the
    ``.tif`` extension) and a ``label`` column. Each sample is a tuple
    ``(image, label)`` where ``label`` is a tensor built from the CSV value.

    Args:
        csv_file: Path to the annotations CSV; read with ``pd.read_csv``.
        root_dir: Directory that contains ``<id>.tif`` for every CSV row.
        transform: Optional callable applied to the loaded PIL image before
            it is returned.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the sample at positional ``index``.

        Args:
            index: Row position in the annotations table. A 0-dim integer
                tensor is also accepted and converted to a Python int.

        Returns:
            Tuple ``(image, y_label)``: the (optionally transformed) RGB image
            and the label wrapped in a ``torch.Tensor``.
        """
        # Some samplers hand out tensor indices; pandas needs a plain int.
        if torch.is_tensor(index):
            index = index.tolist()

        # Positional lookup: stays correct even if the frame's index is not
        # the default 0..n-1 range (e.g. after filtering the annotations).
        img_id = self.annotations["id"].iloc[index]
        img_path = os.path.join(self.root_dir, f"{img_id}.tif")

        # Image.open is lazy and keeps the file handle open. Reading the pixel
        # data inside the context manager closes the handle deterministically,
        # and converting to RGB guarantees three channels for downstream
        # transforms.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        y_label = torch.tensor(self.annotations["label"].iloc[index])

        if self.transform is not None:
            image = self.transform(image)

        return (image, y_label)

In [65]:
# Base input directory (relative to the notebook); the CSV and image folder
# paths below are built by appending to this string, so keep the trailing slash.
DIR = "../../input/"

In [71]:
# Preprocessing pipeline applied to every image, in order:
# centre-crop to 64 pixels, convert to a tensor, then normalise each of the
# three channels as (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.CenterCrop(size=64),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [72]:
# Training set: labels come from the CSV, images from the tumor_data folder,
# both located under DIR.
dataset = TumorDataset(
    csv_file=f'{DIR}tumor_train_labels.csv',
    root_dir=f'{DIR}tumor_data/',
    transform=transform,
)

In [73]:
# Mini-batches of 32 samples, drawn in a shuffled order.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [74]:
# Number of samples in the dataset (one per row of the annotations CSV).
len(dataset)

160000

In [75]:
# Sanity check: fetch a single mini-batch, show its labels, and stop.
for i, batch in enumerate(dataloader):
    data, label = batch
    print(label)
    break

tensor([1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
        0, 0, 1, 0, 1, 1, 1, 0])
