<div style='background: pink; font-size: 74px; font-weight: bold'>LOADING CUSTOM IMAGE DATASETS</div>

In [1]:
import torch # Import Pytorch
from torch.utils.data import Dataset, DataLoader # The base class to be inherited by the dataloading function

<div style='background: pink; font-size: 34px; font-weight: bold'>Custom dataloading function</div>

In [2]:
# KEY POINTS
# CustomDataset must override __len__() and __getitem__()
# __len__() returns the size of the dataset, e.g., len(dataset)
# __getitem__() supports indexing such that dataset[i] is the i-th example

class CustomDataset(Dataset): # Inherit the dataset class
    """Map-style dataset over image files stored as ``<root>/<class_name>/<file>``.

    The label of each image is the name of its parent directory, translated
    to an integer through ``class_to_idx``.

    Args:
        image_paths: Sequence of image file paths.
        class_to_idx: Optional mapping from class (directory) name to integer
            label. When omitted, it is built from the sorted set of parent
            directory names found in ``image_paths``.
        transform: Optional callable applied to the loaded PIL image before it
            is returned (e.g. a ``torchvision.transforms`` pipeline).
    """

    def __init__(self, image_paths, class_to_idx=None, transform=None):
        self.image_paths = list(image_paths)
        if class_to_idx is None:
            # Sort the class names so the name -> index mapping is deterministic.
            class_names = sorted({self._class_name(path) for path in self.image_paths})
            class_to_idx = {name: index for index, name in enumerate(class_names)}
        self.class_to_idx = class_to_idx
        self.transform = transform

    @staticmethod
    def _class_name(image_filepath):
        """Return the name of the directory that directly contains the file."""
        import os  # local import keeps this cell self-contained

        return os.path.basename(os.path.dirname(image_filepath))

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx): # Returns a tuple
        """Load the ``idx``-th image and return ``(image, label)``.

        Raises:
            KeyError: If the image's parent directory is not in ``class_to_idx``.
        """
        # Imported here because the notebook only imports PIL in a later cell.
        from PIL import Image

        image_filepath = self.image_paths[idx]
        label = self.class_to_idx[self._class_name(image_filepath)]

        # convert() returns a fully loaded copy, so the file handle can be
        # closed as soon as the with-block exits.
        with Image.open(image_filepath) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        return image, label



    

<div style='background: pink; font-size: 34px; font-weight: bold'>1). Example One (Loading tabular data)</div>

In [3]:
import pandas as pd
import numpy as np

# Custom data loader
class TabDataLoader(Dataset):
    """Map-style dataset backed by a CSV file held in a pandas DataFrame.

    Each item is a ``(X, y)`` pair: the first two columns of a row as an
    array, and the third column of that row as the target.
    """

    def __init__(self, root_dir):
        """Read the CSV located at ``root_dir`` into memory."""
        self.root_dir = root_dir
        self.dataset = pd.read_csv(root_dir)

    def __len__(self):
        """Return the number of rows in the loaded table."""
        frame = self.dataset
        return len(frame)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for the row(s) selected by ``idx``."""
        by_position = self.dataset.iloc
        features = by_position[idx, :2].values
        target = by_position[idx, 2]
        return features, target
        

In [5]:
# Create an instance of the class.
# Note: despite its name, root_dir is the path of the CSV file itself (it is passed to pd.read_csv).
dataset = TabDataLoader(root_dir="../datasets/table_data.csv")

In [6]:
# Define a dataloader (it is basically an iterable wrapped around the Dataset);
# here it serves the rows in shuffled order, 4 per batch.
data_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

In [7]:
# Dummy training loop: look at the first batch only, then stop
for data  in data_loader:
    print(data)  # one batch: data[0] holds the features, data[1] the targets
    print(f"SHAPES: {data[0].shape}, {data[1].shape}")
    break  # a single batch is enough for the demonstration

[tensor([[   47, 23000],
        [   41, 52000],
        [   18, 82000],
        [   37, 62000]]), tensor([1, 0, 0, 0])]
SHAPES: torch.Size([4, 2]), torch.Size([4])


<div style='background: pink; font-size: 34px; font-weight: bold'>2). Example Two (Loading Image data)</div>

In [9]:
from PIL import Image
import torchvision.datasets as datasets
from torchvision import transforms

class ImgDataLoader(Dataset):
    """Thin map-style wrapper around ``torchvision.datasets.ImageFolder``.

    Args:
        root_dir: Directory handed to ``ImageFolder`` as ``root``.
        transform: Optional transform forwarded to ``ImageFolder``.

    The wrapped dataset is kept in ``self.img_dataset``; its ``classes`` and
    ``class_to_idx`` attributes can be inspected there.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.img_dataset = datasets.ImageFolder(root=self.root_dir, transform=transform)

    def __len__(self):
        """Return the number of samples in the wrapped image dataset."""
        # Must be the sample count: len(self.root_dir) would be the number of
        # characters in the path string and make DataLoader sample a wrong range.
        return len(self.img_dataset)

    def __getitem__(self, idx):
        """Return the item the wrapped dataset produces for ``idx``."""
        return self.img_dataset[idx]
    
# Preprocessing applied to every image: resize to 224x224, then convert to a tensor
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])
# Build the image dataset from the folder tree under ../datasets/processed/
img_dataset = ImgDataLoader(root_dir="../datasets/processed/", transform=transform)


In [10]:
# Wrap the image dataset in a DataLoader: shuffled order, 5 samples per batch
img_dataloader = DataLoader(dataset=img_dataset, batch_size=5, shuffle=True)

In [11]:
# Peek at the first batch only: iterating the whole loader just to print it
# loads every image and floods the cell output.
for imgs, labels in img_dataloader:
    print(imgs, labels)
    print(f"SHAPES: {imgs.shape}, {labels.shape}")
    break

tensor([[[[0.5529, 0.5686, 0.5765,  ..., 0.1647, 0.1529, 0.1294],
          [0.5373, 0.5804, 0.5216,  ..., 0.1451, 0.1412, 0.1373],
          [0.5255, 0.6275, 0.5686,  ..., 0.1333, 0.1333, 0.1412],
          ...,
          [0.6824, 0.6824, 0.6824,  ..., 0.6353, 0.6471, 0.6392],
          [0.6941, 0.6863, 0.6784,  ..., 0.6353, 0.6510, 0.6510],
          [0.6824, 0.6745, 0.6667,  ..., 0.6392, 0.6627, 0.6667]],

         [[0.5255, 0.5412, 0.5490,  ..., 0.1804, 0.1686, 0.1451],
          [0.5098, 0.5529, 0.4941,  ..., 0.1608, 0.1569, 0.1529],
          [0.4980, 0.6000, 0.5412,  ..., 0.1490, 0.1490, 0.1569],
          ...,
          [0.6431, 0.6431, 0.6431,  ..., 0.5647, 0.5765, 0.5686],
          [0.6549, 0.6471, 0.6392,  ..., 0.5647, 0.5804, 0.5804],
          [0.6431, 0.6353, 0.6275,  ..., 0.5686, 0.5922, 0.5961]],

         [[0.6549, 0.6706, 0.6784,  ..., 0.2941, 0.2824, 0.2588],
          [0.6392, 0.6824, 0.6235,  ..., 0.2745, 0.2706, 0.2667],
          [0.6275, 0.7294, 0.6706,  ..., 0

In [13]:
# Alternative without a custom wrapper class: the ImageFolder object is passed
# to DataLoader directly as the dataset.
image_data = datasets.ImageFolder(root="../datasets/processed/", transform=transform)
data_loader2 = DataLoader(dataset=image_data, batch_size=32, shuffle=True)
# Show the first batch (up to 32 samples) and stop
for imgs, labels in data_loader2:
    print(imgs, labels)
    break

tensor([[[[0.5098, 0.4980, 0.5412,  ..., 0.5137, 0.5216, 0.5176],
          [0.4863, 0.4824, 0.5255,  ..., 0.5373, 0.5333, 0.5255],
          [0.5255, 0.5137, 0.5373,  ..., 0.5490, 0.5333, 0.5216],
          ...,
          [0.3765, 0.3686, 0.3647,  ..., 0.3961, 0.3961, 0.4196],
          [0.3686, 0.3725, 0.3686,  ..., 0.3843, 0.4118, 0.3961],
          [0.3765, 0.3765, 0.3804,  ..., 0.4196, 0.4157, 0.4078]],

         [[0.4510, 0.4392, 0.4824,  ..., 0.4549, 0.4627, 0.4588],
          [0.4275, 0.4235, 0.4667,  ..., 0.4784, 0.4745, 0.4667],
          [0.4667, 0.4549, 0.4784,  ..., 0.4902, 0.4745, 0.4627],
          ...,
          [0.3176, 0.3098, 0.3059,  ..., 0.3490, 0.3490, 0.3725],
          [0.3098, 0.3137, 0.3098,  ..., 0.3373, 0.3647, 0.3490],
          [0.3176, 0.3176, 0.3216,  ..., 0.3725, 0.3686, 0.3608]],

         [[0.4706, 0.4588, 0.5020,  ..., 0.4745, 0.4824, 0.4784],
          [0.4471, 0.4431, 0.4863,  ..., 0.4980, 0.4941, 0.4863],
          [0.4863, 0.4745, 0.4980,  ..., 0