In [1]:
import cv2
import numpy as np
import torch
from sklearn.datasets import make_classification

# Tabular data

In [2]:
class CustomDataset:
    """Map-style dataset over a feature array and a parallel target array.

    ``data`` must expose ``.shape`` and accept ``data[idx, :]`` indexing;
    ``targets`` must accept ``targets[idx]``. Each item is returned as a dict
    with a float tensor under ``"X"`` and a long tensor under ``"y"``.
    """

    def __init__(self, data, targets):
        self.data, self.targets = data, targets

    def __len__(self):
        # The number of samples is the size of the first axis of `data`.
        return self.data.shape[0]

    def __getitem__(self, idx):
        # Look up both pieces first, then convert them to tensors.
        row, label = self.data[idx, :], self.targets[idx]
        return dict(
            X=torch.tensor(row, dtype=torch.float),
            y=torch.tensor(label, dtype=torch.long),
        )

In [3]:
# Synthetic classification problem: 1000 samples with 20 features each.
# random_state pins the generator so every Restart & Run All yields the same data.
data, targets = make_classification(n_samples=1000, n_features=20, random_state=42)
data.shape, targets.shape

((1000, 20), (1000,))

In [4]:
# Wrap the feature array and the target array in the dataset class defined above.
custom_dataset = CustomDataset(data=data, targets=targets)

In [5]:
# Echo the object; CustomDataset defines no __repr__, so the default object repr is shown.
custom_dataset

<__main__.CustomDataset at 0x1bfe1599070>

In [6]:
# Sample count, delegated to CustomDataset.__len__.
len(custom_dataset)

1000

In [7]:
# Fetch the first sample through CustomDataset.__getitem__ (a dict with "X" and "y").
custom_dataset[0]

{'X': tensor([-0.1782,  0.8887, -0.6245,  0.5825,  0.0763,  1.3325, -0.9081,  0.2370,
          0.7721,  1.5686, -1.2454, -1.0038,  0.7012,  0.0830, -0.8152, -1.1070,
          0.4285, -0.4300,  1.3382, -0.3612]),
 'y': tensor(1)}

In [8]:
# Shape of the first sample's feature tensor, and the size of its first axis.
first_features = custom_dataset[0]["X"]
first_features.shape, first_features.shape[0]

(torch.Size([20]), 20)

In [9]:
# Iterating the dataset directly works through the sequence protocol: Python
# calls __getitem__(0), __getitem__(1), ... until IndexError is raised.
# The loop variable is deliberately not named `data`: that would overwrite the
# feature array created earlier in the notebook and break re-running the cell
# that builds `custom_dataset`.
for sample in custom_dataset:
    print(sample)
    break

{'X': tensor([-0.1782,  0.8887, -0.6245,  0.5825,  0.0763,  1.3325, -0.9081,  0.2370,
         0.7721,  1.5686, -1.2454, -1.0038,  0.7012,  0.0830, -0.8152, -1.1070,
         0.4285, -0.4300,  1.3382, -0.3612]), 'y': tensor(1)}


In [10]:
# Same first sample as above, this time reached by explicit integer indexing.
n_samples = len(custom_dataset)
for idx in range(n_samples):
    print(custom_dataset[idx])
    break  # one sample is enough for the demonstration

{'X': tensor([-0.1782,  0.8887, -0.6245,  0.5825,  0.0763,  1.3325, -0.9081,  0.2370,
         0.7721,  1.5686, -1.2454, -1.0038,  0.7012,  0.0830, -0.8152, -1.1070,
         0.4285, -0.4300,  1.3382, -0.3612]), 'y': tensor(1)}


In [11]:
# DataLoader is a re-iterable object (not a one-shot generator) that yields
# batches of 4 samples.
# num_workers=0 loads batches in the main process: worker processes started with
# the "spawn" method (Windows, macOS) cannot import a dataset class that is
# defined inside a notebook, so num_workers > 0 fails there. Raise it again once
# the dataset class lives in an importable module.
train_loader = torch.utils.data.DataLoader(custom_dataset, batch_size=4, num_workers=0)

In [12]:
# Echo the loader object (default repr); nothing is iterated here.
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1bfe1599bb0>

In [13]:
# for data in train_loader:
#     print(data["X"].shape)
#     print(data["y"].shape)
#     break

In [14]:
# for _ in range(10):
#     for data in train_loader:
#         X = data["X"]
#         y = data["y"]
#         output = model(X,y)
#         loss
#         loss.backward()
        

# Text data

In [15]:
class CustomDataset:
    """Map-style dataset that tokenizes one text per item.

    ``data`` and ``targets`` are indexed with ``[idx]``; ``tokenizer`` is called
    with the selected text and its result is converted to a long tensor.
    Each item is a dict with keys ``"text"`` and ``"target"``.
    """

    def __init__(self, data, targets, tokenizer):
        self.data, self.targets, self.tokenizer = data, targets, tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Look up the raw text and its label before running the tokenizer.
        raw_text, label = self.data[idx], self.targets[idx]
        token_ids = self.tokenizer(raw_text)

        return dict(
            text=torch.tensor(token_ids, dtype=torch.long),
            target=torch.tensor(label),
        )

# Image data

In [16]:
class CustomDataset:
    """Map-style image dataset that reads files from disk with OpenCV.

    Parameters
    ----------
    image_paths : sequence
        Paths passed one at a time to ``cv2.imread``.
    targets : sequence
        Labels, indexed in parallel with ``image_paths``.
    augmentations : callable or None
        If not None, called as ``augmentations(image=image)`` and expected to
        return a mapping whose ``"image"`` entry is the transformed image
        (this looks like the albumentations convention -- confirm with the caller).

    Each item is a dict with a float32 channel-first ``"image"`` tensor and a
    ``"target"`` tensor.
    """

    def __init__(self, image_paths, targets, augmentations):
        self.image_paths = image_paths
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, colour-convert, optionally augment and tensorize sample ``idx``.

        Raises
        ------
        FileNotFoundError
            If ``cv2.imread`` cannot read the file (it returns None instead of raising).
        """
        target = self.targets[idx]
        image_path = self.image_paths[idx]

        image = cv2.imread(image_path)
        if image is None:
            # Fail here with the offending path rather than with an opaque
            # OpenCV error from cvtColor further down.
            raise FileNotFoundError(f"cv2.imread could not read image: {image_path!r}")

        # imread yields channels in BGR order; convert to the usual RGB order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # (height, width, channels) -> (channels, height, width), as float32.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "image": torch.tensor(image),
            "target": torch.tensor(target),
        }