# Make dataset

In [1]:
# Imports
import glob
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

In [23]:
# Create dataset class
class CustomDataset(Dataset):

    def __init__(self):

        # Get folder paths
        self.imgs_path = "./data/dogs_cats_dataset/"
        file_list = glob.glob(self.imgs_path + "*")

        # Get all datapaths for each class
        self.data = []
        for class_path in file_list:
            class_name = class_path.split("\\")[-1]
            print(class_name)
            print(glob.glob(class_path + "\\*.jpg"))
            for img_path in glob.glob(class_path + "\\*.jpg"):
                self.data.append([img_path, class_name])

        # Maps the string of the class to an integer
        self.class_map = {"dogs" : 0, "cats": 1}

        # Resizeing of all images
        self.img_dim = (416, 416)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):

        # Get image and resize
        img_path, class_name = self.data[idx]
        img = cv2.imread(img_path)
        img = cv2.resize(img, self.img_dim)

        # Difine class
        class_id = self.class_map[class_name]

        # Make tensor
        img_tensor = torch.from_numpy(img)
        img_tensor = img_tensor.permute(2, 0, 1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id

In [24]:
# Use dataset
dataset = CustomDataset()
data_loader = DataLoader(dataset, batch_size=4, shuffle=True)
for imgs, labels in data_loader:
    print("Batch of images has shape: ", imgs.shape)
    print("Batch of labels has shape: ", labels.shape)

cats
['./data/dogs_cats_dataset\\cats\\1.jpg', './data/dogs_cats_dataset\\cats\\2.jpg', './data/dogs_cats_dataset\\cats\\3.jpg', './data/dogs_cats_dataset\\cats\\4.jpg']
dogs
['./data/dogs_cats_dataset\\dogs\\1.jpg', './data/dogs_cats_dataset\\dogs\\2.jpg', './data/dogs_cats_dataset\\dogs\\3.jpg']
[['./data/dogs_cats_dataset\\cats\\1.jpg', 'cats'], ['./data/dogs_cats_dataset\\cats\\2.jpg', 'cats'], ['./data/dogs_cats_dataset\\cats\\3.jpg', 'cats'], ['./data/dogs_cats_dataset\\cats\\4.jpg', 'cats'], ['./data/dogs_cats_dataset\\dogs\\1.jpg', 'dogs'], ['./data/dogs_cats_dataset\\dogs\\2.jpg', 'dogs'], ['./data/dogs_cats_dataset\\dogs\\3.jpg', 'dogs']]
Batch of images has shape:  torch.Size([4, 3, 416, 416])
Batch of labels has shape:  torch.Size([4, 1])
Batch of images has shape:  torch.Size([3, 3, 416, 416])
Batch of labels has shape:  torch.Size([3, 1])
