In [None]:
# Import necessary libraries for dataset and image processing
import torch  # PyTorch main package for tensor operations
from torch.utils.data import Dataset, DataLoader  # For custom datasets and batching
from torchvision.transforms import transforms  # For image transformations
import os  # For file and directory operations
from PIL import Image  # For image loading and processing

In [None]:
# Define a custom dataset class inheriting from PyTorch's Dataset
class test_dataset(Dataset):
    """Unlabelled image dataset built from the ``.jpg``/``.png`` files of one directory.

    Indexing the dataset loads one image from disk, converts it to RGB, runs
    it through ``transform`` and returns the result (no label is returned).

    Args:
        image_dir: Directory scanned (non-recursively) for image files.
        transform: Optional callable applied to each loaded PIL image. When
            omitted, the default augmentation pipeline from
            ``_default_transform`` is used.

    Attributes:
        image_paths: Sorted list of paths (``image_dir`` joined with the file
            name) of the images that make up the dataset.
        transform: The callable applied to every image. Stays ``None`` until
            the first item is requested when no custom transform was given.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``image_dir`` cannot be
            listed (e.g. it does not exist or is not a directory).
    """

    # File-name suffixes (matched case-sensitively) that mark a file as an image.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, image_dir, transform=None):
        self.transform = transform
        # Keep the directory as part of every path so images can be opened no
        # matter what the current working directory is, and sort the result
        # because os.listdir() returns entries in arbitrary order.
        candidates = (os.path.join(image_dir, name) for name in os.listdir(image_dir))
        self.image_paths = sorted(
            path
            for path in candidates
            # isfile() skips sub-directories that merely look like images by name.
            if path.endswith(self.IMAGE_EXTENSIONS) and os.path.isfile(path)
        )

    @staticmethod
    def _default_transform():
        """Build the default augmentation pipeline (PIL image -> tensor)."""
        return transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.1),  # mirror horizontally 10% of the time
            transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
            # Fixed 300x300 output so differently sized images can share a batch
            # (per the torchvision docs, smaller images are zero-padded first).
            transforms.CenterCrop((300, 300)),
            transforms.ToTensor(),  # PIL image -> tensor scaled between 0 and 1
        ])

    def __len__(self):
        """Return the number of images found in the directory."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load, convert to RGB and transform the image at ``index``.

        Args:
            index: Position in ``image_paths`` (regular list indexing rules).

        Returns:
            The output of ``transform`` for that image; with the default
            pipeline this is an image tensor.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        if self.transform is None:
            # Built on first use so that constructing the dataset only touches
            # the file system; afterwards the same pipeline is reused.
            self.transform = self._default_transform()

        # The context manager closes the underlying file; convert() forces the
        # pixel data to be read first and gives every item three channels, so
        # grayscale / RGBA files do not break batch collation.
        with Image.open(self.image_paths[index]) as img:
            rgb_image = img.convert('RGB')

        return self.transform(rgb_image)

In [None]:
# Build the dataset from the current working directory: every .jpg / .png
# file found there becomes one entry.
dataset = test_dataset(image_dir='.')

# Indexing loads the image at position 0 and runs it through the dataset's
# transformation pipeline, yielding an image tensor.
first_image_tensor = dataset[0]

# A bare expression as the cell's last line makes the notebook render it.
first_image_tensor

tensor([[[0.0314, 0.0980, 0.3804,  ..., 0.1882, 0.1882, 0.1922],
         [0.0196, 0.1176, 0.2745,  ..., 0.2000, 0.2118, 0.2000],
         [0.1882, 0.2902, 0.1373,  ..., 0.2118, 0.1686, 0.1569],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.2784, 0.3647, 0.3216],
         [0.0000, 0.0000, 0.0000,  ..., 0.3765, 0.3451, 0.1922],
         [0.0000, 0.0000, 0.0000,  ..., 0.1922, 0.3569, 0.2745]],

        [[0.0235, 0.0863, 0.3647,  ..., 0.1255, 0.1176, 0.1255],
         [0.0078, 0.1098, 0.2706,  ..., 0.1333, 0.1451, 0.1176],
         [0.1882, 0.2902, 0.1490,  ..., 0.1333, 0.0902, 0.0706],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.2706, 0.3451, 0.3059],
         [0.0000, 0.0000, 0.0000,  ..., 0.3608, 0.3255, 0.1725],
         [0.0000, 0.0000, 0.0000,  ..., 0.1686, 0.3176, 0.2392]],

        [[0.0000, 0.0078, 0.2392,  ..., 0.5059, 0.5098, 0.5020],
         [0.0000, 0.0118, 0.1490,  ..., 0.5294, 0.5294, 0.5098],
         [0.0863, 0.1765, 0.0471,  ..., 0.5333, 0.4941, 0.

In [None]:
# Wrap the dataset in a DataLoader, which groups items into batches of two
# and visits them in a shuffled order.
dl = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

# Pull a single batch from a fresh iterator over the loader and check its
# shape; the expected result is [2, 3, 300, 300] (batch, channels, height, width).
output = next(iter(dl))
print(output.shape)

torch.Size([2, 3, 300, 300])
