In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import glob

import torchvision.transforms as transforms

class NpyImageDataset(Dataset):
    """Dataset for loading images stored as .npy files."""
    def __init__(self, data_dir, transform=None):
        """
        Args:
            data_dir (string): Directory with all the .npy files.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data_dir = data_dir
        self.file_paths = glob.glob(os.path.join(data_dir, '*.npy'))
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = self.file_paths[idx]
        # Load image data from .npy file (assuming HxWxC format, values 0-1)
        image = np.load(img_path).astype(np.float32)

        print(image.shape)
        # Convert numpy array to PyTorch tensor
        image = torch.from_numpy(image)

        # Permute dimensions from HxWxC to CxHxW
        image = image.permute(2, 0, 1)

        if self.transform:
            image = self.transform(image)

        return image

# Define the normalization transform (0, 1) -> (-1, 1)
# y = 2x - 1
normalize_transform = transforms.Lambda(lambda x: x * 2.0 - 1.0)

# --- Configuration ---
DATA_DIRECTORY = r'D:\pdbFiles\imgs2' # Replace with the actual path to your .npy files
BATCH_SIZE = 2
NUM_WORKERS = 0 # Adjust based on your system

# --- Create Dataset and DataLoader ---




 
image_dataset = NpyImageDataset(data_dir=DATA_DIRECTORY, transform=normalize_transform)

# Create the DataLoader
image_dataloader = DataLoader(image_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=NUM_WORKERS)

print(f"DataLoader created successfully.")
print(f"Number of samples: {len(image_dataset)}")
print(f"Number of batches: {len(image_dataloader)}")

# Example: Iterate over one batch to check
if len(image_dataloader) > 0 and False:
    first_batch = next(iter(image_dataloader))
    print(f"Shape of the first batch: {first_batch.shape}")
    print(f"Data range (min, max): ({first_batch.min().item():.2f}, {first_batch.max().item():.2f})")
else:
    print("DataLoader is empty. Check if .npy files exist in the specified directory.")

 


DataLoader created successfully.
Number of samples: 1096
Number of batches: 548
DataLoader is empty. Check if .npy files exist in the specified directory.


In [4]:
image_dataset[0]

(128, 128)


RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 2 is not equal to len(dims) = 3