# Loading the 200GB dataset with PyTorch

In [1]:
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
import time
import matplotlib.pyplot as plt
import torch

class CustomDataset(Dataset):
    """Map-style dataset over the ``part_*`` files of a directory.

    Every directory entry whose name starts with ``part_`` is one sample,
    loaded with ``np.load`` on access. Its label is looked up by file name in
    the object stored in ``labels.npy`` in the same directory.
    """

    def __init__(self, data_dir):
        """Index the sample files and load the label lookup.

        Args:
            data_dir: Directory containing the ``part_*`` files and
                ``labels.npy``.
        """
        self.data_dir = data_dir
        # os.listdir() yields entries in arbitrary order; sorting makes a given
        # index refer to the same file on every run and every machine.
        self.file_list = sorted(
            f for f in os.listdir(data_dir) if f.startswith("part_")
        )
        # NOTE: allow_pickle=True unpickles arbitrary Python objects, so only
        # point this at a labels.npy file you trust. .item() unwraps the 0-d
        # object array back into the stored object (indexed by file name below).
        self.labels = np.load(
            os.path.join(data_dir, "labels.npy"), allow_pickle=True
        ).item()

    def __len__(self):
        """Return the number of ``part_*`` files found in ``data_dir``."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Args:
            idx: Position in the sorted file list.

        Returns:
            A ``(image, label)`` tuple: the result of ``np.load`` on the file,
            and the label stored under that file's name.
        """
        file_name = self.file_list[idx]
        image = np.load(os.path.join(self.data_dir, file_name))
        label = self.labels[file_name]
        return image, label

# Location of the training files read by CustomDataset.
data_dir = "data/train_200GB"
dataset = CustomDataset(data_dir)

# Shuffled batches of 48 samples, loaded by 8 worker processes.
dataloader = DataLoader(
    dataset,
    batch_size=48,
    shuffle=True,
    num_workers=8,
)

In [2]:
num_epochs = 20

# Benchmark raw data-loading throughput: iterate the loader num_epochs times
# without doing anything with the batches.
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# intervals are not distorted if the system wall clock is adjusted mid-run.
start_time = time.perf_counter()
for epoch in range(num_epochs):
    epoch_start = time.perf_counter()
    for images, labels in dataloader:
        pass  # intentionally empty: only the loading itself is being timed
    epoch_end = time.perf_counter()
    epoch_time = epoch_end - epoch_start
    print(f"Epoch {epoch+1} done in {epoch_time} seconds.")
end_time = time.perf_counter()

total_time = end_time - start_time

print(f"Total time taken: {total_time} seconds")
# total_time / num_epochs is the mean duration of one epoch.
print(f"Total time per epoch: {total_time/num_epochs} seconds")

Epoch 1 done in 519.0264642238617 seconds.
Epoch 2 done in 527.9110577106476 seconds.
Epoch 3 done in 564.508202791214 seconds.
Epoch 4 done in 504.62781500816345 seconds.
Epoch 5 done in 535.0007140636444 seconds.
Epoch 6 done in 585.1651082038879 seconds.
Epoch 7 done in 531.1468064785004 seconds.
Epoch 8 done in 642.8812713623047 seconds.
Epoch 9 done in 563.2509710788727 seconds.
Epoch 10 done in 572.5111229419708 seconds.
Epoch 11 done in 641.9526226520538 seconds.
Epoch 12 done in 557.3534407615662 seconds.
Epoch 13 done in 614.1739091873169 seconds.
Epoch 14 done in 612.5322651863098 seconds.
Epoch 15 done in 556.8544085025787 seconds.
Epoch 16 done in 591.4019739627838 seconds.
Epoch 17 done in 529.8777468204498 seconds.
Epoch 18 done in 545.0977780818939 seconds.
Epoch 19 done in 531.2759389877319 seconds.
Epoch 20 done in 593.2343735694885 seconds.
Total time taken: 11319.82952952385 seconds
Total time per epoch: 565.9914764761925 seconds
