In [2]:
import torch
from torch.utils.data import random_split

# Seed for the split generator. Without it, random_split draws from the global
# RNG and every kernel restart / cell re-run yields different partitions.
SPLIT_SEED = 42

# (train_size, val_size) pairs to compare. random_split is expected to reject
# integer lengths that do not add up to len(mnist_train).
splits = [
    (40000, 20000),
    (50000, 10000),
    (30000, 30000),
    (55000, 5000),
    (58000, 2000)
]

# The generator is created inside this cell (not in an earlier one) so that
# re-running the cell on its own reproduces exactly the same splits.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# One (train_subset, val_subset) pair per entry of `splits`, in the same order.
datasets_splits = []

for train_size, val_size in splits:
    # Each call advances split_generator, so the five partitions differ from
    # one another while the sequence as a whole stays deterministic.
    train_set, val_set = random_split(
        mnist_train, [train_size, val_size], generator=split_generator
    )
    datasets_splits.append((train_set, val_set))
    print(f"Created dataset split - Train: {train_size}, Validation: {val_size}")


Created dataset split - Train: 40000, Validation: 20000
Created dataset split - Train: 50000, Validation: 10000
Created dataset split - Train: 30000, Validation: 30000
Created dataset split - Train: 55000, Validation: 5000
Created dataset split - Train: 58000, Validation: 2000


In [3]:
from torch.utils.data import DataLoader

# Mini-batch sizes to compare
batch_sizes = [10, 25, 64, 128, 256, 512]

# Collected rows: (train_size, val_size, batch_size, num_train_batches, num_val_batches)
dataloaders_info = []

# Walk the size pairs alongside the subsets that were built from them
for (train_size, val_size), (train_set, val_set) in zip(splits, datasets_splits):
    print(f"\n=== Train: {train_size}, Validation: {val_size} ===")

    for batch_size in batch_sizes:
        # Only the training loader shuffles; validation keeps dataset order
        train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)
        val_loader = DataLoader(val_set, shuffle=False, batch_size=batch_size)

        # len() of a loader is its batch count; a trailing partial batch is counted
        num_train_batches, num_val_batches = len(train_loader), len(val_loader)

        dataloaders_info.append(
            (train_size, val_size, batch_size, num_train_batches, num_val_batches)
        )

        print(f"Batch Size: {batch_size} --> Train Batches: {num_train_batches}, Validation Batches: {num_val_batches}")



=== Train: 40000, Validation: 20000 ===
Batch Size: 10 --> Train Batches: 4000, Validation Batches: 2000
Batch Size: 25 --> Train Batches: 1600, Validation Batches: 800
Batch Size: 64 --> Train Batches: 625, Validation Batches: 313
Batch Size: 128 --> Train Batches: 313, Validation Batches: 157
Batch Size: 256 --> Train Batches: 157, Validation Batches: 79
Batch Size: 512 --> Train Batches: 79, Validation Batches: 40

=== Train: 50000, Validation: 10000 ===
Batch Size: 10 --> Train Batches: 5000, Validation Batches: 1000
Batch Size: 25 --> Train Batches: 2000, Validation Batches: 400
Batch Size: 64 --> Train Batches: 782, Validation Batches: 157
Batch Size: 128 --> Train Batches: 391, Validation Batches: 79
Batch Size: 256 --> Train Batches: 196, Validation Batches: 40
Batch Size: 512 --> Train Batches: 98, Validation Batches: 20

=== Train: 30000, Validation: 30000 ===
Batch Size: 10 --> Train Batches: 3000, Validation Batches: 3000
Batch Size: 25 --> Train Batches: 1200, Validation 