Split-MNIST

In [14]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, ConcatDataset, TensorDataset
from PIL import Image
import torch

# Download MNIST dataset
fmnist = datasets.MNIST(root='data/', train=True, download=True)
fmnist.name, fmnist.num_classes = 'MNIST', 10

# Define task splits
timestep_task_classes = {
    0: [0, 1],  # Task 0: Classes 0 and 1
    1: [2, 3],  # Task 1: Classes 2 and 3
    2: [4, 5],  # Task 2: Classes 4 and 5
    3: [6, 7],  # Task 3: Classes 6 and 7
    4: [8, 9]   # Task 4: Classes 8 and 9
}

# Transformations
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Prepare tasks
VAL_FRAC = 0.1
TEST_FRAC = 0.1
BATCH_SIZE = 256

timestep_tasks = {}
task_test_sets = []

for t, task_classes in timestep_task_classes.items():
    # Get indices for current task classes
    task_indices = [i for i, label in enumerate(fmnist.targets) if label in task_classes]
    
    # Map old labels to 0-based labels for the task
    class_to_idx = {orig: idx for idx, orig in enumerate(task_classes)}
    task_images = [Image.fromarray(fmnist.data[i].numpy(), mode='L') for i in task_indices]
    task_labels = [class_to_idx[fmnist.targets[i].item()] for i in task_indices]
    
    # Create tensors
    task_images_tensor = torch.stack([preprocess(img) for img in task_images])
    task_labels_tensor = torch.tensor(task_labels, dtype=torch.long)
    task_ids_tensor = torch.full((len(task_labels_tensor),), t, dtype=torch.long)
    
    # TensorDataset
    task_dataset = TensorDataset(task_images_tensor, task_labels_tensor, task_ids_tensor)
    
    # Train/Validation/Test split
    train_size = int((1 - VAL_FRAC - TEST_FRAC) * len(task_dataset))
    val_size = int(VAL_FRAC * len(task_dataset))
    test_size = len(task_dataset) - train_size - val_size
    train_set, val_set, test_set = random_split(task_dataset, [train_size, val_size, test_size])
    
    # Store
    timestep_tasks[t] = (train_set, val_set)
    task_test_sets.append(test_set)

# Final datasets
final_test_data = ConcatDataset(task_test_sets)
final_test_loader = DataLoader(final_test_data, batch_size=BATCH_SIZE, shuffle=True)
print(f"Final Split-MNIST Test Set Size: {len(final_test_data)}")


Final Split-MNIST Test Set Size: 6003


Split-CIFAR100

In [8]:
from torchvision import datasets, transforms
from PIL import Image
from torch.utils.data import DataLoader, random_split, ConcatDataset, TensorDataset
import torch

# Download CIFAR-100 dataset
cifar100 = datasets.CIFAR100(root='data/', train=True, download=True)
cifar100.name, cifar100.num_classes = 'CIFAR-100', 100

# Define task splits
NUM_TASKS = 10
task_classes_per_task = 100 // NUM_TASKS

timestep_task_classes = {
    t: list(range(t * task_classes_per_task, (t + 1) * task_classes_per_task))
    for t in range(NUM_TASKS)
}

# Transformations
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Prepare tasks
VAL_FRAC = 0.1
TEST_FRAC = 0.1
BATCH_SIZE = 256

timestep_tasks = {}
task_test_sets = []

for t, task_classes in timestep_task_classes.items():
    # Get indices for current task classes
    task_indices = [i for i, label in enumerate(cifar100.targets) if label in task_classes]
    
    # Map old labels to 0-based labels for the task
    class_to_idx = {orig: idx for idx, orig in enumerate(task_classes)}
    task_images = [Image.fromarray(cifar100.data[i]) for i in task_indices]
    task_labels = [class_to_idx[cifar100.targets[i]] for i in task_indices]
    
    # Create tensors
    task_images_tensor = torch.stack([preprocess(img) for img in task_images])
    task_labels_tensor = torch.tensor(task_labels, dtype=torch.long)
    task_ids_tensor = torch.full((len(task_labels_tensor),), t, dtype=torch.long)
    
    # TensorDataset
    task_dataset = TensorDataset(task_images_tensor, task_labels_tensor, task_ids_tensor)
    
    # Train/Validation/Test split
    train_size = int((1 - VAL_FRAC - TEST_FRAC) * len(task_dataset))
    val_size = int(VAL_FRAC * len(task_dataset))
    test_size = len(task_dataset) - train_size - val_size
    train_set, val_set, test_set = random_split(task_dataset, [train_size, val_size, test_size])
    
    # Store
    timestep_tasks[t] = (train_set, val_set)
    task_test_sets.append(test_set)

# Final datasets
final_test_data = ConcatDataset(task_test_sets)
final_test_loader = DataLoader(final_test_data, batch_size=BATCH_SIZE, shuffle=True)
print(f"Final Split-CIFAR-100 Test Set Size: {len(final_test_data)}")


Files already downloaded and verified


TypeError: pic should be Tensor or ndarray. Got <class 'PIL.Image.Image'>.

TinyImagenet


In [None]:
#!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
#!unzip tiny-imagenet-200.zip -d tiny-imagenet-200


In [13]:
from torchvision import datasets, transforms
import os
from torch.utils.data import DataLoader, random_split, ConcatDataset, TensorDataset
import torch

# Tiny ImageNet directory
data_dir = '/home/ndelafuente/TSR-5/tiny-imagenet-200/tiny-imagenet-200'

# Transformations
preprocess = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Load dataset
tiny_imagenet_train = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=preprocess)

# Define task splits
NUM_TASKS = 10
task_classes_per_task = 200 // NUM_TASKS

timestep_task_classes = {
    t: list(range(t * task_classes_per_task, (t + 1) * task_classes_per_task))
    for t in range(NUM_TASKS)
}

# Prepare tasks
VAL_FRAC = 0.1
TEST_FRAC = 0.1
BATCH_SIZE = 256

timestep_tasks = {}
task_test_sets = []

for t, task_classes in timestep_task_classes.items():
    # Get indices for current task classes
    task_indices = [i for i, (_, label) in enumerate(tiny_imagenet_train.samples) if label in task_classes]
    
    # Map old labels to 0-based labels for the task
    class_to_idx = {orig: idx for idx, orig in enumerate(task_classes)}
    task_images = [tiny_imagenet_train[i][0] for i in task_indices]
    task_labels = [class_to_idx[tiny_imagenet_train[i][1]] for i in task_indices]
    
    # Create tensors
    task_images_tensor = torch.stack([img for img in task_images])
    task_labels_tensor = torch.tensor(task_labels, dtype=torch.long)
    task_ids_tensor = torch.full((len(task_labels_tensor),), t, dtype=torch.long)
    
    # TensorDataset
    task_dataset = TensorDataset(task_images_tensor, task_labels_tensor, task_ids_tensor)
    
    # Train/Validation/Test split
    train_size = int((1 - VAL_FRAC - TEST_FRAC) * len(task_dataset))
    val_size = int(VAL_FRAC * len(task_dataset))
    test_size = len(task_dataset) - train_size - val_size
    train_set, val_set, test_set = random_split(task_dataset, [train_size, val_size, test_size])
    
    # Store
    timestep_tasks[t] = (train_set, val_set)
    task_test_sets.append(test_set)

# Final datasets
final_test_data = ConcatDataset(task_test_sets)
final_test_loader = DataLoader(final_test_data, batch_size=BATCH_SIZE, shuffle=True)
print(f"Final Tiny ImageNet Test Set Size: {len(final_test_data)}")


Final Tiny ImageNet Test Set Size: 10000
