In [1]:
import torch
import torch.nn as nn
from os.path import expanduser
from torchvision.datasets import CIFAR100
from torchvision.transforms import ToTensor, Normalize, Compose, RandomCrop, RandomHorizontalFlip
from avalanche.benchmarks import nc_benchmark
from avalanche.training.supervised import EWC
from avalanche.models import as_multitask
from avalanche.evaluation.metrics import accuracy_metrics
from avalanche.logging import InteractiveLogger
from avalanche.training.plugins import EvaluationPlugin


class ResNet18(nn.Module):
    """ResNet-18 style network for CIFAR-sized RGB images.

    Layout: a 3x3 conv / batch-norm / ReLU stem, four stages of two
    ``BasicBlock`` modules each (64, 128, 256 and 512 output channels; the
    first stage uses stride 1, the remaining three stride 2), global average
    pooling, and a final linear layer stored as ``classifier``.

    Args:
        num_classes: Number of outputs of the ``classifier`` layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stem: 3 input channels -> 64 feature maps, spatial size preserved.
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages, registered as layer1..layer4 in this order.
        stage_specs = (
            (64, 64, 1),
            (64, 128, 2),
            (128, 256, 2),
            (256, 512, 2),
        )
        for index, (width_in, width_out, stride) in enumerate(stage_specs, start=1):
            setattr(
                self,
                f"layer{index}",
                self._make_layer(width_in, width_out, 2, stride=stride),
            )

        # Head: global average pool followed by the linear classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks, stride):
        """Build one stage as an ``nn.Sequential`` of ``BasicBlock`` modules.

        The first block maps ``in_channels`` to ``out_channels`` with the
        given ``stride``; the other ``blocks - 1`` blocks keep the channel
        count and use stride 1.
        """
        stage = [BasicBlock(in_channels, out_channels, stride)]
        stage.extend(
            BasicBlock(out_channels, out_channels, 1) for _ in range(1, blocks)
        )
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the ``classifier`` output for the input batch ``x``."""
        features = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = self.avgpool(features)
        return self.classifier(torch.flatten(pooled, 1))


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The block output is ``relu(main_path(x) + shortcut(x))``. The shortcut is
    an empty ``nn.Sequential`` (identity) when the input and output shapes
    match, and a 1x1 conv + batch-norm projection when ``stride`` is not 1 or
    the channel count changes.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the projection).
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # A projection is only needed when the skip path would otherwise
        # disagree in shape with the main path.
        shapes_match = stride == 1 and in_channels == out_channels
        if shapes_match:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Apply the two-convolution main path, add the shortcut, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # In-place add of the skip connection onto the main-path activations.
        out += self.shortcut(x)
        return self.relu(out)


def main():
    """Train a multi-head ResNet-18 with EWC on a task-incremental CIFAR-100 split.

    The function builds the model, optimizer and data pipeline, creates an
    ``nc_benchmark`` with task labels, then trains on each experience of the
    training stream in turn and evaluates on the whole test stream after
    every experience.

    Returns:
        list: One entry per training experience, each being the value
        returned by ``strategy.eval`` on the full test stream after training
        on that experience.
    """
    scenario_name = "multitask_cifar100"

    # EWC settings
    ewc_mode = "separate"  # Options: "separate", "online"
    ewc_lambda = 0.5  # Penalty hyperparameter for EWC
    decay_factor = 0.1  # Decay factor (only used when ewc_mode is "online")

    # Training settings
    learning_rate = 1e-3
    epochs = 10  # Increased for CIFAR-100
    minibatch_size = 64  # Reduced for CIFAR-100 (more memory intensive)
    num_tasks = 10
    classes_per_task = 10

    # Device settings
    cuda_id = 0  # GPU id to use, set to -1 for CPU

    use_cuda = torch.cuda.is_available() and cuda_id >= 0
    device = torch.device(f"cuda:{cuda_id}" if use_cuda else "cpu")
    print(f"Using device: {device}")

    # Create model with ResNet-18 architecture; the "classifier" layer is
    # the one handed to as_multitask.
    model = ResNet18(num_classes=classes_per_task)
    model = as_multitask(model, "classifier")
    model.to(device)

    optimizer = torch.optim.SGD(
        model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4
    )
    criterion = torch.nn.CrossEntropyLoss()

    # Normalisation statistics shared by the train and test pipelines, defined
    # once so the two cannot drift apart.
    cifar100_mean = [0.5071, 0.4867, 0.4408]
    cifar100_std = [0.2675, 0.2565, 0.2761]

    # Data augmentation for CIFAR-100 (training only)
    train_transform = Compose([
        RandomCrop(32, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=cifar100_mean, std=cifar100_std),
    ])

    test_transform = Compose([
        ToTensor(),
        Normalize(mean=cifar100_mean, std=cifar100_std),
    ])

    # Both splits live under the same root directory.
    data_root = expanduser("~/.avalanche/data/cifar100/")
    cifar100_train = CIFAR100(
        root=data_root,
        train=True,
        download=True,
        transform=train_transform,
    )
    cifar100_test = CIFAR100(
        root=data_root,
        train=False,
        download=True,
        transform=test_transform,
    )

    # Create nc_benchmark with one experience per task; class ids restart
    # from zero inside every experience.
    scenario = nc_benchmark(
        cifar100_train,
        cifar100_test,
        n_experiences=num_tasks,
        task_labels=True,
        seed=1234,
        class_ids_from_zero_in_each_exp=True,
    )

    # Setup evaluation plugin
    interactive_logger = InteractiveLogger()
    eval_plugin = EvaluationPlugin(
        accuracy_metrics(
            minibatch=True, epoch=True, experience=True, stream=True
        ),
        loggers=[interactive_logger],
    )

    # The decay factor is only meaningful for the "online" mode.
    if ewc_mode == "separate":
        decay_factor = None

    # Keyword arguments keep this call independent of the constructor's
    # positional parameter order (NOTE(review): recent Avalanche releases
    # appear to deprecate positional strategy arguments - confirm against the
    # installed version). eval_mb_size is set explicitly so evaluation uses
    # the training batch size instead of relying on the library default.
    strategy = EWC(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        ewc_lambda=ewc_lambda,
        mode=ewc_mode,
        decay_factor=decay_factor,
        train_epochs=epochs,
        device=device,
        train_mb_size=minibatch_size,
        eval_mb_size=minibatch_size,
        evaluator=eval_plugin,
    )

    print("Starting experiment...")
    print(f"Scenario: {scenario_name}")
    print(f"Number of tasks: {num_tasks}")
    print(f"Classes per task: {classes_per_task}")
    print(f"EWC Mode: {ewc_mode}")
    print(f"EWC Lambda: {ewc_lambda}")
    print(f"Epochs per task: {epochs}")
    print(f"Batch size: {minibatch_size}")
    print(f"Learning rate: {learning_rate}")
    print("=" * 70)

    results = []
    for experience in scenario.train_stream:
        print(f"\nStart training on experience {experience.current_experience}")

        strategy.train(experience)

        print(f"End training on experience {experience.current_experience}")
        print("Computing accuracy on the test set")

        # Evaluate on every test experience after each training experience.
        results.append(strategy.eval(scenario.test_stream[:]))

    print("\n" + "=" * 70)
    print("Experiment completed!")
    print("=" * 70)

    return results



# Entry point: run the experiment only when this code is executed directly,
# not when it is imported. The list returned by main() is kept in the
# module-level name `results`.
if __name__ == "__main__":
    results = main()


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda:0




Starting experiment...
Scenario: multitask_cifar100
Number of tasks: 10
Classes per task: 10
EWC Mode: separate
EWC Lambda: 0.5
Epochs per task: 10
Batch size: 64
Learning rate: 0.001

Start training on experience 0
-- >> Start of training phase << --
100%|██████████| 79/79 [00:06<00:00, 11.94it/s]
Epoch 0 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.3048
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.3750
100%|██████████| 79/79 [00:05<00:00, 13.89it/s]
Epoch 1 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.4842
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.6250
100%|██████████| 79/79 [00:05<00:00, 13.65it/s]
Epoch 2 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.5448
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.6250
100%|██████████| 79/79 [00:05<00:00, 13.87it/s]
Epoch 3 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.5886
	Top1_Acc_MB/train_phase/train_stream/Task000 = 0.6250
100%|██████████| 79/79 [00:05<00:00, 14.24it