In [8]:
import torch
import random
import numpy as np
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
import torch.backends.cudnn as cudnn
from torchvision import transforms
from torch.optim.lr_scheduler import MultiStepLR
from utils.util import icarl_clear100_augment_data, get_dataset_per_pixel_mean

from avalanche.models import IcarlNet
from avalanche.training.supervised import ICaRL
from avalanche.logging import InteractiveLogger, WandBLogger
from avalanche.benchmarks.classic import SplitCIFAR10
from avalanche.benchmarks.datasets.clear import _CLEARImage
from avalanche.benchmarks.generators import nc_benchmark
from avalanche.benchmarks.utils import AvalancheDataset
from avalanche.training.plugins import EvaluationPlugin
from avalanche.training.plugins.lr_scheduling import LRSchedulerPlugin
from avalanche.evaluation.metrics import ExperienceAccuracy, ExperienceLoss, ExperienceForgetting, ExperienceCPUUsage, ExperienceMaxGPU, ExperienceMaxRAM, ExperienceTime, EpochAccuracy

In [9]:
# Global seed shared by the Python, NumPy and PyTorch RNGs (also reused by
# later cells when building the datasets and the benchmark).
seed = 0

# Seed every RNG with the same value. No separate
# torch.cuda.manual_seed_all(seed) call is made here; per the PyTorch docs,
# torch.manual_seed already seeds the generators on all devices.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

# Disable the cuDNN autotuner and force deterministic cuDNN kernels.
# This slows computation down, so it is meant for the later stage of the
# research when the model and code have to be released.
cudnn.benchmark = False
cudnn.deterministic = True

In [10]:
# --- Dataset location -------------------------------------------------------
DATASET_NAME = "clear100_cvpr2022"
ROOT = Path("/home/data/clear")
DATA_ROOT = ROOT / DATASET_NAME


def _clear_split(split, **extra):
    """Build the `_CLEARImage` dataset for `split` ("train" or "test").

    Any additional keyword arguments (e.g. `transform=`) are forwarded
    unchanged to `_CLEARImage`.
    """
    return _CLEARImage(
        DATA_ROOT,
        data_name=DATASET_NAME,
        download=True,
        split=split,
        seed=seed,
        **extra,
    )


# --- Per-pixel mean of the training split -----------------------------------
# The mean is computed on images that went through the same Resize + ToTensor
# steps used below, so it can be subtracted directly from a resized tensor.
# NOTE(review): the resize target is [224, 244] (not square). The train
# pipeline then random-crops to 224x224, while eval keeps 224x244 — confirm
# that 244 is intentional crop slack and not a typo for 224.
pixel_transforms = transforms.Compose(
    [transforms.Resize([224, 244]), transforms.ToTensor()]
)
per_pixel_mean = get_dataset_per_pixel_mean(
    _clear_split("train", transform=pixel_transforms)
)


def _subtract_pixel_mean(img_pattern):
    """Center an image tensor by subtracting the global `per_pixel_mean`."""
    return img_pattern - per_pixel_mean


def _base_steps():
    """Steps shared by train and eval: resize, to-tensor, mean subtraction."""
    return [
        transforms.Resize([224, 244]),
        transforms.ToTensor(),
        _subtract_pixel_mean,
    ]


# --- Transform groups ---------------------------------------------------------
# Each group is an (input_transform, target_transform) pair; targets are left
# untouched (None). Training adds a random 224x224 crop after centering
# (the `icarl_clear100_augment_data` helper is not used here).
train_transform = transforms.Compose(
    _base_steps() + [transforms.RandomCrop([224, 224])]
)
eval_transform = transforms.Compose(_base_steps())

transforms_group = {
    "train": (train_transform, None),
    "eval": (eval_transform, None),
}

# --- Datasets -------------------------------------------------------------------
# Raw splits are created without transforms and wrapped in AvalancheDataset,
# which applies the transform group selected by `initial_transform_group`.
train_set = AvalancheDataset(
    _clear_split("train"),
    transform_groups=transforms_group,
    initial_transform_group="train",
)
test_set = AvalancheDataset(
    _clear_split("test"),
    transform_groups=transforms_group,
    initial_transform_group="eval",
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [11]:
# Loggers: one interactive logger plus a Weights & Biases run named
# "iCaRL-CLEAR100".
interactive_logger = InteractiveLogger()
wandb_logger = WandBLogger(run_name="iCaRL-CLEAR100")

# Metrics handed to the evaluation plugin. Apart from the epoch-level
# accuracy they are all experience-level metrics (accuracy, loss, forgetting
# and CPU / GPU / RAM / time usage). The GPU metric is pinned to GPU 0.
tracked_metrics = [
    EpochAccuracy(),
    ExperienceAccuracy(),
    ExperienceLoss(),
    ExperienceForgetting(),
    ExperienceCPUUsage(),
    ExperienceMaxGPU(gpu_id=0),
    ExperienceMaxRAM(),
    ExperienceTime(),
]

eval_plugin = EvaluationPlugin(
    *tracked_metrics,
    loggers=[interactive_logger, wandb_logger],
)



In [12]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# --- Experiment hyper-parameters ----------------------------------------------
num_class = 100          # output size of the classifier head
incremental = 5          # number of experiences the benchmark is split into
lr_milestones = [49, 63] # scheduler milestones at which the LR is decayed
lr_factor = 5.0          # the LR is divided by this factor at each milestone

# --- Benchmark ------------------------------------------------------------------
# New-classes benchmark over the train/test sets, split into `incremental`
# experiences without shuffling the class order (no fixed class order is
# passed either).
# NOTE(review): task_labels=True is unusual for a class-incremental iCaRL
# setup — confirm this is intended.
benchmark_options = dict(
    n_experiences=incremental,
    task_labels=True,
    seed=seed,
    shuffle=False,
)
scenario = nc_benchmark(
    train_dataset=train_set,
    test_dataset=test_set,
    **benchmark_options,
)

# --- Model ------------------------------------------------------------------------
# Original author's note on the remaining IcarlNet arguments, which are left
# at their defaults here: n = ResidualBlock, c = input_dim.
model = IcarlNet(num_classes=num_class).to(device)

# --- Optimizer and LR schedule ------------------------------------------------
optimizer = optim.SGD(
    model.parameters(),
    lr=2.0,
    momentum=0.9,
    weight_decay=1e-5,
)
# Multiply the LR by 1 / lr_factor at every milestone; the scheduler is
# wrapped in a plugin so it can be passed to the strategy's plugin list.
lr_schedule = MultiStepLR(optimizer, milestones=lr_milestones, gamma=1.0 / lr_factor)
sched = LRSchedulerPlugin(lr_schedule)

In [19]:
# --- Strategy hyper-parameters ------------------------------------------------
memory_size = 2000   # `memory_size` argument of ICaRL (used with fixed_memory=True)
train_batch = 16     # training mini-batch size
eval_batch = 16      # evaluation mini-batch size
epoch = 70           # training epochs per experience

# Transform passed to ICaRL as `buffer_transform`: a random 224x224 crop
# (used instead of the `icarl_clear100_augment_data` helper).
buffer_transform = transforms.Compose(
    [
        transforms.RandomCrop([224, 224]),
    ]
)

# iCaRL strategy built from the two halves of the model (feature extractor
# and classifier). No explicit criterion is passed, so the strategy default
# is used (the original note mentions ICaRLLossPlugin()).
strategies = ICaRL(
    model.feature_extractor,
    model.classifier,
    optimizer,
    memory_size,
    buffer_transform=buffer_transform,
    fixed_memory=True,
    train_mb_size=train_batch,
    train_epochs=epoch,
    eval_mb_size=eval_batch,
    device=device,
    plugins=[sched],
    evaluator=eval_plugin,
)

In [20]:

# Train on each experience in order; after every experience, evaluate on the
# test experiences seen so far (the first `n_seen` entries of the test stream).
for n_seen, train_exp in enumerate(scenario.train_stream, start=1):
    seen_test_exps = list(scenario.test_stream)[:n_seen]
    strategies.train(train_exp)
    strategies.eval(seen_test_exps)

-- >> Start of training phase << --
100%|██████████| 963/963 [10:49<00:00,  1.48it/s]   
Epoch 0 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.1693
100%|██████████| 963/963 [03:09<00:00,  5.09it/s]
Epoch 1 ended.
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.2219
 79%|███████▊  | 758/963 [03:04<00:55,  3.67it/s]

RuntimeError: CUDA error: the launch timed out and was terminated
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.