In [15]:
import random
from pathlib import Path

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torchvision
from torchvision import transforms

from avalanche.benchmarks.classic.clear import CLEAR, CLEARMetric
from avalanche.benchmarks.utils import AvalancheDataset
from avalanche.evaluation.metrics import (
    EpochAccuracy,
    ExperienceAccuracy,
    ExperienceCPUUsage,
    ExperienceForgetting,
    ExperienceLoss,
    ExperienceMaxGPU,
    ExperienceMaxRAM,
    ExperienceTime,
    StreamAccuracy,
)
from avalanche.logging import InteractiveLogger, WandBLogger
from avalanche.training.plugins import EvaluationPlugin
from avalanche.training.plugins.lr_scheduling import LRSchedulerPlugin
from avalanche.training.supervised import Naive

from utils.util import make_scheduler

In [16]:
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# torch.cuda.manual_seed_all(seed) # if use multi-GPU
cudnn.deterministic = True  # 연산 처리 속도 감소 -> 모델과 코드를 배포해야 하는 연구 후반 단계에 사용
cudnn.benchmark = False

In [17]:
# For CLEAR dataset setup
DATASET_NAME = "clear100_cvpr2022"
NUM_CLASSES = {"clear10": 11, "clear100_cvpr2022": 100}
assert DATASET_NAME in NUM_CLASSES.keys()

# please refer to paper for discussion on streaming v.s. iid protocol
EVALUATION_PROTOCOL = "streaming"  # trainset = testset per timestamp
# EVALUATION_PROTOCOL = "iid"  # 7:3 trainset_size:testset_size

# For saving the datasets/models/results/log files
ROOT = Path("./data")
DATA_ROOT = ROOT / DATASET_NAME
MODEL_ROOT = ROOT / "models"
DATA_ROOT.mkdir(parents=True, exist_ok=True)
MODEL_ROOT.mkdir(parents=True, exist_ok=True)

normalize = torchvision.transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)
train_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(224),
        torchvision.transforms.RandomCrop(224),
        torchvision.transforms.ToTensor(),
        normalize,
    ]
)
test_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        normalize,
    ]
)

In [18]:
# Define hyperparameters/scheduler/augmentation
HPARAM = {
    "batch_size": 256,
    'num_epoch' : 100,
    "step_scheduler_decay": 30,
    "scheduler_step": 0.1,
    "start_lr": 0.01,
    "weight_decay": 1e-5,
    "momentum": 0.9,
}

if EVALUATION_PROTOCOL == "streaming":
    seed = None
else:
    seed = 0

In [19]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

num_class = 10
incremental = 5
lr_milestones = [49, 63]
lr_factor = 5.0
fixed_class_order = [4, 1, 7, 5, 3, 9, 0, 8, 6, 2]

scenario = CLEAR(
    data_name=DATASET_NAME,
    evaluation_protocol=EVALUATION_PROTOCOL,
    feature_type=None,
    seed=seed,
    train_transform=train_transform,
    eval_transform=test_transform,
    dataset_root=DATA_ROOT,
)

model = torchvision.models.resnet18(pretrained=False)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=2.0, momentum=0.9, weight_decay=1e-5)

scheduler = make_scheduler(
    optimizer,
    HPARAM["step_scheduler_decay"],
    HPARAM["scheduler_step"],
)

plugin_list = [LRSchedulerPlugin(scheduler)]

Files already downloaded and verified
Files already downloaded and verified


In [20]:
interactive_logger = InteractiveLogger()
wandb_logger = WandBLogger(run_name="streaming-CLEAR")
eval_plugin = EvaluationPlugin(
    EpochAccuracy(),
    ExperienceAccuracy(),
    ExperienceLoss(),
    ExperienceForgetting(),
    ExperienceCPUUsage(),
    ExperienceMaxGPU(gpu_id=0),
    ExperienceMaxRAM(),
    ExperienceTime(),
    loggers=[interactive_logger, wandb_logger])

  "No benchmark provided to the evaluation plugin. "


In [21]:
memory_size = 2000
train_batch = 64
eval_batch = 32
epoch = 70

cl_strategy = Naive(
    model,
    optimizer,
    torch.nn.CrossEntropyLoss(),
    train_mb_size=HPARAM["batch_size"],
    train_epochs=HPARAM["num_epoch"],
    eval_mb_size=HPARAM["batch_size"],
    evaluator=eval_plugin,
    device=device,
    plugins=plugin_list,
)

In [22]:
print("Starting experiment...")
results = []
print("Current protocol : ", EVALUATION_PROTOCOL)
for index, experience in enumerate(scenario.train_stream):
    print("Start of experience: ", experience.current_experience)
    print("Current Classes: ", experience.classes_in_this_experience)
    res = cl_strategy.train(experience)
    torch.save(
        model.state_dict(),
        str(MODEL_ROOT / f"model{str(index).zfill(2)}.pth")
    )
    print("Training completed")
    print(
        "Computing accuracy on the whole test set with"
        f" {EVALUATION_PROTOCOL} evaluation protocol"
    )
    results.append(cl_strategy.eval(scenario.test_stream))
# generate accuracy matrix
num_timestamp = len(results)
accuracy_matrix = np.zeros((num_timestamp, num_timestamp))
for train_idx in range(num_timestamp):
    for test_idx in range(num_timestamp):
        accuracy_matrix[train_idx][test_idx] = results[train_idx][
            f"Top1_Acc_Stream/eval_phase/test_stream/Task00{test_idx}"]
print('Accuracy_matrix : ')
print(accuracy_matrix)
metric = CLEARMetric().get_metrics(accuracy_matrix)
print(metric)

Starting experiment...
Current protocol :  streaming
Start of experience:  0
Current Classes:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
-- >> Start of training phase << --
0it [00:00, ?it/s]

RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.