# **Finetuning strategy**

**Experiment information**

- dataset: cifar100
- shuffle: True
- init_cls: 5
- increment: 5
- convnet_type: resnet32
- seed: 1993


Sets seeds

In [1]:
import torch

seed = 1993
torch.manual_seed(1)
torch.cuda.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
from utils.data_manager import DataManager

data_manager = DataManager(dataset_name="cifar100", shuffle=True, seed=seed, init_cls=5, increment=5)

Files already downloaded and verified
Files already downloaded and verified


## Data management

TODO: Maybe optimize transforms.

In [3]:
print(type(data_manager._train_data))
print("Train data shape:", data_manager._train_data.shape)
print("Test data shape:", data_manager._test_data.shape)

print()
print("Train transforms:", data_manager._train_trsf)
print("Test transforms:", data_manager._test_trsf)
print("Common transforms:", data_manager._common_trsf)

<class 'numpy.ndarray'>
Train data shape: (50000, 32, 32, 3)
Test data shape: (10000, 32, 32, 3)

Train transforms: [RandomCrop(size=(32, 32), padding=4), RandomHorizontalFlip(p=0.5), ColorJitter(brightness=(0.7529411764705882, 1.2470588235294118), contrast=None, saturation=None, hue=None)]
Test transforms: []
Common transforms: [ToTensor(), Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761))]


# Model strategy       

In [4]:
from utils import factory

# model_args = {"convnet_type": "resnet32", "skip": False, "memory_size": 200, "device": [torch.device("cuda:0"), torch.device("cuda:1")]}
model_args = {"convnet_type": "conv2", "skip": False, "memory_size": 200, "device": [torch.device("cuda:0"), torch.device("cuda:1")]}
model = factory.get_model("finetune", args=model_args)


In [5]:
model._network

IncrementalNet(
  (convnet): ConvNet2(
    (avgpool): AvgPool2d(kernel_size=8, stride=8, padding=0)
    (encoder): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (1): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
  )
)

In [6]:
for task in range(data_manager.nb_tasks):
    model.incremental_train(data_manager)
    model.eval_task(save_conf=False)
    model.after_task()



RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [6]:
import numpy as np
from torch import nn
from torch.utils.data import DataLoader


model._network.update_fc(5)
model._total_classes = 5
batch_size = 128
num_workers = 8

# setup datasets and dataloaders
train_dataset = data_manager.get_dataset(
    np.arange(model._known_classes, model._total_classes),
    source="train",
    mode="train",
)
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=True, pin_memory=True
)
test_dataset = data_manager.get_dataset(
    np.arange(0, model._total_classes), source="test", mode="test"
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

if len(model._multiple_gpus) > 1:
    model._network = nn.DataParallel(model._network, model._multiple_gpus)

print("Length of train dataset:", len(train_dataset))
print("Length of datalaoder:", len(train_loader))

Length of train dataset: 2500
Length of datalaoder: 20


In [7]:
model._cur_task = 0

For now we will only focus on the first stage

In [10]:
import torch
torch.cuda.is_available()

True

In [8]:
from torch import optim
import logging
from torch.nn import functional as F
from tqdm import tqdm

def tensor2numpy(x):
    return x.cpu().data.numpy() if x.is_cuda else x.data.numpy()

init_epoch = 200
init_lr = 0.1
init_milestones = [60, 120, 170]
init_lr_decay = 0.1
init_weight_decay = 0.0005

epochs = 80
lrate = 0.1
milestones = [40, 70]
lrate_decay = 0.1
weight_decay = 2e-4


model._network.to(model._device)
if model._cur_task == 0:
    # setup optimizer and scheduler
    optimizer = optim.SGD(
        model._network.parameters(),
        momentum=0.9,
        lr=init_lr,
        weight_decay=init_weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=init_epoch,
    )  

    # prog_bar = tqdm(range(init_epoch))
    prog_bar = tqdm(range(20))
    for _, epoch in enumerate(prog_bar):
        model._network.train()
        losses = 0.0
        correct, total = 0, 0
        for i, (_, inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(model._device), targets.to(model._device)
        #     logits = model._network(inputs)["logits"]

        #     loss = F.cross_entropy(logits, targets)
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     losses += loss.item()

        #     _, preds = torch.max(logits, dim=1)
        #     correct += preds.eq(targets.expand_as(preds)).cpu().sum()
        #     total += len(targets)

        # scheduler.step()
        # train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)

        # if epoch % 5 == 0:
        #     info = "Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}".format(
        #         model._cur_task,
        #         epoch + 1,
        #         init_epoch,
        #         losses / len(train_loader),
        #         train_acc,
        #     )
        # else:
        #     test_acc = model._compute_accuracy(model._network, test_loader)
        #     info = "Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}".format(
        #         model._cur_task,
        #         epoch + 1,
        #         init_epoch,
        #         losses / len(train_loader),
        #         train_acc,
        #         test_acc,
        #     )
        # prog_bar.set_description(info)

    # logging.info(info)

    # # save checkpoint for fair comparison & save runing time
    # test_acc = model._compute_accuracy(model._network, test_loader)
    # model.save_checkpoint(test_acc)
    # logging.info("Save checkpoint successfully!")

100%|██████████| 10/10 [00:03<00:00,  2.59it/s]


In [9]:
if len(model._multiple_gpus) > 1:
    model._network = model._network.module