In [1]:
import argparse
import os
import warnings
import yaml

import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import TQDMProgressBar
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from nanodet.data.collate import naive_collate
from nanodet.data.dataset import build_dataset
from nanodet.evaluator import build_evaluator
from nanodet.trainer.task import TrainingTask
from torchvision.transforms import ToTensor, ToPILImage
from nanodet.util import (
    NanoDetLightningLogger,
    cfg,
    convert_old_model,
    env_utils,
    load_config,
    load_model_weight,
    mkdir,
)

# Notebook-wide setup: a logger writing under "test", then a fixed global seed
# so that runs are repeatable.
logger = NanoDetLightningLogger("test")
# Kept as the final expression of the cell so the seed value is still displayed.
pl.seed_everything(seed=1234)

  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 1234


1234

In [2]:
#Function to create the task configuration file required for training
def create_exp_cfg(yml_path, task):
    """Write the per-task configuration file derived from a base YAML config.

    The base configuration at ``yml_path`` is loaded, its save directory,
    class lists and head sizes are overridden for the requested task, the
    resulting dictionary is printed, and it is saved to ``cfg/task<task>.yml``.

    Task 0 (base task) trains and validates on the first 15 classes.
    Task ``k >= 1`` trains only on the ``(15 + k)``-th class, validates on the
    first ``15 + k`` classes, and sets ``schedule.load_model`` to
    ``models/task<k-1>/model_best.ckpt``.

    Args:
        yml_path: Path of the base YAML configuration file.
        task: Integer task index, 0 to 5 inclusive.

    Raises:
        ValueError: If ``task`` is outside the supported range.
    """
    # PASCAL VOC class names; the first entry is spelled "aeroplane".
    all_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    num_base_classes = 15
    last_task = len(all_names) - num_base_classes
    # Reject indices that would silently pick the wrong class (negative
    # indexing) or run past the end of the class list.
    if not 0 <= task <= last_task:
        raise ValueError(
            "task must be between 0 and {}, got {}".format(last_task, task)
        )
    #Load the YAML file
    with open(yml_path, 'r') as file:
        temp_cfg = yaml.safe_load(file)
    #Save dir of the model
    temp_cfg['save_dir'] = 'models/task' + str(task)
    #Number of classes known to the model once this task is learned
    num_seen = num_base_classes + task
    #If base task, training and testing classes are the same
    if task == 0:
        train_names = all_names[:num_base_classes]
    #Else, training only on task specific class, and testing on all classes seen so far
    else:
        train_names = [all_names[num_seen - 1]]
        temp_cfg['schedule']['load_model'] = 'models/task' + str(task-1) + '/model_best.ckpt'
    # The train and val lists must be distinct list objects: dumping the same
    # object twice would make YAML emit an anchor/alias pair instead of two lists.
    temp_cfg['data']['train']['class_names'] = train_names
    temp_cfg['data']['val']['class_names'] = all_names[:num_seen]
    temp_cfg['model']['arch']['head']['num_classes'] = num_seen
    temp_cfg['model']['arch']['aux_head']['num_classes'] = num_seen
    temp_cfg_name = 'cfg/task' + str(task) + '.yml'
    print(temp_cfg)
    #Save the new configuration file (create the output directory if it is missing)
    os.makedirs(os.path.dirname(temp_cfg_name), exist_ok=True)
    with open(temp_cfg_name, 'w') as file:
        yaml.safe_dump(temp_cfg, file)

In [3]:
#Learning stream
#task 0: train on first 15 classes, test on 15 classes
#task 1: train on class n°16, test on 16 classes
#task 2: train on class n°17, test on 17 classes
#task 3: train on class n°18, test on 18 classes
#task 4: train on class n°19, test on 19 classes
#task 5: train on class n°20, test on 20 classes
#The stream above has 6 tasks (0..5), so the loop bound is 6; a bound of 5
#would stop after task 4 and class n°20 would never be learned.
#The loop index is named task_id so that it is not overwritten by the
#TrainingTask instance, which stays bound to `task` as before.
for task_id in range(0, 6):
    # NOTE(review): the captured output shows every log line repeated, which
    # suggests handlers pile up each time a new logger is constructed — confirm.
    logger = NanoDetLightningLogger('run_logs/task'+str(task_id))
    logger.info("Starting task" + str(task_id))
    logger.info("Setting up data...")
    #Create the task configuration file based on the task number and load the configuration
    create_exp_cfg('cfg/VOC.yml', task_id)
    load_config(cfg, 'cfg/task' + str(task_id) + '.yml')
    #Build datasets and dataloaders based on the task configuration file
    train_dataset = build_dataset(cfg.data.train, "train")
    val_dataset = build_dataset(cfg.data.val, "test")
    evaluator = build_evaluator(cfg.evaluator, val_dataset)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=True,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
        drop_last=True,
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=False,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=naive_collate,
        drop_last=False,
    )
    #Create the model based on the task configuration file
    logger.info("Creating model...")
    task = TrainingTask(cfg, evaluator)
    #Load the model weights if the configuration names a checkpoint
    #(create_exp_cfg sets schedule.load_model for every task after task 0)
    # NOTE(review): the checkpoint is expected at models/task<k-1>/model_best.ckpt;
    # verify this matches where the training task actually writes its best model.
    if "load_model" in cfg.schedule:
        # Map tensors to CPU so a checkpoint written on a GPU machine can still
        # be read on a CPU-only host; the weights are then copied into the model.
        ckpt = torch.load(cfg.schedule.load_model, map_location="cpu")
        if "pytorch-lightning_version" not in ckpt:
            warnings.warn(
                "Warning! Old .pth checkpoint is deprecated. "
                "Convert the checkpoint with tools/convert_old_checkpoint.py "
            )
            ckpt = convert_old_model(ckpt)
        load_model_weight(task.model, ckpt, logger)
        logger.info("Loaded model weight from {}".format(cfg.schedule.load_model))
    #Resume from the last checkpoint only when the schedule has a "resume" entry
    model_resume_path = (
        os.path.join(cfg.save_dir, "model_last.ckpt")
        if "resume" in cfg.schedule
        else None
    )
    #gpu_ids == -1 selects CPU training; otherwise gpu_ids is passed on as the device list
    if cfg.device.gpu_ids == -1:
        logger.info("Using CPU training")
        accelerator, devices, strategy, precision = (
            "cpu",
            None,
            None,
            cfg.device.precision,
        )
    else:
        accelerator, devices, strategy, precision = (
            "gpu",
            cfg.device.gpu_ids,
            None,
            cfg.device.precision,
        )

    #More than one device: switch to distributed data parallel
    if devices and len(devices) > 1:
        strategy = "ddp"
        env_utils.set_multi_processing(distributed=True)

    trainer = pl.Trainer(
        default_root_dir=cfg.save_dir,
        max_epochs=cfg.schedule.total_epochs,
        check_val_every_n_epoch=cfg.schedule.val_intervals,
        accelerator=accelerator,
        devices=devices,
        log_every_n_steps=cfg.log.interval,
        num_sanity_val_steps=0,
        callbacks=[TQDMProgressBar(refresh_rate=0)],
        logger=logger,
        benchmark=cfg.get("cudnn_benchmark", True),
        gradient_clip_val=cfg.get("grad_clip", 0.0),
        strategy=strategy,
        precision=precision,
    )
    trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)


[1m[35m[NanoDet][0m[34m[01-29 12:40:28][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[01-29 12:40:28][0m[32mINFO:[0m[97mStarting task0[0m
[1m[35m[NanoDet][0m[34m[01-29 12:40:28][0m[32mINFO:[0m[97mSetting up data...[0m
[1m[35m[NanoDet][0m[34m[01-29 12:40:28][0m[32mINFO:[0m[97mSetting up data...[0m


{'save_dir': 'models/task0', 'model': {'weight_averager': {'name': 'ExpMovingAverager', 'decay': 0.9998}, 'arch': {'name': 'NanoDetPlus', 'detach_epoch': 10, 'backbone': {'name': 'ShuffleNetV2', 'model_size': '1.0x', 'out_stages': [2, 3, 4], 'activation': 'LeakyReLU'}, 'fpn': {'name': 'GhostPAN', 'in_channels': [116, 232, 464], 'out_channels': 96, 'kernel_size': 5, 'num_extra_level': 1, 'use_depthwise': True, 'activation': 'LeakyReLU'}, 'head': {'name': 'NanoDetPlusHead', 'num_classes': 15, 'input_channel': 96, 'feat_channels': 96, 'stacked_convs': 2, 'kernel_size': 5, 'strides': [8, 16, 32, 64], 'activation': 'LeakyReLU', 'reg_max': 7, 'norm_cfg': {'type': 'BN'}, 'loss': {'loss_qfl': {'name': 'QualityFocalLoss', 'use_sigmoid': True, 'beta': 2.0, 'loss_weight': 1.0}, 'loss_dfl': {'name': 'DistributionFocalLoss', 'loss_weight': 0.25}, 'loss_bbox': {'name': 'GIoULoss', 'loss_weight': 2.0}}}, 'aux_head': {'name': 'SimpleConvHead', 'num_classes': 15, 'input_channel': 192, 'feat_channels': 



creating index...
index created!


[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mCreating model...[0m
[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mCreating model...[0m
INFO:NanoDet:Creating model...


creating index...
index created!
model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mUsing CPU training[0m
[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mUsing CPU training[0m
INFO:NanoDet:Using CPU training
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type        | Params
------------------------------------------
0 | model     | NanoDetPlus | 4.2 M 
1 | avg_model | NanoDetPlus | 4.2 M 
------------------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.546    Total estimated model params size (MB)
[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mWeight Averaging is enabled[0m
[1m[35m[NanoDet][0m[34m[01-29 12:41:54][0m[32mINFO:[0m[97mWeight Averaging is enabled[0m
INFO:NanoDet:Weight Averaging is enabled
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[1m[3

{'save_dir': 'models/task1', 'model': {'weight_averager': {'name': 'ExpMovingAverager', 'decay': 0.9998}, 'arch': {'name': 'NanoDetPlus', 'detach_epoch': 10, 'backbone': {'name': 'ShuffleNetV2', 'model_size': '1.0x', 'out_stages': [2, 3, 4], 'activation': 'LeakyReLU'}, 'fpn': {'name': 'GhostPAN', 'in_channels': [116, 232, 464], 'out_channels': 96, 'kernel_size': 5, 'num_extra_level': 1, 'use_depthwise': True, 'activation': 'LeakyReLU'}, 'head': {'name': 'NanoDetPlusHead', 'num_classes': 16, 'input_channel': 96, 'feat_channels': 96, 'stacked_convs': 2, 'kernel_size': 5, 'strides': [8, 16, 32, 64], 'activation': 'LeakyReLU', 'reg_max': 7, 'norm_cfg': {'type': 'BN'}, 'loss': {'loss_qfl': {'name': 'QualityFocalLoss', 'use_sigmoid': True, 'beta': 2.0, 'loss_weight': 1.0}, 'loss_dfl': {'name': 'DistributionFocalLoss', 'loss_weight': 0.25}, 'loss_bbox': {'name': 'GIoULoss', 'loss_weight': 2.0}}}, 'aux_head': {'name': 'SimpleConvHead', 'num_classes': 16, 'input_channel': 192, 'feat_channels': 