From ad20c3edfb85f23d08fa0f05d58a81bca98a2652 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:35:24 +0900 Subject: [PATCH 001/167] Modify classification augmentation config --- config/augmentation/classification.yaml | 37 ++++++------------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index d8ac19f95..cd1c22ef1 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -1,31 +1,10 @@ augmentation: img_size: &img_size 256 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: ~ - min_scale: ~ - crop_size_h: ~ - crop_size_w: ~ - resize_ratio0: ~ - resize_ratiof: ~ - resize_add: ~ - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: 1.0 - copy_paste: ~ - mixup_alpha: 0.0 - cutmix_alpha: 0.0 - mixup_switch_prob: 0.5 - color_jitter: - brightness: ~ - contrast: ~ - saturation: ~ - hue: ~ - colorjitter_p: ~ \ No newline at end of file + augment_info: + - + name: RandomResizedCrop + size: *img_size + interpolation: bilinear + - + name: RandomHorizontalFlip + p: 0.5 From c1f9ca6a8863a39273d5c2029f103b26f061a875 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:35:49 +0900 Subject: [PATCH 002/167] Add TRANSFORM_DICT --- .../dataloaders/augmentation/registry.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/netspresso_trainer/dataloaders/augmentation/registry.py diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py new file mode 100644 index 000000000..fbb649664 --- /dev/null +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -0,0 +1,8 @@ +from typing import Dict, Callable + +from .custom import RandomHorizontalFlip, RandomResizedCrop + +TRANSFORM_DICT: Dict[str, Callable] = { + 'randomresizedcrop': RandomResizedCrop, + 'randomhorizontalflip': RandomHorizontalFlip +} From e7f5ea235bb064c0f56fb42c6edf444ab4380727 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:36:46 +0900 Subject: [PATCH 003/167] Fix transforms_custom_train to get from config --- .../dataloaders/classification/transforms.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/classification/transforms.py b/src/netspresso_trainer/dataloaders/classification/transforms.py index 9ac5f0004..639daf983 100644 --- a/src/netspresso_trainer/dataloaders/classification/transforms.py +++ b/src/netspresso_trainer/dataloaders/classification/transforms.py @@ -1,21 +1,29 @@ +import inspect from typing import Optional from torchvision.transforms.functional import InterpolationMode from ..augmentation import custom as TC +from ..augmentation.registry import TRANSFORM_DICT from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD def transforms_custom_train(conf_augmentation): assert conf_augmentation.img_size > 32 - primary_tfl = [TC.RandomResizedCrop(conf_augmentation.img_size, interpolation=InterpolationMode.BILINEAR), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr) - ] - preprocess = [ + preprocess = [] + for augment in conf_augmentation.augment_info: + name = augment.name.lower() + transform_args = list(inspect.signature(TRANSFORM_DICT[name]).parameters) + transform_kwargs = {key:augment[key] for key in transform_args if hasattr(augment, key)} + + transform = TRANSFORM_DICT[name](**transform_kwargs) + preprocess.append(transform) + + preprocess = preprocess + [ TC.ToTensor(), TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) ] - return TC.Compose(primary_tfl + preprocess) + return TC.Compose(preprocess) def transforms_custom_eval(conf_augmentation): From f2c8c0f2f15f7d9dbb36b01f50413d292a1d72b0 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:37:09 +0900 Subject: [PATCH 004/167] Fix RandomResizedCrop to get str interpolation --- .../dataloaders/augmentation/custom.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index b81861abb..9f51900a6 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -7,6 +7,7 @@ import torch import torchvision.transforms as T import torchvision.transforms.functional as F +from torchvision.transforms.functional import InterpolationMode BBOX_CROP_KEEP_THRESHOLD = 0.2 MAX_RETRY = 5 @@ -275,6 +276,21 @@ def __repr__(self): class RandomResizedCrop(T.RandomResizedCrop): visualize = True + def __init__(self, + size, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation='bilinear', + antialias: Optional[bool]=None): + + inverse_modes_mapping = { + 'nearest': InterpolationMode.NEAREST, + 'bilinear': InterpolationMode.BILINEAR, + 'bicubic': InterpolationMode.BICUBIC, + } + interpolation = inverse_modes_mapping[interpolation] + super().__init__(size, scale, ratio, interpolation, antialias) + def _crop_bbox(self, bbox, i, j, h, w): area_original = (bbox[..., 2] - bbox[..., 0]) * (bbox[..., 3] - bbox[..., 1]) From 6b30a8f979727807da04bcaadfb1cfe932da0a97 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:37:38 +0900 Subject: [PATCH 005/167] Add init --- src/netspresso_trainer/dataloaders/augmentation/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/__init__.py b/src/netspresso_trainer/dataloaders/augmentation/__init__.py index 0c893e748..e07ec14c0 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/augmentation/__init__.py @@ -12,3 +12,4 @@ Resize, ToTensor, ) +from .registry import CUSTOM_TRANSFORM \ No newline at end of file From bf42a57df71ea4be8f1b3e62664d0a5dde4e4ed4 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:39:18 +0900 Subject: [PATCH 006/167] Ruff fix --- src/netspresso_trainer/dataloaders/augmentation/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index fbb649664..ec482fa5e 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -1,4 +1,4 @@ -from typing import Dict, Callable +from typing import Callable, Dict from .custom import RandomHorizontalFlip, RandomResizedCrop From 4b631dcef4db1ed53300ebb3ce9a9113199e15e5 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 23 Oct 2023 15:45:22 +0900 Subject: [PATCH 007/167] Fix init --- src/netspresso_trainer/dataloaders/augmentation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/__init__.py b/src/netspresso_trainer/dataloaders/augmentation/__init__.py index e07ec14c0..34ae87e36 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/augmentation/__init__.py @@ -12,4 +12,4 @@ Resize, ToTensor, ) -from .registry import CUSTOM_TRANSFORM \ No newline at end of file +from .registry import TRANSFORM_DICT \ No newline at end of file From ce98a15c0b8951fd65653d7a40d2adb08f1948b8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 1 Nov 2023 15:09:37 +0900 Subject: [PATCH 008/167] Change augment_info to recipe --- config/augmentation/classification.yaml | 2 +- src/netspresso_trainer/dataloaders/classification/transforms.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index cd1c22ef1..840744236 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -1,6 +1,6 @@ augmentation: img_size: &img_size 256 - augment_info: + recipe: - name: RandomResizedCrop size: *img_size diff --git a/src/netspresso_trainer/dataloaders/classification/transforms.py b/src/netspresso_trainer/dataloaders/classification/transforms.py index 639daf983..123f3aa91 100644 --- a/src/netspresso_trainer/dataloaders/classification/transforms.py +++ b/src/netspresso_trainer/dataloaders/classification/transforms.py @@ -11,7 +11,7 @@ def transforms_custom_train(conf_augmentation): assert conf_augmentation.img_size > 32 preprocess = [] - for augment in conf_augmentation.augment_info: + for augment in conf_augmentation.recipe: name = augment.name.lower() transform_args = list(inspect.signature(TRANSFORM_DICT[name]).parameters) transform_kwargs = {key:augment[key] for key in transform_args if hasattr(augment, key)} From 357ddded4fd8cb160a14424a497cd4bc135a21ac Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 1 Nov 2023 15:32:28 +0900 Subject: [PATCH 009/167] Remove inspect --- .../dataloaders/classification/transforms.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/classification/transforms.py b/src/netspresso_trainer/dataloaders/classification/transforms.py index 123f3aa91..b76aaae52 100644 --- a/src/netspresso_trainer/dataloaders/classification/transforms.py +++ b/src/netspresso_trainer/dataloaders/classification/transforms.py @@ -1,4 +1,3 @@ -import inspect from typing import Optional from torchvision.transforms.functional import InterpolationMode @@ -13,10 +12,10 @@ def transforms_custom_train(conf_augmentation): preprocess = [] for augment in conf_augmentation.recipe: name = augment.name.lower() - transform_args = list(inspect.signature(TRANSFORM_DICT[name]).parameters) - transform_kwargs = {key:augment[key] for key in transform_args if hasattr(augment, key)} - - transform = TRANSFORM_DICT[name](**transform_kwargs) + augment_kwargs = list(augment.keys()) + augment_kwargs.remove('name') + augment_kwargs = {k:augment[k] for k in augment_kwargs} + transform = TRANSFORM_DICT[name](**augment_kwargs) preprocess.append(transform) preprocess = preprocess + [ From 36454dae6791309584c262617af02c84d5fdbba1 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 1 Nov 2023 15:40:57 +0900 Subject: [PATCH 010/167] Update TRANSFORM_DICT --- .../dataloaders/augmentation/registry.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index ec482fa5e..be0ab3ec4 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -1,8 +1,13 @@ from typing import Callable, Dict -from .custom import RandomHorizontalFlip, RandomResizedCrop +from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize TRANSFORM_DICT: Dict[str, Callable] = { + 'colorjitter': ColorJitter, + 'pad': Pad, + 'randomcrop': RandomCrop, 'randomresizedcrop': RandomResizedCrop, - 'randomhorizontalflip': RandomHorizontalFlip + 'randomhorizontalflip': RandomHorizontalFlip, + 'randomVerticalFlip': RandomVerticalFlip, + 'resize': Resize, } From dbb324498ce42019861e57603c2b91b7bb81f6f8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 1 Nov 2023 16:05:25 +0900 Subject: [PATCH 011/167] Change transforms to non task specific --- .../dataloaders/augmentation/transforms.py | 38 +++++++++++++++++++ src/netspresso_trainer/dataloaders/builder.py | 5 +-- .../dataloaders/registry.py | 12 ++---- 3 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 src/netspresso_trainer/dataloaders/augmentation/transforms.py diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py new file mode 100644 index 000000000..8e1b90517 --- /dev/null +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -0,0 +1,38 @@ +from torchvision.transforms.functional import InterpolationMode + +from . import custom as TC +from .registry import TRANSFORM_DICT +from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD + + +def transforms_custom_train(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [] + for augment in conf_augmentation.recipe: + name = augment.name.lower() + augment_kwargs = list(augment.keys()) + augment_kwargs.remove('name') + augment_kwargs = {k:augment[k] for k in augment_kwargs} + transform = TRANSFORM_DICT[name](**augment_kwargs) + preprocess.append(transform) + + preprocess = preprocess + [ + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess) + + +def transforms_custom_eval(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [ + TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), + interpolation=InterpolationMode.BILINEAR), + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess) + + +def create_transform(is_training=False): + return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index f13a7feac..f0bc0271c 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -19,11 +19,10 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): task = conf_data.task - assert task in CREATE_TRANSFORM, f"The given task `{task}` is not supported!" assert task in DATA_SAMPLER, f"Data sampler for {task} is not yet supported!" - train_transform = CREATE_TRANSFORM[task](model_name, is_training=True) - target_transform = CREATE_TRANSFORM[task](model_name, is_training=False) + train_transform = CREATE_TRANSFORM(is_training=True) + target_transform = CREATE_TRANSFORM(is_training=False) data_format = conf_data.format diff --git a/src/netspresso_trainer/dataloaders/registry.py b/src/netspresso_trainer/dataloaders/registry.py index bc71e992b..1ab25afea 100644 --- a/src/netspresso_trainer/dataloaders/registry.py +++ b/src/netspresso_trainer/dataloaders/registry.py @@ -1,26 +1,20 @@ from typing import Callable, Dict, Type -from .augmentation import custom as TC +from .augmentation.transforms import create_transform from .base import BaseCustomDataset, BaseDataSampler, BaseHFDataset from .classification import ( ClassficationDataSampler, ClassificationCustomDataset, ClassificationHFDataset, - create_transform_classification, ) -from .detection import DetectionCustomDataset, DetectionDataSampler, create_transform_detection +from .detection import DetectionCustomDataset, DetectionDataSampler from .segmentation import ( SegmentationCustomDataset, SegmentationDataSampler, SegmentationHFDataset, - create_transform_segmentation, ) -CREATE_TRANSFORM: Dict[str, Callable[..., Callable[..., TC.Compose]]] = { - 'classification': create_transform_classification, - 'segmentation': create_transform_segmentation, - 'detection': create_transform_detection -} +CREATE_TRANSFORM = create_transform CUSTOM_DATASET: Dict[str, Type[BaseCustomDataset]] = { 'classification': ClassificationCustomDataset, From cfdcd82fca39a8a3933f3a55f913e188f0471017 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 1 Nov 2023 18:14:16 +0900 Subject: [PATCH 012/167] Fix Resize --- .../dataloaders/augmentation/custom.py | 26 ++++++++++++++----- .../dataloaders/augmentation/transforms.py | 3 +-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 6ec8b83ef..a40b4b495 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -11,6 +11,12 @@ BBOX_CROP_KEEP_THRESHOLD = 0.2 MAX_RETRY = 5 +INVERSE_MODES_MAPPING = { + 'nearest': InterpolationMode.NEAREST, + 'bilinear': InterpolationMode.BILINEAR, + 'bicubic': InterpolationMode.BICUBIC, +} + class Compose: def __init__(self, transforms, additional_targets: Dict = None): @@ -92,6 +98,18 @@ def __repr__(self): class Resize(T.Resize): visualize = True + def __init__(self, size, interpolation='bilinear', max_size=None, antialias=None): + interpolation = INVERSE_MODES_MAPPING[interpolation] + + # TODO: There is logic error in forward. If `size` is int, this specify edge for shorter one. + # And, this is not match with bbox computing logic. + # Thus, automatically transform to sequence format for now, + # but this should be specified whether Resize receives sequence or int. + if isinstance(size, int): + size = [size, size] + + super().__init__(size, interpolation, max_size, antialias) + def forward(self, image, mask=None, bbox=None): w, h = image.size @@ -282,13 +300,7 @@ def __init__(self, ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation='bilinear', antialias: Optional[bool]=None): - - inverse_modes_mapping = { - 'nearest': InterpolationMode.NEAREST, - 'bilinear': InterpolationMode.BILINEAR, - 'bicubic': InterpolationMode.BICUBIC, - } - interpolation = inverse_modes_mapping[interpolation] + interpolation = INVERSE_MODES_MAPPING[interpolation] super().__init__(size, scale, ratio, interpolation, antialias) def _crop_bbox(self, bbox, i, j, h, w): diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index 8e1b90517..788a42fc2 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -26,8 +26,7 @@ def transforms_custom_train(conf_augmentation): def transforms_custom_eval(conf_augmentation): assert conf_augmentation.img_size > 32 preprocess = [ - TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), - interpolation=InterpolationMode.BILINEAR), + TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), interpolation='bilinear'), TC.ToTensor(), TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) ] From d9a9d09d36ed2da4cc11620781155cf8a1031a12 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:00:40 +0900 Subject: [PATCH 013/167] Revert model_name --- src/netspresso_trainer/dataloaders/augmentation/transforms.py | 2 +- src/netspresso_trainer/dataloaders/builder.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index 788a42fc2..25bfd3152 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -33,5 +33,5 @@ def transforms_custom_eval(conf_augmentation): return TC.Compose(preprocess) -def create_transform(is_training=False): +def create_transform(model_name: str, is_training=False): return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index f0bc0271c..acba53526 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -21,8 +21,8 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): assert task in DATA_SAMPLER, f"Data sampler for {task} is not yet supported!" - train_transform = CREATE_TRANSFORM(is_training=True) - target_transform = CREATE_TRANSFORM(is_training=False) + train_transform = CREATE_TRANSFORM(model_name, is_training=True) + target_transform = CREATE_TRANSFORM(model_name, is_training=False) data_format = conf_data.format From f3a11a66cbd212ffb9e0b732a1790c3517722a39 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:06:20 +0900 Subject: [PATCH 014/167] Leave pidnet as exception --- .../dataloaders/augmentation/transforms.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index 25bfd3152..47f2c6603 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -33,5 +33,34 @@ def transforms_custom_eval(conf_augmentation): return TC.Compose(preprocess) +def train_transforms_pidnet(conf_augmentation): + preprocess = [] + for augment in conf_augmentation.recipe: + name = augment.name.lower() + augment_kwargs = list(augment.keys()) + augment_kwargs.remove('name') + augment_kwargs = {k:augment[k] for k in augment_kwargs} + transform = TRANSFORM_DICT[name](**augment_kwargs) + preprocess.append(transform) + + preprocess = preprocess + [ + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess, additional_targets={'edge': 'mask'}) + + +def val_transforms_pidnet(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [ + TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), interpolation='bilinear'), + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess, additional_targets={'edge': 'mask'}) + + def create_transform(model_name: str, is_training=False): + if model_name == 'pidnet': + return train_transforms_pidnet if is_training else val_transforms_pidnet return transforms_custom_train if is_training else transforms_custom_eval From cedd226a79286279b3bce9e20ecd37394e3de8c6 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:06:35 +0900 Subject: [PATCH 015/167] Change detection and segmentation aug config --- config/augmentation/detection.yaml | 34 +++------------------ config/augmentation/segmentation.yaml | 44 +++++++++------------------ 2 files changed, 20 insertions(+), 58 deletions(-) diff --git a/config/augmentation/detection.yaml b/config/augmentation/detection.yaml index 4dafab461..5adf7e5c4 100644 --- a/config/augmentation/detection.yaml +++ b/config/augmentation/detection.yaml @@ -1,31 +1,7 @@ augmentation: img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 2048 - min_scale: 768 - crop_size_h: 512 - crop_size_w: 512 - resize_ratio0: 0.5 - resize_ratiof: 2.0 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + recipe: + - + name: Resize + size: *img_size + interpolation: bilinear diff --git a/config/augmentation/segmentation.yaml b/config/augmentation/segmentation.yaml index 48dae02ff..a0f2c7a55 100644 --- a/config/augmentation/segmentation.yaml +++ b/config/augmentation/segmentation.yaml @@ -1,31 +1,17 @@ augmentation: img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 1024 - min_scale: *img_size - crop_size_h: *img_size - crop_size_w: *img_size - resize_ratio0: 1.0 - resize_ratiof: 1.5 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + recipe: + - + name: RandomResizedCrop + size: *img_size + interpolation: bilinear + - + name: RandomHorizontalFlip + p: 0.5 + - + name: ColorJitter + brightness: 0.25 + contrast: 0.25 + saturation: 0.25 + hue: 0.1 + p: 0.5 From 95c1a0b803417514ccdaccbe4b5569ed6c8e864a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:12:39 +0900 Subject: [PATCH 016/167] Move pidnet function --- .../dataloaders/augmentation/transforms.py | 25 ++++++++++++++++++- .../dataloaders/segmentation/huggingface.py | 2 +- .../dataloaders/segmentation/local.py | 2 +- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index 47f2c6603..d0948dc56 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -1,9 +1,32 @@ -from torchvision.transforms.functional import InterpolationMode +import cv2 +import numpy as np +import PIL.Image as Image from . import custom as TC from .registry import TRANSFORM_DICT from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +EDGE_SIZE = 4 +Y_K_SIZE = 6 +X_K_SIZE = 6 + + +def reduce_label(label: np.ndarray) -> Image.Image: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + return Image.fromarray(label) + + +def generate_edge(label: np.ndarray) -> Image.Image: + edge = cv2.Canny(label, 0.1, 0.2) + kernel = np.ones((EDGE_SIZE, EDGE_SIZE), np.uint8) + # edge_pad == True + edge = edge[Y_K_SIZE:-Y_K_SIZE, X_K_SIZE:-X_K_SIZE] + edge = np.pad(edge, ((Y_K_SIZE, Y_K_SIZE), (X_K_SIZE, X_K_SIZE)), mode='constant') + edge = (cv2.dilate(edge, kernel, iterations=1) > 50) * 1.0 + return Image.fromarray((edge.copy() * 255).astype(np.uint8)) + def transforms_custom_train(conf_augmentation): assert conf_augmentation.img_size > 32 diff --git a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py index 8c494307c..e6a2cd2df 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py +++ b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py @@ -4,7 +4,7 @@ import PIL.Image as Image from ..base import BaseHFDataset -from ..segmentation.transforms import generate_edge, reduce_label +from ..augmentation.transforms import generate_edge, reduce_label class SegmentationHFDataset(BaseHFDataset): diff --git a/src/netspresso_trainer/dataloaders/segmentation/local.py b/src/netspresso_trainer/dataloaders/segmentation/local.py index 1ab8d3056..cb67e94a0 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/local.py +++ b/src/netspresso_trainer/dataloaders/segmentation/local.py @@ -6,7 +6,7 @@ import PIL.Image as Image from ..base import BaseCustomDataset -from ..segmentation.transforms import generate_edge, reduce_label +from ..augmentation.transforms import generate_edge, reduce_label class SegmentationCustomDataset(BaseCustomDataset): From ec410c8b646228c52c70272acdedea97b2001b6b Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:13:18 +0900 Subject: [PATCH 017/167] Remove task specific transforms --- .../dataloaders/classification/transforms.py | 40 ------ .../dataloaders/detection/transforms.py | 40 ------ .../dataloaders/segmentation/transforms.py | 119 ------------------ 3 files changed, 199 deletions(-) delete mode 100644 src/netspresso_trainer/dataloaders/classification/transforms.py delete mode 100644 src/netspresso_trainer/dataloaders/detection/transforms.py delete mode 100644 src/netspresso_trainer/dataloaders/segmentation/transforms.py diff --git a/src/netspresso_trainer/dataloaders/classification/transforms.py b/src/netspresso_trainer/dataloaders/classification/transforms.py deleted file mode 100644 index b76aaae52..000000000 --- a/src/netspresso_trainer/dataloaders/classification/transforms.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -from torchvision.transforms.functional import InterpolationMode - -from ..augmentation import custom as TC -from ..augmentation.registry import TRANSFORM_DICT -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - - -def transforms_custom_train(conf_augmentation): - assert conf_augmentation.img_size > 32 - preprocess = [] - for augment in conf_augmentation.recipe: - name = augment.name.lower() - augment_kwargs = list(augment.keys()) - augment_kwargs.remove('name') - augment_kwargs = {k:augment[k] for k in augment_kwargs} - transform = TRANSFORM_DICT[name](**augment_kwargs) - preprocess.append(transform) - - preprocess = preprocess + [ - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ] - return TC.Compose(preprocess) - - -def transforms_custom_eval(conf_augmentation): - assert conf_augmentation.img_size > 32 - preprocess = [ - TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), - interpolation=InterpolationMode.BILINEAR), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ] - return TC.Compose(preprocess) - - -def create_transform_classification(model_name: str, is_training=False): - return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/detection/transforms.py b/src/netspresso_trainer/dataloaders/detection/transforms.py deleted file mode 100644 index ac3090a59..000000000 --- a/src/netspresso_trainer/dataloaders/detection/transforms.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -import cv2 -import numpy as np -import PIL.Image as Image - -from ..augmentation import custom as TC -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - - -def train_transforms_efficientformer(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - train_transforms_composed = TC.Compose([ - TC.Resize(size=(crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return train_transforms_composed - -def val_transforms_efficientformer(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - val_transforms_composed = TC.Compose([ - TC.Resize(size=(crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return val_transforms_composed - -def create_transform_detection(model_name: str, is_training=False): - if is_training: - return train_transforms_efficientformer - return val_transforms_efficientformer diff --git a/src/netspresso_trainer/dataloaders/segmentation/transforms.py b/src/netspresso_trainer/dataloaders/segmentation/transforms.py deleted file mode 100644 index d4aa506c0..000000000 --- a/src/netspresso_trainer/dataloaders/segmentation/transforms.py +++ /dev/null @@ -1,119 +0,0 @@ -from typing import Optional - -import cv2 -import numpy as np -import PIL.Image as Image - -from ..augmentation import custom as TC -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - -EDGE_SIZE = 4 -Y_K_SIZE = 6 -X_K_SIZE = 6 - - -def reduce_label(label: np.ndarray) -> Image.Image: - label[label == 0] = 255 - label = label - 1 - label[label == 254] = 255 - return Image.fromarray(label) - -def generate_edge(label: np.ndarray) -> Image.Image: - edge = cv2.Canny(label, 0.1, 0.2) - kernel = np.ones((EDGE_SIZE, EDGE_SIZE), np.uint8) - # edge_pad == True - edge = edge[Y_K_SIZE:-Y_K_SIZE, X_K_SIZE:-X_K_SIZE] - edge = np.pad(edge, ((Y_K_SIZE, Y_K_SIZE), (X_K_SIZE, X_K_SIZE)), mode='constant') - edge = (cv2.dilate(edge, kernel, iterations=1) > 50) * 1.0 - return Image.fromarray((edge.copy() * 255).astype(np.uint8)) - - -def train_transforms_segmentation(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - scale_ratio = (conf_augmentation.resize_ratio0, conf_augmentation.resize_ratiof) - - train_transforms_composed = TC.Compose([ - TC.RandomResizedCrop((crop_size_h, crop_size_w), scale=scale_ratio, ratio=(1.0, 1.0)), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr), - TC.ColorJitter(brightness=conf_augmentation.color_jitter.brightness, - contrast=conf_augmentation.color_jitter.contrast, - saturation=conf_augmentation.color_jitter.saturation, - hue=conf_augmentation.color_jitter.hue, - p=conf_augmentation.color_jitter.colorjitter_p), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return train_transforms_composed - -def val_transforms_segmentation(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - val_transforms_composed = TC.Compose([ - TC.Resize((crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return val_transforms_composed - - -def infer_transforms_segmentation(conf_augmentation): - return - - -def train_transforms_pidnet(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - scale_ratio = (conf_augmentation.resize_ratio0, conf_augmentation.resize_ratiof) - - train_transforms_composed = TC.Compose( - [ - TC.RandomResizedCrop((crop_size_h, crop_size_w), scale=scale_ratio, ratio=(1.0, 1.0)), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ], - additional_targets={'edge': 'mask'} - ) - - return train_transforms_composed - - -def val_transforms_pidnet(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - val_transforms_composed = TC.Compose( - [ - TC.Resize((crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ], - additional_targets={'edge': 'mask'} - ) - - return val_transforms_composed - - -def infer_transforms_pidnet(conf_augmentation): - return - - -def create_transform_segmentation(model_name: str, is_training=False): - - if model_name == 'pidnet': - if is_training: - return train_transforms_pidnet - return val_transforms_pidnet - if is_training: - return train_transforms_segmentation - return val_transforms_segmentation From 9537d55795ee24a1bd35db4ab09bf34311d6e7c1 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 11:39:37 +0900 Subject: [PATCH 018/167] Remove error --- src/netspresso_trainer/dataloaders/classification/__init__.py | 1 - src/netspresso_trainer/dataloaders/detection/__init__.py | 1 - src/netspresso_trainer/dataloaders/segmentation/__init__.py | 1 - 3 files changed, 3 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/classification/__init__.py b/src/netspresso_trainer/dataloaders/classification/__init__.py index b297f1a01..d7d9a63f8 100644 --- a/src/netspresso_trainer/dataloaders/classification/__init__.py +++ b/src/netspresso_trainer/dataloaders/classification/__init__.py @@ -1,4 +1,3 @@ from .dataset import ClassficationDataSampler from .huggingface import ClassificationHFDataset from .local import ClassificationCustomDataset -from .transforms import create_transform_classification diff --git a/src/netspresso_trainer/dataloaders/detection/__init__.py b/src/netspresso_trainer/dataloaders/detection/__init__.py index 9234262ef..38587950d 100644 --- a/src/netspresso_trainer/dataloaders/detection/__init__.py +++ b/src/netspresso_trainer/dataloaders/detection/__init__.py @@ -1,3 +1,2 @@ from .dataset import DetectionDataSampler, detection_collate_fn from .local import DetectionCustomDataset -from .transforms import create_transform_detection diff --git a/src/netspresso_trainer/dataloaders/segmentation/__init__.py b/src/netspresso_trainer/dataloaders/segmentation/__init__.py index efed9d1d7..9d73a0300 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/segmentation/__init__.py @@ -1,4 +1,3 @@ from .dataset import SegmentationDataSampler from .huggingface import SegmentationHFDataset from .local import SegmentationCustomDataset -from .transforms import create_transform_segmentation From dc794b3584bbc4cb0ba3550ef3987dc6a070707c Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 13:15:46 +0900 Subject: [PATCH 019/167] Update gradio demo --- demo/gradio_augmentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/gradio_augmentation.py b/demo/gradio_augmentation.py index e885dab7f..5fa0c901f 100644 --- a/demo/gradio_augmentation.py +++ b/demo/gradio_augmentation.py @@ -25,7 +25,7 @@ def summary_transform(phase, task, model_name, yaml_str): try: conf = OmegaConf.create(yaml_str) is_training = (phase == 'train') - transform = CREATE_TRANSFORM[task](model_name, is_training=is_training) + transform = CREATE_TRANSFORM(model_name, is_training=is_training) transform_composed = transform(conf.augmentation) return str(transform_composed) except Exception as e: @@ -37,7 +37,7 @@ def get_augmented_images(phase, task, model_name, yaml_str, test_image, try: conf = OmegaConf.create(yaml_str) is_training = (phase == 'train') - transform = CREATE_TRANSFORM[task](model_name, is_training=is_training) + transform = CREATE_TRANSFORM(model_name, is_training=is_training) transform_composed = transform(conf.augmentation) transformed_images = [transform_composed(test_image, From 2e1116f2ac1af1e66fd06114b24ea27915702ed6 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 13:44:22 +0900 Subject: [PATCH 020/167] Fix typo --- src/netspresso_trainer/dataloaders/augmentation/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index be0ab3ec4..c724aafa6 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -8,6 +8,6 @@ 'randomcrop': RandomCrop, 'randomresizedcrop': RandomResizedCrop, 'randomhorizontalflip': RandomHorizontalFlip, - 'randomVerticalFlip': RandomVerticalFlip, + 'randomverticalflip': RandomVerticalFlip, 'resize': Resize, } From bfaf3dcaeb419375a696abd6752da0f093f262ae Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 13:53:50 +0900 Subject: [PATCH 021/167] Update augmentation template config --- config/augmentation/template/common.yaml | 55 +++++++++++------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/config/augmentation/template/common.yaml b/config/augmentation/template/common.yaml index 48dae02ff..f87d90bbd 100644 --- a/config/augmentation/template/common.yaml +++ b/config/augmentation/template/common.yaml @@ -1,31 +1,26 @@ augmentation: - img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 1024 - min_scale: *img_size - crop_size_h: *img_size - crop_size_w: *img_size - resize_ratio0: 1.0 - resize_ratiof: 1.5 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + img_size: &img_size ~ + recipe: + - + name: RandomResizedCrop + size: ~ + interpolation: bilinear + - + name: RandomHorizontalFlip + p: ~ + - + name: RandomVerticalFlip + p: ~ + - + name: ColorJitter + brightness: ~ + contrast: ~ + saturation: ~ + hue: ~ + p: ~ + - + name: Resize + size: ~ + - + name: Pad + padding: ~ \ No newline at end of file From c44211ac6edacfca110ce2d9e9f969782c133e28 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 15:34:51 +0900 Subject: [PATCH 022/167] Update cfg module --- src/netspresso_trainer/cfg/__init__.py | 17 ++-- src/netspresso_trainer/cfg/augmentation.py | 93 +++++++++++----------- tools/config_test.py | 10 ++- 3 files changed, 58 insertions(+), 62 deletions(-) diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 892dcd416..45c681b26 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -5,10 +5,13 @@ from .augmentation import ( AugmentationConfig, - ClassificationAugmentationConfig, ColorJitter, - DetectionAugmentationConfig, - SegmentationAugmentationConfig, + Pad, + RandomCrop, + RandomResizedCrop, + RandomHorizontalFlip, + RandomVerticalFlip, + Resize, ) from .data import ( DatasetConfig, @@ -47,12 +50,6 @@ ) from .training import ClassificationScheduleConfig, DetectionScheduleConfig, ScheduleConfig, SegmentationScheduleConfig -_AUGMENTATION_CONFIG_TYPE_DICT = { - 'classification': ClassificationAugmentationConfig, - 'segmentation': SegmentationAugmentationConfig, - 'detection': DetectionAugmentationConfig -} - _TRAINING_CONFIG_TYPE_DICT = { 'classification': ClassificationScheduleConfig, 'segmentation': SegmentationScheduleConfig, @@ -101,6 +98,6 @@ def __post_init__(self): if self.auto: if self.augmentation is None: - self.augmentation = _AUGMENTATION_CONFIG_TYPE_DICT[self.task]() + self.augmentation = AugmentationConfig() if self.training is None: self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() \ No newline at end of file diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index 8e9378b7f..a7ac0acff 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -1,12 +1,20 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Optional, Union +from typing import Optional, Union, List from omegaconf import MISSING, MissingMandatoryValue +DEFAULT_IMG_SIZE = 256 + @dataclass -class ColorJitter: +class Transform: + name: str = MissingMandatoryValue + + +@dataclass +class ColorJitter(Transform): + name: str = 'colorjitter' brightness: Optional[float] = 0.25 contrast: Optional[float] = 0.25 saturation: Optional[float] = 0.25 @@ -15,56 +23,45 @@ class ColorJitter: @dataclass -class AugmentationConfig: - img_size: int = 256 - max_scale: Optional[int] = 1024 - min_scale: Optional[int] = None - crop_size_h: Optional[int] = None - crop_size_w: Optional[int] = None - resize_ratio0: Optional[float] = None - resize_ratiof: Optional[float] = None - resize_add: Optional[float] = 1 - fliplr: Optional[float] = 0.5 - color_jitter: Optional[ColorJitter] = field(default_factory=lambda: ColorJitter()) - - +class Pad(Transform): + name: str = 'pad' + padding: Union[int, List] = 0 + @dataclass -class ClassificationAugmentationConfig(AugmentationConfig): - resize_ratio0 = None - resize_ratiof = None - resize_add = None - color_jitter = None +class RandomCrop(Transform): + name: str = 'randomcrop' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' @dataclass -class SegmentationAugmentationConfig(AugmentationConfig): - img_size = 512 - resize_ratio0 = 1.0 - resize_ratiof = 1.5 - - def __post_init__(self): - # variable interpolation - if self.min_scale is None: - self.min_scale = self.img_size - if self.crop_size_h is None: - self.crop_size_h = self.img_size - if self.crop_size_w is None: - self.crop_size_w = self.img_size - +class RandomResizedCrop(Transform): + name: str = 'randomresizedcrop' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' + @dataclass -class DetectionAugmentationConfig(AugmentationConfig): - img_size = 512 - max_scale = 2048 - min_scale = 768 - resize_ratio0: 0.5 - resize_ratiof: 2.0 - resize_add: 1 - - def __post_init__(self): - # variable interpolation - if self.crop_size_h is None: - self.crop_size_h = self.img_size - if self.crop_size_w is None: - self.crop_size_w = self.img_size \ No newline at end of file +class RandomHorizontalFlip(Transform): + name: str = 'randomhorizontalflip' + p: float = 0.5 + + +@dataclass +class RandomVerticalFlip(Transform): + name: str = 'randomverticalflip' + p: float = 0.5 + + +@dataclass +class Resize(Transform): + name: str = 'resize' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' + + +@dataclass +class AugmentationConfig: + img_size: int = DEFAULT_IMG_SIZE + recipe: List[Transform] = field(default_factory=list) diff --git a/tools/config_test.py b/tools/config_test.py index ea1c451d2..67ddc7826 100644 --- a/tools/config_test.py +++ b/tools/config_test.py @@ -8,13 +8,15 @@ if __name__ == "__main__": from netspresso_trainer.cfg import ( - ClassificationAugmentationConfig, + AugmentationConfig, ClassificationResNetModelConfig, ColorJitter, + RandomResizedCrop, + RandomHorizontalFlip, ExampleBeansDataset, ) - augmentation_config = ClassificationAugmentationConfig(color_jitter=ColorJitter(colorjitter_p=0.9)) + augmentation_config = AugmentationConfig(recipe=[RandomResizedCrop(), RandomHorizontalFlip(), ColorJitter()]) example_dataset = ExampleBeansDataset example_model = ClassificationResNetModelConfig() cfg = TrainerConfig( @@ -32,12 +34,12 @@ # OK: update value of subclass in the main dataclass cfg_new: TrainerConfig = deepcopy(cfg) - cfg_new.augmentation.color_jitter.saturation = 0.0 + cfg_new.augmentation.recipe[-1].saturation = 0.0 # print(OmegaConf.to_yaml(OmegaConf.structured(cfg_new))) # OK: update value from OmegaConf Config config_new: TrainerConfig = deepcopy(config) - config_new.augmentation.color_jitter.hue = 0.5 + cfg_new.augmentation.recipe[-1].hue = 0.5 # print(OmegaConf.to_yaml(config_new)) From 57953cdddef00a9d312bb7ec97e6bc1785d89a93 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 15:44:33 +0900 Subject: [PATCH 023/167] Change config recipe name to lowercase --- config/augmentation/classification.yaml | 4 ++-- config/augmentation/detection.yaml | 2 +- config/augmentation/segmentation.yaml | 6 +++--- config/augmentation/template/common.yaml | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index 840744236..115988d3b 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -2,9 +2,9 @@ augmentation: img_size: &img_size 256 recipe: - - name: RandomResizedCrop + name: randomresizedcrop size: *img_size interpolation: bilinear - - name: RandomHorizontalFlip + name: randomhorizontalflip p: 0.5 diff --git a/config/augmentation/detection.yaml b/config/augmentation/detection.yaml index 5adf7e5c4..099dd3655 100644 --- a/config/augmentation/detection.yaml +++ b/config/augmentation/detection.yaml @@ -2,6 +2,6 @@ augmentation: img_size: &img_size 512 recipe: - - name: Resize + name: resize size: *img_size interpolation: bilinear diff --git a/config/augmentation/segmentation.yaml b/config/augmentation/segmentation.yaml index a0f2c7a55..97b8630f7 100644 --- a/config/augmentation/segmentation.yaml +++ b/config/augmentation/segmentation.yaml @@ -2,14 +2,14 @@ augmentation: img_size: &img_size 512 recipe: - - name: RandomResizedCrop + name: randomresizedcrop size: *img_size interpolation: bilinear - - name: RandomHorizontalFlip + name: randomhorizontalflip p: 0.5 - - name: ColorJitter + name: colorjitter brightness: 0.25 contrast: 0.25 saturation: 0.25 diff --git a/config/augmentation/template/common.yaml b/config/augmentation/template/common.yaml index f87d90bbd..af96c0926 100644 --- a/config/augmentation/template/common.yaml +++ b/config/augmentation/template/common.yaml @@ -2,25 +2,25 @@ augmentation: img_size: &img_size ~ recipe: - - name: RandomResizedCrop + name: randomresizedcrop size: ~ interpolation: bilinear - - name: RandomHorizontalFlip + name: randomhorizontalflip p: ~ - - name: RandomVerticalFlip + name: randomverticalflip p: ~ - - name: ColorJitter + name: colorjitter brightness: ~ contrast: ~ saturation: ~ hue: ~ p: ~ - - name: Resize + name: resize size: ~ - - name: Pad + name: pad padding: ~ \ No newline at end of file From 86e732f47e86c41394b2a12938fd356ee8325dc4 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 15:51:18 +0900 Subject: [PATCH 024/167] Ruff fix --- src/netspresso_trainer/cfg/__init__.py | 2 +- src/netspresso_trainer/cfg/augmentation.py | 2 +- src/netspresso_trainer/dataloaders/augmentation/transforms.py | 2 +- src/netspresso_trainer/dataloaders/segmentation/huggingface.py | 2 +- src/netspresso_trainer/dataloaders/segmentation/local.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 45c681b26..2aabdddfb 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -8,8 +8,8 @@ ColorJitter, Pad, RandomCrop, - RandomResizedCrop, RandomHorizontalFlip, + RandomResizedCrop, RandomVerticalFlip, Resize, ) diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index a7ac0acff..a06fa071b 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Optional, Union, List +from typing import List, Optional, Union from omegaconf import MISSING, MissingMandatoryValue diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index d0948dc56..c96809881 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -2,9 +2,9 @@ import numpy as np import PIL.Image as Image +from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD from . import custom as TC from .registry import TRANSFORM_DICT -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD EDGE_SIZE = 4 Y_K_SIZE = 6 diff --git a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py index e6a2cd2df..eeb6afc66 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py +++ b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py @@ -3,8 +3,8 @@ import numpy as np import PIL.Image as Image -from ..base import BaseHFDataset from ..augmentation.transforms import generate_edge, reduce_label +from ..base import BaseHFDataset class SegmentationHFDataset(BaseHFDataset): diff --git a/src/netspresso_trainer/dataloaders/segmentation/local.py b/src/netspresso_trainer/dataloaders/segmentation/local.py index cb67e94a0..aea1f350e 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/local.py +++ b/src/netspresso_trainer/dataloaders/segmentation/local.py @@ -5,8 +5,8 @@ import numpy as np import PIL.Image as Image -from ..base import BaseCustomDataset from ..augmentation.transforms import generate_edge, reduce_label +from ..base import BaseCustomDataset class SegmentationCustomDataset(BaseCustomDataset): From 31372f08da14c4cca47e222a29fd94c379829e2a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 2 Nov 2023 15:51:37 +0900 Subject: [PATCH 025/167] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a1bada4..ae55350ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ No changes to highlight. ## Breaking Changes: -No changes to highlight. +- Enable dataset augmentation customizing by `@illian01` in [PR 201](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/201) ## Other Changes: From 32e2b6cd1728227a6e58a6935d69a09cae2dc52b Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 10:53:01 +0900 Subject: [PATCH 026/167] Fix MissingMandatoryValue to MISSING --- src/netspresso_trainer/cfg/augmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index a06fa071b..195ad8849 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -9,7 +9,7 @@ @dataclass class Transform: - name: str = MissingMandatoryValue + name: str = MISSING @dataclass From 52327c0851cea7df0760ee531f5617d092fe6995 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 11:00:35 +0900 Subject: [PATCH 027/167] Change default of AugmentationConfig recipe --- src/netspresso_trainer/cfg/augmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index 195ad8849..eeae461c7 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -64,4 +64,4 @@ class Resize(Transform): @dataclass class AugmentationConfig: img_size: int = DEFAULT_IMG_SIZE - recipe: List[Transform] = field(default_factory=list) + recipe: List[Transform] = field(default_factory=[Transform()]) From c9c06a9c6c46c107e8425d36f7a37bdace1c6646 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 11:19:04 +0900 Subject: [PATCH 028/167] Add default augmentation config for each task --- src/netspresso_trainer/cfg/__init__.py | 3 +++ src/netspresso_trainer/cfg/augmentation.py | 21 +++++++++++++++++++++ tools/config_test.py | 3 ++- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 2aabdddfb..96e120e22 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -5,13 +5,16 @@ from .augmentation import ( AugmentationConfig, + ClassificationAugmentationConfig, ColorJitter, + DetectionAugmentationConfig, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize, + SegmentationAugmentationConfig, ) from .data import ( DatasetConfig, diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index eeae461c7..dfff974c2 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -65,3 +65,24 @@ class Resize(Transform): class AugmentationConfig: img_size: int = DEFAULT_IMG_SIZE recipe: List[Transform] = field(default_factory=[Transform()]) + + +@dataclass +class ClassificationAugmentationConfig(AugmentationConfig): + img_size: int = 256 + recipe: List[Transform] = field(default_factory=lambda: [RandomResizedCrop(size=256), + RandomHorizontalFlip()]) + + +@dataclass +class SegmentationAugmentationConfig(AugmentationConfig): + img_size: int = 512 + recipe: List[Transform] = field(default_factory=lambda: [RandomResizedCrop(size=512), + RandomHorizontalFlip(), + ColorJitter()]) + + +@dataclass +class DetectionAugmentationConfig(AugmentationConfig): + img_size: int = 512 + recipe: List[Transform] = field(default_factory=lambda: [Resize(size=512)]) diff --git a/tools/config_test.py b/tools/config_test.py index 67ddc7826..10e345388 100644 --- a/tools/config_test.py +++ b/tools/config_test.py @@ -9,6 +9,7 @@ from netspresso_trainer.cfg import ( AugmentationConfig, + ClassificationAugmentationConfig, ClassificationResNetModelConfig, ColorJitter, RandomResizedCrop, @@ -16,7 +17,7 @@ ExampleBeansDataset, ) - augmentation_config = AugmentationConfig(recipe=[RandomResizedCrop(), RandomHorizontalFlip(), ColorJitter()]) + augmentation_config = ClassificationAugmentationConfig() example_dataset = ExampleBeansDataset example_model = ClassificationResNetModelConfig() cfg = TrainerConfig( From 1e79a587f214fbcebf9f31f7313fcb8e3e7c69b9 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Thu, 2 Nov 2023 19:20:54 +0900 Subject: [PATCH 029/167] hotfix: reinit CUDA for safety --- src/netspresso_trainer/trainer_cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netspresso_trainer/trainer_cli.py b/src/netspresso_trainer/trainer_cli.py index 56cde7b84..245e32901 100644 --- a/src/netspresso_trainer/trainer_cli.py +++ b/src/netspresso_trainer/trainer_cli.py @@ -5,6 +5,7 @@ from typing import Union from omegaconf import DictConfig, OmegaConf +import torch from netspresso_trainer.trainer_common import train_common @@ -136,6 +137,7 @@ def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augment assert isinstance(gpus, (list, int)) gpu_ids_str = ','.join(map(str, gpus)) if isinstance(gpus, list) else str(gpus) os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids_str + torch.cuda.empty_cache() # Reinitialize CUDA to apply the change if isinstance(gpus, int): conf = set_arguments(data, augmentation, model, training, logging, environment) From 4647eae90844c0db4a1f9275d744ac6104701e9b Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 11:52:33 +0900 Subject: [PATCH 030/167] #200 fix default_factory as 0-args func --- src/netspresso_trainer/cfg/augmentation.py | 32 ++++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index dfff974c2..fb51d02d6 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -12,6 +12,14 @@ class Transform: name: str = MISSING +@dataclass +class AugmentationConfig: + img_size: int = DEFAULT_IMG_SIZE + recipe: List[Transform] = field(default_factory=lambda: [ + Transform() + ]) + + @dataclass class ColorJitter(Transform): name: str = 'colorjitter' @@ -61,28 +69,28 @@ class Resize(Transform): interpolation: Optional[str] = 'bilinear' -@dataclass -class AugmentationConfig: - img_size: int = DEFAULT_IMG_SIZE - recipe: List[Transform] = field(default_factory=[Transform()]) - - @dataclass class ClassificationAugmentationConfig(AugmentationConfig): img_size: int = 256 - recipe: List[Transform] = field(default_factory=lambda: [RandomResizedCrop(size=256), - RandomHorizontalFlip()]) + recipe: List[Transform] = field(default_factory=lambda: [ + RandomResizedCrop(size=256), + RandomHorizontalFlip() + ]) @dataclass class SegmentationAugmentationConfig(AugmentationConfig): img_size: int = 512 - recipe: List[Transform] = field(default_factory=lambda: [RandomResizedCrop(size=512), - RandomHorizontalFlip(), - ColorJitter()]) + recipe: List[Transform] = field(default_factory=lambda: [ + RandomResizedCrop(size=512), + RandomHorizontalFlip(), + ColorJitter() + ]) @dataclass class DetectionAugmentationConfig(AugmentationConfig): img_size: int = 512 - recipe: List[Transform] = field(default_factory=lambda: [Resize(size=512)]) + recipe: List[Transform] = field(default_factory=lambda: [ + Resize(size=512) + ]) From b17d7c0ef695ce86a66dc295dca641fe9b0864d5 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 11:53:22 +0900 Subject: [PATCH 031/167] ruff fix --- src/netspresso_trainer/trainer_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/trainer_cli.py b/src/netspresso_trainer/trainer_cli.py index 245e32901..30f7fda32 100644 --- a/src/netspresso_trainer/trainer_cli.py +++ b/src/netspresso_trainer/trainer_cli.py @@ -4,8 +4,8 @@ from pathlib import Path from typing import Union -from omegaconf import DictConfig, OmegaConf import torch +from omegaconf import DictConfig, OmegaConf from netspresso_trainer.trainer_common import train_common From 8b0ec6be53637838018c001fc6dee390bb96e11c Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 11:57:15 +0900 Subject: [PATCH 032/167] #216 add 'W' to extend-select [tool.ruff] --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 303021ee6..ab66ec722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ extend-select = [ "I", "SIM", "INP001", + "W" ] ignore = [ From fc3ee3a09b98074723698e32a721c1ab514adaf1 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 11:59:41 +0900 Subject: [PATCH 033/167] #216 apply rule --- src/netspresso_trainer/__init__.py | 2 +- src/netspresso_trainer/cfg/__init__.py | 16 +++--- src/netspresso_trainer/cfg/data.py | 2 +- src/netspresso_trainer/cfg/model.py | 6 +-- src/netspresso_trainer/cfg/training.py | 2 +- .../dataloaders/augmentation/__init__.py | 2 +- .../dataloaders/augmentation/custom.py | 10 ++-- src/netspresso_trainer/dataloaders/base.py | 22 ++++---- .../dataloaders/classification/dataset.py | 54 +++++++++---------- .../dataloaders/detection/dataset.py | 22 ++++---- .../dataloaders/detection/local.py | 14 ++--- .../dataloaders/registry.py | 2 +- .../dataloaders/utils/constants.py | 2 +- .../dataloaders/utils/misc.py | 2 +- src/netspresso_trainer/loggers/base.py | 40 +++++++------- src/netspresso_trainer/loggers/builder.py | 8 +-- src/netspresso_trainer/loggers/csv.py | 8 +-- src/netspresso_trainer/loggers/image.py | 16 +++--- src/netspresso_trainer/loggers/netspresso.py | 36 ++++++------- src/netspresso_trainer/loggers/registry.py | 2 +- src/netspresso_trainer/loggers/stdout.py | 10 ++-- src/netspresso_trainer/loggers/tensorboard.py | 12 ++--- src/netspresso_trainer/loggers/visualizer.py | 8 +-- .../losses/classification/__init__.py | 2 +- .../losses/classification/label_smooth.py | 2 +- .../losses/classification/soft_target.py | 2 +- src/netspresso_trainer/losses/common.py | 2 +- .../losses/detection/__init__.py | 2 +- .../losses/detection/fastrcnn.py | 16 +++--- .../losses/detection/yolox.py | 14 ++--- src/netspresso_trainer/losses/registry.py | 2 +- .../losses/segmentation/pidnet.py | 6 +-- src/netspresso_trainer/metrics/__init__.py | 2 +- .../metrics/detection/metric.py | 2 +- src/netspresso_trainer/metrics/registry.py | 2 +- .../models/op/base_metaformer.py | 48 ++++++++--------- src/netspresso_trainer/models/op/custom.py | 52 +++++++++--------- src/netspresso_trainer/models/op/depth.py | 2 +- src/netspresso_trainer/models/op/registry.py | 2 +- src/netspresso_trainer/optimizers/__init__.py | 2 +- src/netspresso_trainer/optimizers/builder.py | 4 +- src/netspresso_trainer/optimizers/registry.py | 2 +- src/netspresso_trainer/pipelines/builder.py | 4 +- src/netspresso_trainer/pipelines/detection.py | 18 +++---- src/netspresso_trainer/pipelines/registry.py | 2 +- src/netspresso_trainer/schedulers/builder.py | 4 +- .../schedulers/cosine_lr.py | 6 +-- .../schedulers/cosine_warm_restart.py | 6 +-- src/netspresso_trainer/schedulers/poly_lr.py | 6 +-- src/netspresso_trainer/schedulers/registry.py | 2 +- src/netspresso_trainer/schedulers/step_lr.py | 2 +- src/netspresso_trainer/trainer_cli.py | 28 +++++----- src/netspresso_trainer/trainer_inline.py | 8 +-- src/netspresso_trainer/utils/logger.py | 4 +- src/netspresso_trainer/utils/stats.py | 4 +- 55 files changed, 279 insertions(+), 279 deletions(-) diff --git a/src/netspresso_trainer/__init__.py b/src/netspresso_trainer/__init__.py index 24b54002f..0c4517b21 100644 --- a/src/netspresso_trainer/__init__.py +++ b/src/netspresso_trainer/__init__.py @@ -10,4 +10,4 @@ version = (Path(__file__).parent / "VERSION").read_text().strip() -__version__ = version \ No newline at end of file +__version__ = version diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 96e120e22..dd176a035 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -69,19 +69,19 @@ class TrainerConfig: training: Optional[ScheduleConfig] = None logging: LoggingConfig = field(default_factory=lambda: LoggingConfig()) environment: EnvironmentConfig = field(default_factory=lambda: EnvironmentConfig()) - + @property def epochs(self) -> int: return self.training.epochs - + @property def batch_size(self) -> int: return self.training.batch_size - + @property def num_workers(self) -> int: return self.environment.num_workers - + @epochs.setter def epochs(self, v: int) -> None: self.training.epochs = v @@ -89,18 +89,18 @@ def epochs(self, v: int) -> None: @batch_size.setter def batch_size(self, v: int) -> None: self.training.batch_size = v - + @num_workers.setter def num_workers(self, v: int) -> None: self.environment.num_workers = v - + def __post_init__(self): assert self.task in ['classification', 'segmentation', 'detection'] self.data.task = self.task self.model.task = self.task - + if self.auto: if self.augmentation is None: self.augmentation = AugmentationConfig() if self.training is None: - self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() \ No newline at end of file + self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() diff --git a/src/netspresso_trainer/cfg/data.py b/src/netspresso_trainer/cfg/data.py index 000624b1c..21e2abc54 100644 --- a/src/netspresso_trainer/cfg/data.py +++ b/src/netspresso_trainer/cfg/data.py @@ -262,4 +262,4 @@ class HuggingFaceSegmentationDatasetConfig(DatasetConfig): subset="full", features={"image": "image", "label": "artist"} ) -) \ No newline at end of file +) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index c9d476043..4203c47b8 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -25,10 +25,10 @@ class ArchitectureConfig: full: Optional[Dict[str, Any]] = None backbone: Optional[Dict[str, Any]] = None head: Optional[Dict[str, Any]] = None - + def __post_init__(self): assert bool(self.full) != bool(self.backbone), "Only one of full or backbone should be given." - + @dataclass class ModelConfig: task: str = MISSING @@ -71,7 +71,7 @@ class EfficientFormerArchitectureConfig(ArchitectureConfig): class MobileNetV3ArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "mobilenetv3_small", - + # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] "block_info": [ [ diff --git a/src/netspresso_trainer/cfg/training.py b/src/netspresso_trainer/cfg/training.py index a13be88fb..3c0c32ff6 100644 --- a/src/netspresso_trainer/cfg/training.py +++ b/src/netspresso_trainer/cfg/training.py @@ -32,4 +32,4 @@ class SegmentationScheduleConfig(ScheduleConfig): @dataclass class DetectionScheduleConfig(ScheduleConfig): - pass \ No newline at end of file + pass diff --git a/src/netspresso_trainer/dataloaders/augmentation/__init__.py b/src/netspresso_trainer/dataloaders/augmentation/__init__.py index 34ae87e36..624fcd12f 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/augmentation/__init__.py @@ -12,4 +12,4 @@ Resize, ToTensor, ) -from .registry import TRANSFORM_DICT \ No newline at end of file +from .registry import TRANSFORM_DICT diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index a40b4b495..2e0673727 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -29,7 +29,7 @@ def _get_transformed(self, image, mask, bbox, visualize_for_debug): for t in self.transforms: if visualize_for_debug and not t.visualize: continue - image, mask, bbox = t(image=image, mask=mask, bbox=bbox) + image, mask, bbox = t(image=image, mask=mask, bbox=bbox) return image, mask, bbox def __call__(self, image, mask=None, bbox=None, visualize_for_debug=False, **kwargs): @@ -103,7 +103,7 @@ def __init__(self, size, interpolation='bilinear', max_size=None, antialias=None # TODO: There is logic error in forward. If `size` is int, this specify edge for shorter one. # And, this is not match with bbox computing logic. - # Thus, automatically transform to sequence format for now, + # Thus, automatically transform to sequence format for now, # but this should be specified whether Resize receives sequence or int. if isinstance(size, int): size = [size, size] @@ -294,11 +294,11 @@ def __repr__(self): class RandomResizedCrop(T.RandomResizedCrop): visualize = True - def __init__(self, + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), - interpolation='bilinear', + interpolation='bilinear', antialias: Optional[bool]=None): interpolation = INVERSE_MODES_MAPPING[interpolation] super().__init__(size, scale, ratio, interpolation, antialias) @@ -379,4 +379,4 @@ def __call__(self, image, mask=None, bbox=None): return image, mask, bbox def __repr__(self): - return self.__class__.__name__ + "()" \ No newline at end of file + return self.__class__.__name__ + "()" diff --git a/src/netspresso_trainer/dataloaders/base.py b/src/netspresso_trainer/dataloaders/base.py index 5e1e8b116..e00bb5a1e 100644 --- a/src/netspresso_trainer/dataloaders/base.py +++ b/src/netspresso_trainer/dataloaders/base.py @@ -15,10 +15,10 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, split self.conf_data = conf_data self.conf_augmentation = conf_augmentation self.model_name = model_name - + self.transform = transform self.samples = samples - + self._root = conf_data.path.root self._idx_to_class = idx_to_class self._num_classes = len(self._idx_to_class) @@ -47,12 +47,12 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label - - + + class BaseHFDataset(data.Dataset): def __init__(self, conf_data, conf_augmentation, model_name, root, split, with_label): @@ -64,7 +64,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, root, split, with_l self._split = split self._with_label = with_label - def _load_dataset(self, root, subset_name=None, cache_dir=None): + def _load_dataset(self, root, subset_name=None, cache_dir=None): from datasets import load_dataset if cache_dir is not None: Path(cache_dir).mkdir(exist_ok=True, parents=True) @@ -94,7 +94,7 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label @@ -104,15 +104,15 @@ class BaseDataSampler(ABC): def __init__(self, conf_data, train_valid_split_ratio): self.conf_data = conf_data self.train_valid_split_ratio = train_valid_split_ratio - + @abstractmethod def load_data(self): raise NotImplementedError - + @abstractmethod def load_samples(self): raise NotImplementedError - + @abstractmethod def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index cd42daa2a..bed9088c5 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -16,21 +16,21 @@ logger = logging.getLogger("netspresso_trainer") VALID_IMG_EXTENSIONS = IMG_EXTENSIONS + tuple((x.upper() for x in IMG_EXTENSIONS)) - + def load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename: Optional[Union[str, Path]]=None, id_mapping: Optional[Dict[str, str]]=None): if map_or_filename is None: # may be labeled with directory - # dir -> + # dir -> dir_list = [x.name for x in Path(train_dir).iterdir() if x.is_dir()] dir_to_class = id_mapping if id_mapping is not None else {k: k for k in dir_list} # id_mapping or identity - + class_list = [dir_to_class[dir] for dir in dir_list] class_list = sorted(class_list, key=lambda k: natural_key(k)) _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(class_list)} idx_to_class = {v: k for k, v in _class_to_idx.items()} - + file_or_dir_to_idx = {dir: _class_to_idx[dir_to_class[dir]] for dir in dir_list} # dir -> idx return file_or_dir_to_idx, idx_to_class @@ -45,9 +45,9 @@ def load_class_map_with_id_mapping(root_dir, train_dir, reader = csv.DictReader(csvfile) file_class_list = [{column: str(row[column]).strip() for column in ['image_id', 'class']} for row in reader] - + class_stats = Counter([x['class'] for x in file_class_list]) - + _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(sorted(class_stats, key=lambda k: natural_key(k)))} idx_to_class = {v: k for k, v in _class_to_idx.items()} @@ -62,26 +62,26 @@ def is_file_dict(image_dir: Union[Path, str], file_or_dir_to_idx): file_or_dir: Path = image_dir / candidate_name if file_or_dir.exists(): return file_or_dir.is_file() - + file_candidates = list(image_dir.glob(f"{candidate_name}.*")) assert len(file_candidates) != 0, f"Unknown label format! Is there any something file like {file_or_dir} ?" - + return True class ClassficationDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(ClassficationDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, file_or_dir_to_idx, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] image_dir: Path = data_root / split_dir.image - + images_and_targets: List[Dict[str, Optional[Union[str, int]]]] = [] - + assert split in ['train', 'valid', 'test'], f"split should be either {['train', 'valid', 'test']}" if split in ['train', 'valid']: - + if is_file_dict(image_dir, file_or_dir_to_idx): file_to_idx = file_or_dir_to_idx for file in chain(image_dir.glob(f'*{ext}') for ext in VALID_IMG_EXTENSIONS): @@ -92,7 +92,7 @@ def load_data(self, file_or_dir_to_idx, split='train'): images_and_targets.append({'image': str(file), 'label': file_to_idx[file.stem]}) continue logger.debug(f"Found file wihtout label: {file}") - + else: dir_to_idx = file_or_dir_to_idx for dir_name, dir_idx in dir_to_idx.items(): @@ -103,24 +103,24 @@ def load_data(self, file_or_dir_to_idx, split='train'): else: # split == test for ext in VALID_IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} for file in image_dir.glob(f'*{ext}')]) - + images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None root_dir = Path(self.conf_data.path.root) train_dir = root_dir / self.conf_data.path.train.image id_mapping: Optional[dict] = dict(self.conf_data.id_mapping) if self.conf_data.id_mapping is not None else None file_or_dir_to_idx, idx_to_class = load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename=self.conf_data.path.train.label, id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(file_or_dir_to_idx, split='train') if exists_valid: valid_samples = self.load_data(file_or_dir_to_idx, split='valid') @@ -128,16 +128,16 @@ def load_samples(self): test_samples = self.load_data(file_or_dir_to_idx, split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): from datasets import ClassLabel, load_dataset - + cache_dir = self.conf_data.metadata.custom_cache_dir root = self.conf_data.metadata.repo subset_name = self.conf_data.metadata.subset @@ -145,23 +145,23 @@ def load_huggingface_samples(self): cache_dir = Path(cache_dir) Path(cache_dir).mkdir(exist_ok=True, parents=True) total_dataset = load_dataset(root, name=subset_name, cache_dir=cache_dir) - + label_feature_name = self.conf_data.metadata.features.label label_feature = total_dataset['train'].features[label_feature_name] if isinstance(label_feature, ClassLabel): labels: List[str] = label_feature.names else: labels = list({sample[label_feature_name] for sample in total_dataset['train']}) - + if isinstance(labels[0], int): # TODO: find class_map <-> idx and apply it (ex. using id_mapping) idx_to_class: Dict[int, int] = {k: k for k in labels} elif isinstance(labels[0], str): idx_to_class: Dict[int, str] = dict(enumerate(labels)) - + exists_valid = 'validation' in total_dataset exists_test = 'test' in total_dataset - + train_samples = total_dataset['train'] valid_samples = None if exists_valid: @@ -174,4 +174,4 @@ def load_huggingface_samples(self): splitted_datasets = train_samples.train_test_split(test_size=(1 - self.train_valid_split_ratio)) train_samples = splitted_datasets['train'] valid_samples = splitted_datasets['test'] - return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} \ No newline at end of file + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} diff --git a/src/netspresso_trainer/dataloaders/detection/dataset.py b/src/netspresso_trainer/dataloaders/detection/dataset.py index fd6dbcf2c..769843754 100644 --- a/src/netspresso_trainer/dataloaders/detection/dataset.py +++ b/src/netspresso_trainer/dataloaders/detection/dataset.py @@ -49,7 +49,7 @@ def detection_collate_fn(original_batch): class DetectionDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(DetectionDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] @@ -71,7 +71,7 @@ def load_data(self, split='train'): images = sorted(images, key=lambda k: natural_key(k)) labels = sorted(labels, key=lambda k: natural_key(k)) images_and_targets.extend([{'image': str(image), 'label': str(label)} for image, label in zip(images, labels)]) - + elif split == 'test': for ext in IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} @@ -79,21 +79,21 @@ def load_data(self, split='train'): images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) else: raise AssertionError(f"split should be either {['train', 'valid', 'test']}") - + return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None assert self.conf_data.id_mapping is not None id_mapping: Optional[list] = list(self.conf_data.id_mapping) idx_to_class = load_custom_class_map(id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(split='train') if exists_valid: valid_samples = self.load_data(split='valid') @@ -101,12 +101,12 @@ def load_samples(self): test_samples = self.load_data(split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/detection/local.py b/src/netspresso_trainer/dataloaders/detection/local.py index 2ba297627..3dbbe8427 100644 --- a/src/netspresso_trainer/dataloaders/detection/local.py +++ b/src/netspresso_trainer/dataloaders/detection/local.py @@ -23,13 +23,13 @@ def exist_name(candidate, folder_iterable): def get_label(label_file: Path): target = Path(label_file).read_text() - + try: target_array = np.array([list(map(float, box.split(' '))) for box in target.split('\n') if box.strip()]) except ValueError as e: print(target) raise e - + label, boxes = target_array[:, 0], target_array[:, 1:] label = label[..., np.newaxis] return label, boxes @@ -43,7 +43,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, conf_data, conf_augmentation, model_name, idx_to_class, split, samples, transform, with_label, **kwargs ) - + @staticmethod def xywhn2xyxy(original: np.ndarray, w: int, h: int, padw=0, padh=0): converted = original.copy() @@ -67,12 +67,12 @@ def __getitem__(self, index): if ann_path is None: out = self.transform(self.conf_augmentation)(image=img) return {'pixel_values': out['image'], 'name': img_path.name, 'org_img': org_img, 'org_shape': (h, w)} - + outputs = {} label, boxes_yolo = get_label(Path(ann_path)) boxes = self.xywhn2xyxy(boxes_yolo, w, h) - + out = self.transform(self.conf_augmentation)(image=img, bbox=np.concatenate((boxes, label), axis=-1)) assert out['bbox'].shape[-1] == 5 # ltrb + class_label outputs.update({'pixel_values': out['image'], 'bbox': out['bbox'][..., :4], @@ -83,6 +83,6 @@ def __getitem__(self, index): return outputs assert self._split in ['val', 'valid', 'test'] - # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 + # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 outputs.update({'org_shape': (h, w)}) - return outputs \ No newline at end of file + return outputs diff --git a/src/netspresso_trainer/dataloaders/registry.py b/src/netspresso_trainer/dataloaders/registry.py index 1ab25afea..7ff3cc424 100644 --- a/src/netspresso_trainer/dataloaders/registry.py +++ b/src/netspresso_trainer/dataloaders/registry.py @@ -31,4 +31,4 @@ 'classification': ClassficationDataSampler, 'segmentation': SegmentationDataSampler, 'detection': DetectionDataSampler -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/dataloaders/utils/constants.py b/src/netspresso_trainer/dataloaders/utils/constants.py index b017eb440..91d73f299 100644 --- a/src/netspresso_trainer/dataloaders/utils/constants.py +++ b/src/netspresso_trainer/dataloaders/utils/constants.py @@ -1,4 +1,4 @@ -DEFAULT_CROP_PCT = 0.95 #0.875 +DEFAULT_CROP_PCT = 0.95 #0.875 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) diff --git a/src/netspresso_trainer/dataloaders/utils/misc.py b/src/netspresso_trainer/dataloaders/utils/misc.py index 0fab867f2..3fb736217 100644 --- a/src/netspresso_trainer/dataloaders/utils/misc.py +++ b/src/netspresso_trainer/dataloaders/utils/misc.py @@ -19,4 +19,4 @@ def expand_to_chs(x, n): def natural_key(string_): """See http://www.codinghorror.com/blog/archives/001018.html""" - return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] \ No newline at end of file + return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] diff --git a/src/netspresso_trainer/loggers/base.py b/src/netspresso_trainer/loggers/base.py index dffa31d6c..6dd321da3 100644 --- a/src/netspresso_trainer/loggers/base.py +++ b/src/netspresso_trainer/loggers/base.py @@ -13,41 +13,41 @@ def __init__(self, model, result_dir): self.model = model self.csv_path = Path(result_dir) / CSV_FILENAME self.header: List = [] - + self._temp_row_dict = {} - + if self.csv_path.exists(): self.csv_path.unlink() - + self._epoch = None - + @property @abstractmethod def key_map(self) -> Dict[str, str]: raise NotImplementedError - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def update_header(self, header: List): assert len(header) != 0 self.header = header - + with open(self.csv_path, 'a') as f: f.write(",".join(self.header)) f.write("\n") def _clear_temp(self): self._temp_row_dict = {} - + def _update_with_list(self, data: List): if data is not None and len(data) != 0: with open(self.csv_path, 'a') as f: @@ -55,18 +55,18 @@ def _update_with_list(self, data: List): f.write("\n") self._clear_temp() return - + def _update_specific(self, data: Dict): for _key, _value in data.items(): if _key not in self.header: raise AssertionError(f"The given key ({_key}) is not in {self.header}!") if _key not in self._temp_row_dict: self._temp_row_dict[_key] = _value - + if set(self.header) == set(self._temp_row_dict.keys()): self._update_with_list([self._temp_row_dict[_col] for _col in self.header]) return - + def update(self, data=None, **kwargs): if isinstance(data, List): return self._update_with_list(data) @@ -74,9 +74,9 @@ def update(self, data=None, **kwargs): return self._update_specific(data) # if isinstance(data, type(None)): # return self._update_specific(kwargs) - + raise AssertionError(f"Type of data should be either List or Dict! Current: {type(data)}") - + def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'valid'] = 'train'): converted_dict = {} for k, v in scalar_dict.items(): @@ -84,25 +84,25 @@ def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'va continue record_key = self.key_map[f"{prefix}/{k}"] assert record_key in self.header, f"{record_key} not in {self.header}" - + converted_dict.update({record_key: v}) return converted_dict - + def __call__(self, train_losses, train_metrics, valid_losses=None, valid_metrics=None): assert len(self.header) != 0 assert len(self.key_map) != 0 - + csv_record_dict = {'epoch': self._epoch} converted_train_losses = self._convert_as_csv_record(train_losses, prefix='train') converted_train_metrics = self._convert_as_csv_record(train_metrics, prefix='train') csv_record_dict.update(converted_train_losses) csv_record_dict.update(converted_train_metrics) - + if valid_losses is not None: converted_valid_losses = self._convert_as_csv_record(valid_losses, prefix='valid') csv_record_dict.update(converted_valid_losses) if valid_metrics is not None: converted_valid_metrics = self._convert_as_csv_record(valid_metrics, prefix='valid') csv_record_dict.update(converted_valid_metrics) - + self.update(csv_record_dict) diff --git a/src/netspresso_trainer/loggers/builder.py b/src/netspresso_trainer/loggers/builder.py index 912044bb7..63f602536 100644 --- a/src/netspresso_trainer/loggers/builder.py +++ b/src/netspresso_trainer/loggers/builder.py @@ -62,16 +62,16 @@ def __init__( step_per_epoch=step_per_epoch, num_sample_images=num_sample_images) if self.use_tensorboard else None self.stdout_logger: Optional[StdOutLogger] = \ StdOutLogger(task=task, model=model, total_epochs=conf.training.epochs) if self.use_stdout else None - + self.netspresso_api_client = None if self.use_netspresso: from loggers.netspresso import ModelSearchServerHandler self.netspresso_api_client: Optional[ModelSearchServerHandler] = ModelSearchServerHandler(task=task, model=model) - + if task in VISUALIZER: pallete = conf.data.pallete if 'pallete' in conf.data else None self.label_converter = VISUALIZER[task](class_map=class_map, pallete=pallete) - + @property def result_dir(self): return self._result_dir @@ -117,7 +117,7 @@ def _convert_imagedict_as_readable(self, images_dict: Dict): for k, v in images_dict.items(): if k == 'images': continue - + # target, pred, bg_gt v = v[:self.num_sample_images] v_new: np.ndarray = magic_image_handler( diff --git a/src/netspresso_trainer/loggers/csv.py b/src/netspresso_trainer/loggers/csv.py index 943acb29a..d82f629ae 100644 --- a/src/netspresso_trainer/loggers/csv.py +++ b/src/netspresso_trainer/loggers/csv.py @@ -6,7 +6,7 @@ class ClassificationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(ClassificationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -14,7 +14,7 @@ def __init__(self, model, result_dir): 'train/Acc@1': 'train_accuracy', 'valid/Acc@1': 'valid_accuracy', } - + @property def key_map(self): return self._key_map @@ -24,7 +24,7 @@ class SegmentationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(SegmentationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -35,4 +35,4 @@ def __init__(self, model, result_dir): @property def key_map(self): - return self._key_map \ No newline at end of file + return self._key_map diff --git a/src/netspresso_trainer/loggers/image.py b/src/netspresso_trainer/loggers/image.py index 68f6c23e9..cf9bb0e14 100644 --- a/src/netspresso_trainer/loggers/image.py +++ b/src/netspresso_trainer/loggers/image.py @@ -12,32 +12,32 @@ def __init__(self, model, result_dir) -> None: self.save_dir: Path = Path(result_dir) / "result_image" self.save_dir.mkdir(exist_ok=True) self._epoch = None - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def save_ndarray_as_image(self, image_array: np.ndarray, filename: Union[str, Path], dataformats: Literal['HWC', 'CHW'] = 'HWC'): assert image_array.ndim == 3 if dataformats != 'HWC' and dataformats == 'CHW': image_array = image_array.transpose((1, 2, 0)) - + # HWC assert image_array.shape[-1] in [1, 3] Image.fromarray(image_array.astype(np.uint8)).save(filename) return True - + def save_result(self, image_dict: Dict, prefix='train'): prefix_dir: Path = self.save_dir / prefix prefix_dir.mkdir(exist_ok=True) - + for k, v in image_dict.items(): assert isinstance(v, np.ndarray) assert v.ndim in [3, 4], \ @@ -53,5 +53,5 @@ def __call__(self, train_images=None, valid_images=None): self.save_result(train_images, prefix='train') if valid_images is not None: self.save_result(valid_images, prefix='valid') - + diff --git a/src/netspresso_trainer/loggers/netspresso.py b/src/netspresso_trainer/loggers/netspresso.py index 8402c6f1f..589a7503d 100644 --- a/src/netspresso_trainer/loggers/netspresso.py +++ b/src/netspresso_trainer/loggers/netspresso.py @@ -7,7 +7,7 @@ logger = logging.getLogger("netspresso_trainer") -MONGODB_TEMP_URI = "" +MONGODB_TEMP_URI = "" class ModelSearchServerHandler: @@ -19,27 +19,27 @@ def __init__(self, task, model, mongodb_uri: str=MONGODB_TEMP_URI) -> None: logger.debug("Pinged your deployment. You successfully connected to MongoDB!") except Exception as e: raise e - + self._db = client['custom-training-board']['trainer-all-in-one'] self._session_id = None - + self._create_session(title=f"[{task}]{model}") - - + + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def _is_ready(self): return self._session_id is not None - + def _append(self, scalar_dict, mode='train'): assert self._is_ready() meta_string = f"{mode}/" if mode is not None else "" @@ -48,38 +48,38 @@ def _append(self, scalar_dict, mode='train'): '$currentDate': {'lastModified': True }} result = self._db.update_one({'_id': self._session_id}, contents, upsert=True) return result - + def _create_session(self, title: str ="test") -> ObjectId: example_document = { "title": title } document = self._db.insert_one(example_document) self._session_id = document.inserted_id return self._session_id - + def create_session(self, title: str="test") -> ObjectId: return self._create_session(title=title) - + def log_scalar(self, key, value, mode='train'): result = self._append({key: value}, mode=mode) return result - + def log_scalars_with_dict(self, scalar_dict, mode='train'): result = self._append(scalar_dict, mode=mode) return result - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time, ) -> None: - + self.log_scalars_with_dict(train_losses, mode='train') self.log_scalars_with_dict(train_metrics, mode='train') - + if valid_losses is not None: self.log_scalars_with_dict(valid_losses, mode='valid') if valid_metrics is not None: self.log_scalars_with_dict(valid_metrics, mode='valid') - + if learning_rate is not None: self.log_scalar('learning_rate', learning_rate, mode='misc') if elapsed_time is not None: - self.log_scalar('elapsed_time', elapsed_time, mode='misc') \ No newline at end of file + self.log_scalar('elapsed_time', elapsed_time, mode='misc') diff --git a/src/netspresso_trainer/loggers/registry.py b/src/netspresso_trainer/loggers/registry.py index 1ba8aad6c..b8c485900 100644 --- a/src/netspresso_trainer/loggers/registry.py +++ b/src/netspresso_trainer/loggers/registry.py @@ -9,4 +9,4 @@ VISUALIZER = { 'segmentation': SegmentationVisualizer, 'detection': DetectionVisualizer, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/loggers/stdout.py b/src/netspresso_trainer/loggers/stdout.py index aa0e99ff0..7e3d653da 100644 --- a/src/netspresso_trainer/loggers/stdout.py +++ b/src/netspresso_trainer/loggers/stdout.py @@ -11,21 +11,21 @@ def __init__(self, task, model, total_epochs=None) -> None: self.task = task self.model_name = model self.total_epochs = total_epochs if total_epochs is not None else "???" - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time): logger.info(f"Epoch: {self._epoch} / {self.total_epochs}") - + if learning_rate is not None: logger.info(f"learning rate: {learning_rate:.7f}") if elapsed_time is not None: diff --git a/src/netspresso_trainer/loggers/tensorboard.py b/src/netspresso_trainer/loggers/tensorboard.py index 3ebc9c60f..e905b0ddb 100644 --- a/src/netspresso_trainer/loggers/tensorboard.py +++ b/src/netspresso_trainer/loggers/tensorboard.py @@ -70,14 +70,14 @@ def log_image(self, key, value: Union[np.ndarray, torch.Tensor], mode='train'): def log_images_with_dict(self, image_dict, mode='train'): for k, v in image_dict.items(): self._log_image(k, v, mode) - + def _get_rasterized_hparam(self, hparams): if not isinstance(hparams, dict): stem = hparams if not isinstance(hparams, (int, float, str, bool, torch.Tensor)): return str(stem) return stem - + rasterized_dict = {} for key, value in hparams.items(): if isinstance(value, dict): @@ -90,15 +90,15 @@ def _get_rasterized_hparam(self, hparams): return rasterized_dict def log_hparams(self, hp_omegaconf: Union[Dict, List], final_metrics=None): - + if final_metrics is None: final_metrics = {} final_metrics = {f"hparams_metrics/{k}": v for k, v in final_metrics.items()} - + hp_dict = OmegaConf.to_container(hp_omegaconf, resolve=True) hp_for_log = self._get_rasterized_hparam(hp_dict) - - exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) + + exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) self.tensorboard.file_writer.add_summary(exp) self.tensorboard.file_writer.add_summary(ssi) self.tensorboard.file_writer.add_summary(sei) diff --git a/src/netspresso_trainer/loggers/visualizer.py b/src/netspresso_trainer/loggers/visualizer.py index 3d11934bd..349dcd688 100644 --- a/src/netspresso_trainer/loggers/visualizer.py +++ b/src/netspresso_trainer/loggers/visualizer.py @@ -55,7 +55,7 @@ def _convert(self, gray_image): return color_image def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): - + return_images = [] for image, result in zip(images, results): image = image.copy() @@ -75,12 +75,12 @@ def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): text_w, text_h = text_size image = cv2.rectangle(image, (x1, y1-5-text_h), (x1+text_w, y1), color=color, thickness=-1) image = cv2.putText(image, str(class_name), (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) - + return_images.append(image[np.newaxis, ...]) return_images = np.concatenate(return_images, axis=0) return return_images - - + + class SegmentationVisualizer: def __init__(self, class_map, pallete=None): n = len(class_map) diff --git a/src/netspresso_trainer/losses/classification/__init__.py b/src/netspresso_trainer/losses/classification/__init__.py index f072e4cc1..6f89bba8f 100644 --- a/src/netspresso_trainer/losses/classification/__init__.py +++ b/src/netspresso_trainer/losses/classification/__init__.py @@ -1,2 +1,2 @@ from .label_smooth import LabelSmoothingCrossEntropy -from .soft_target import SoftTargetCrossEntropy \ No newline at end of file +from .soft_target import SoftTargetCrossEntropy diff --git a/src/netspresso_trainer/losses/classification/label_smooth.py b/src/netspresso_trainer/losses/classification/label_smooth.py index 61e2e3773..495d13543 100644 --- a/src/netspresso_trainer/losses/classification/label_smooth.py +++ b/src/netspresso_trainer/losses/classification/label_smooth.py @@ -19,4 +19,4 @@ def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: nll_loss = nll_loss.squeeze(1) smooth_loss = -logprobs.mean(dim=-1) loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() \ No newline at end of file + return loss.mean() diff --git a/src/netspresso_trainer/losses/classification/soft_target.py b/src/netspresso_trainer/losses/classification/soft_target.py index 2dfc8cd07..8f25bc9c8 100644 --- a/src/netspresso_trainer/losses/classification/soft_target.py +++ b/src/netspresso_trainer/losses/classification/soft_target.py @@ -12,4 +12,4 @@ def __init__(self): def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] loss = torch.sum(-target * F.log_softmax(pred, dim=-1), dim=-1) - return loss.mean() \ No newline at end of file + return loss.mean() diff --git a/src/netspresso_trainer/losses/common.py b/src/netspresso_trainer/losses/common.py index eda3b8ba4..5c7b4e2ad 100644 --- a/src/netspresso_trainer/losses/common.py +++ b/src/netspresso_trainer/losses/common.py @@ -13,4 +13,4 @@ def __init__(self, ignore_index, **kwargs) -> None: def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] loss = self.loss_fn(pred, target) - return loss \ No newline at end of file + return loss diff --git a/src/netspresso_trainer/losses/detection/__init__.py b/src/netspresso_trainer/losses/detection/__init__.py index 24d6942e2..2d5f5d05b 100644 --- a/src/netspresso_trainer/losses/detection/__init__.py +++ b/src/netspresso_trainer/losses/detection/__init__.py @@ -1,2 +1,2 @@ from .fastrcnn import RoiHeadLoss, RPNLoss -from .yolox import YOLOXLoss \ No newline at end of file +from .yolox import YOLOXLoss diff --git a/src/netspresso_trainer/losses/detection/fastrcnn.py b/src/netspresso_trainer/losses/detection/fastrcnn.py index 976a6098f..28b2ab18c 100644 --- a/src/netspresso_trainer/losses/detection/fastrcnn.py +++ b/src/netspresso_trainer/losses/detection/fastrcnn.py @@ -12,7 +12,7 @@ class RoiHeadLoss(nn.Module): def __init__(self) -> None: super().__init__() - + @staticmethod def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: class_logits, box_regression, labels, regression_targets =\ @@ -43,10 +43,10 @@ def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_classifier": classification_loss, "loss_box_reg": box_loss } - + # TODO: return as dict return sum(losses.values()) - + class RPNLoss(nn.Module): def __init__(self, box_fg_iou_thresh=0.5, @@ -54,7 +54,7 @@ def __init__(self, box_batch_size_per_image=512, box_positive_fraction=0.25) -> None: super().__init__() - + self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) self.box_similarity = box_ops.box_iou self.proposal_matcher = det_utils.Matcher( @@ -63,7 +63,7 @@ def __init__(self, allow_low_quality_matches=True, ) self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(box_batch_size_per_image, box_positive_fraction) - + def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[str, Tensor]] ) -> Tuple[List[Tensor], List[Tensor]]: @@ -100,7 +100,7 @@ def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[s labels.append(labels_per_image) matched_gt_boxes.append(matched_gt_boxes_per_image) return labels, matched_gt_boxes - + def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: List[Tensor], regression_targets: List[Tensor] ) -> Tuple[Tensor, Tensor]: """ @@ -137,7 +137,7 @@ def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: Li objectness_loss = F.binary_cross_entropy_with_logits(objectness[sampled_inds], labels[sampled_inds]) return objectness_loss, box_loss - + def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: anchors, objectness, pred_bbox_deltas = out['anchors'], out['objectness'], out['pred_bbox_deltas'] labels, matched_gt_boxes = self._assign_targets_to_anchors(anchors, target) @@ -150,4 +150,4 @@ def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_rpn_box_reg": loss_rpn_box_reg, } # TODO: return as dict - return sum(losses.values()) \ No newline at end of file + return sum(losses.values()) diff --git a/src/netspresso_trainer/losses/detection/yolox.py b/src/netspresso_trainer/losses/detection/yolox.py index f593cc0a2..7fcd8d534 100644 --- a/src/netspresso_trainer/losses/detection/yolox.py +++ b/src/netspresso_trainer/losses/detection/yolox.py @@ -47,7 +47,7 @@ def __init__(self, **kwargs) -> None: super(YOLOXLoss, self).__init__() self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") self.iou_loss = IOUloss(reduction="none") - + def forward(self, out: List, target: Dict) -> torch.Tensor: x_shifts = [] @@ -90,10 +90,10 @@ def forward(self, out: List, target: Dict) -> torch.Tensor: [], dtype=out[0].dtype, ) - + # TODO: return as dict return total_loss - + def get_losses( self, imgs, @@ -263,7 +263,7 @@ def get_losses( #loss_l1, num_fg / max(num_gts, 1), ) - + @torch.no_grad() def get_assignments( self, @@ -354,7 +354,7 @@ def get_assignments( matched_gt_inds, num_fg, ) - + def get_geometry_constraint( self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, ): @@ -385,7 +385,7 @@ def get_geometry_constraint( geometry_relation = is_in_centers[:, anchor_filter] return anchor_filter, geometry_relation - + def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): matching_matrix = torch.zeros_like(cost, dtype=torch.uint8) @@ -419,7 +419,7 @@ def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): fg_mask_inboxes ] return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds - + def get_output_and_grid(self, output, k, stride, dtype): grid = self.grids[k] diff --git a/src/netspresso_trainer/losses/registry.py b/src/netspresso_trainer/losses/registry.py index 61d8f554d..82f60e869 100644 --- a/src/netspresso_trainer/losses/registry.py +++ b/src/netspresso_trainer/losses/registry.py @@ -15,4 +15,4 @@ 'yolox_loss': YOLOXLoss, } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/losses/segmentation/pidnet.py b/src/netspresso_trainer/losses/segmentation/pidnet.py index 2bdc7cab0..736ab018c 100644 --- a/src/netspresso_trainer/losses/segmentation/pidnet.py +++ b/src/netspresso_trainer/losses/segmentation/pidnet.py @@ -26,7 +26,7 @@ def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): self.boundary_aware = False def _forward(self, out: torch.Tensor, target: torch.Tensor): - + return self.loss_fn(out, target) def forward(self, out: Dict, target: torch.Tensor): @@ -36,7 +36,7 @@ def forward(self, out: Dict, target: torch.Tensor): filler = torch.ones_like(target) * self.ignore_index bd_label = torch.where(torch.sigmoid(extra_d[:, 0, :, :]) > 0.8, target, filler) return self._forward(pred, bd_label) - + pred, extra_p = out['pred'], out['extra_p'] score = [extra_p, pred] return sum([w * self._forward(x, target) for (w, x) in zip(BALANCE_WEIGHTS, score)]) @@ -45,7 +45,7 @@ class PIDNetBoundaryAwareCrossEntropy(PIDNetCrossEntropy): def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): super().__init__(ignore_index, weight) self.boundary_aware = True - + # class OhemCrossEntropy(nn.Module): # def __init__(self, ignore_label=-1, thres=0.7, min_kept=100000, weight=None): # super(OhemCrossEntropy, self).__init__() diff --git a/src/netspresso_trainer/metrics/__init__.py b/src/netspresso_trainer/metrics/__init__.py index 330ddc644..923da55ed 100644 --- a/src/netspresso_trainer/metrics/__init__.py +++ b/src/netspresso_trainer/metrics/__init__.py @@ -1 +1 @@ -from .builder import build_metrics \ No newline at end of file +from .builder import build_metrics diff --git a/src/netspresso_trainer/metrics/detection/metric.py b/src/netspresso_trainer/metrics/detection/metric.py index 17a83845d..93e2070fd 100644 --- a/src/netspresso_trainer/metrics/detection/metric.py +++ b/src/netspresso_trainer/metrics/detection/metric.py @@ -167,7 +167,7 @@ def average_precisions_per_class( class DetectionMetric(BaseMetric): metric_names: List[str] = ['map50', 'map75', 'map50_95'] primary_metric: str = 'map50_95' - + def __init__(self, **kwargs): super().__init__() diff --git a/src/netspresso_trainer/metrics/registry.py b/src/netspresso_trainer/metrics/registry.py index 381ab64fe..73603e133 100644 --- a/src/netspresso_trainer/metrics/registry.py +++ b/src/netspresso_trainer/metrics/registry.py @@ -11,4 +11,4 @@ 'detection': DetectionMetric } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/models/op/base_metaformer.py b/src/netspresso_trainer/models/op/base_metaformer.py index 5a23d5cb6..65237e741 100644 --- a/src/netspresso_trainer/models/op/base_metaformer.py +++ b/src/netspresso_trainer/models/op/base_metaformer.py @@ -53,7 +53,7 @@ def __init__( attention_bias_resolution = 16, ) -> None: super().__init__() - + attention_hidden_size = attention_hidden_size if attention_hidden_size is not None else hidden_size value_hidden_size = value_hidden_size if value_hidden_size is not None else attention_hidden_size @@ -62,17 +62,17 @@ def __init__( f"The hidden size {attention_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + if value_hidden_size % num_attention_heads != 0: raise ValueError( f"The hidden size {value_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + self.num_attention_heads = num_attention_heads self.attention_head_size = int(attention_hidden_size / num_attention_heads) self.value_attention_head_size = int(value_hidden_size / num_attention_heads) - + self.head_size = self.num_attention_heads * self.attention_head_size self.value_head_size = self.num_attention_heads * self.value_attention_head_size self.attention_scale = attention_scale if attention_scale is not None \ @@ -82,7 +82,7 @@ def __init__( self.query = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.key = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.value = nn.Linear(hidden_size, self.value_head_size, bias=use_qkv_bias) # ... x C -> ... x C_v - + self.linear = nn.Linear(self.value_head_size, hidden_size) # ... x C_v -> ... x C self.dropout = nn.Dropout(attention_dropout_prob) @@ -118,14 +118,14 @@ def __init__( # torch.zeros(self.num_attention_heads, len(attention_offsets))) # self.register_buffer('attention_bias_idxs_seg', # torch.LongTensor(idxs).view(N, N)) - + self.use_cross_attention = use_cross_attention def transpose_for_scores(self, x: Tensor, attention_head_size: int) -> Tensor: new_x_shape = x.size()[:-1] + (self.num_attention_heads, attention_head_size) x = x.view(new_x_shape) return x.permute(0, 2, 1, 3) - + def sequence_reduce(self, x: Tensor, height: int, width: int) -> Tensor: """SegFormer """ @@ -167,7 +167,7 @@ def forward( """ mixed_query_layer = self.query(query_states) # B x S_s x C_qk - + if not self.use_cross_attention: # Self-attention key_value_states = query_states # B x S_t(=S_s) x C_qk if self.use_sequence_reduction: @@ -180,7 +180,7 @@ def forward( attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) # B x {head} x S_s x S_t attention_scores = attention_scores / self.attention_scale # B x {head} x S_s x S_t - + if self.use_attention_bias: bias = self.attention_biases[:, self.attention_bias_idxs] bias = nn.functional.interpolate(bias.unsqueeze(0), size=(attention_scores.size(-2), attention_scores.size(-1)), mode='bicubic') @@ -199,15 +199,15 @@ def forward( context_layer = context_layer.permute(0, 2, 1, 3).contiguous() # B x S_s x {head} x C_vsplit new_context_layer_shape = context_layer.size()[:-2] + (self.value_head_size,) context_layer = context_layer.view(new_context_layer_shape) # B x S_s x C_v - + context_layer = self.linear(context_layer) # B x S_s x C context_layer = self.dropout(context_layer) # B x S_s x C if self.output_with_attentions: return (context_layer, attention_probs) - + return context_layer # B x S_s x C - + class ChannelMLP(nn.Module): def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_activation_type='silu'): super().__init__() @@ -218,7 +218,7 @@ def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_a self.ffn.add_module('dense2', nn.Linear(in_features=intermediate_size, out_features=hidden_size, bias=True)) self.dropout = nn.Dropout(p=hidden_dropout_prob) - + def forward(self, x): x = self.ffn(x) x = self.dropout(x) @@ -231,20 +231,20 @@ def __init__(self, hidden_size, layer_norm_eps) -> None: self.layernorm_after = nn.LayerNorm(hidden_size) self.token_mixer = nn.Identity() # MultiHeadAttention() self.channel_mlp = nn.Identity() # ChannelMLP() - + def forward(self, x): out_token_mixer = self.layernorm_before(x) out_token_mixer = self.token_mixer(out_token_mixer) - + out_token_mixer = out_token_mixer + x - + out_final = self.layernorm_after(out_token_mixer) out_final = self.channel_mlp(out_final) - + out_final = out_final + out_token_mixer - + return out_final - + class MetaFormerEncoder(nn.Module): def __init__(self) -> None: super().__init__() @@ -252,7 +252,7 @@ def __init__(self) -> None: # self.blocks = nn.Sequential( # *[MetaFormerBlock(hidden_size, layer_norm_eps) for _ in range(num_layers)] # ) - + def forward(self, x): x = self.blocks(x) return x @@ -262,7 +262,7 @@ def __init__(self, hidden_sizes) -> None: super().__init__() self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes - + self.patch_embed = nn.Identity() self.encoder = MetaFormerEncoder() self.norm = nn.Identity() @@ -270,14 +270,14 @@ def __init__(self, hidden_sizes) -> None: @property def feature_dim(self): return self._feature_dim - + @property def intermediate_features_dim(self): return self._intermediate_features_dim - + def forward(self, x: FXTensorType): x = self.patch_embed(x) x = self.encoder(x) x = self.norm(x) feat = torch.mean(x, dim=1) - return BackboneOutput(last_feature=feat) \ No newline at end of file + return BackboneOutput(last_feature=feat) diff --git a/src/netspresso_trainer/models/op/custom.py b/src/netspresso_trainer/models/op/custom.py index 787156255..7dd1752cd 100644 --- a/src/netspresso_trainer/models/op/custom.py +++ b/src/netspresso_trainer/models/op/custom.py @@ -296,9 +296,9 @@ def __init__( # project layers.append( ConvLayer( - in_channels=hidden_channels, - out_channels=out_channels, - kernel_size=1, + in_channels=hidden_channels, + out_channels=out_channels, + kernel_size=1, norm_type=norm_type, use_act=False ) @@ -365,7 +365,7 @@ def __init__( self.patch_dim = patch_dim self.register_buffer("pe", pos_encoding) - + def forward_patch_last( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: @@ -385,8 +385,8 @@ def forward_others( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: # seq_length should be the second last dim - - # @deepkyu: [fx tracing] Always `indices` is None + + # @deepkyu: [fx tracing] Always `indices` is None # if indices is None: # x = x + self.pe[..., : x.shape[-2], :] # else: @@ -396,10 +396,10 @@ def forward_others( # pe = self.pe.expand(repeat_size) # selected_pe = torch.gather(pe, index=indices, dim=-2) # x = x + selected_pe - + # x = x + self.pe[..., :seq_index, :] x = x + tensor_slice(self.pe, dim=1, index=x.shape[-2]) - + return x def forward(self, x, indices: Optional[Tensor] = None, *args, **kwargs) -> Tensor: @@ -480,7 +480,7 @@ def forward(self, x: Tensor) -> Tensor: # dims = [-3, -2, -1] # else: # raise NotImplementedError("Currently 2D and 3D global pooling supported") - + return self._global_pool(x, dims=(-2, -1)) # def profile_module(self, input: Tensor) -> Tuple[Tensor, float, float]: @@ -497,9 +497,9 @@ class Focus(nn.Module): def __init__(self, in_channels, out_channels, ksize=1, stride=1, act_type="silu"): super().__init__() self.conv = ConvLayer(in_channels=in_channels * 4, - out_channels=out_channels, - kernel_size=ksize, - stride=stride, + out_channels=out_channels, + kernel_size=ksize, + stride=stride, act_type=act_type) def forward(self, x): @@ -542,25 +542,25 @@ def __init__( # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() hidden_channels = int(out_channels * expansion) # hidden channels - self.conv1 = ConvLayer(in_channels=in_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, - kernel_size=1, + kernel_size=1, stride=1, act_type=act_type) self.conv2 = ConvLayer(in_channels=in_channels, - out_channels=hidden_channels, - kernel_size=1, + out_channels=hidden_channels, + kernel_size=1, stride=1, act_type=act_type) - self.conv3 = ConvLayer(in_channels=2 * hidden_channels, - out_channels=out_channels, - kernel_size=1, + self.conv3 = ConvLayer(in_channels=2 * hidden_channels, + out_channels=out_channels, + kernel_size=1, stride=1, act_type=act_type) - + block = DarknetBlock module_list = [ block( - in_channels=hidden_channels, - out_channels=hidden_channels, + in_channels=hidden_channels, + out_channels=hidden_channels, shortcut=shortcut, expansion=1.0, act_type=act_type @@ -585,7 +585,7 @@ def __init__( ): super().__init__() hidden_channels = in_channels // 2 - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) self.m = nn.ModuleList( [ @@ -594,7 +594,7 @@ def __init__( ] ) conv2_channels = hidden_channels * (len(kernel_sizes) + 1) - self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, kernel_size=1, stride=1, act_type=act_type) def forward(self, x): @@ -618,9 +618,9 @@ def __init__( ): super().__init__() hidden_channels = int(out_channels * expansion) - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) - self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, kernel_size=3, stride=1, act_type=act_type) self.use_add = shortcut and in_channels == out_channels diff --git a/src/netspresso_trainer/models/op/depth.py b/src/netspresso_trainer/models/op/depth.py index 9c626bb78..276527d39 100644 --- a/src/netspresso_trainer/models/op/depth.py +++ b/src/netspresso_trainer/models/op/depth.py @@ -31,4 +31,4 @@ def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): self.scale_by_keep = scale_by_keep def forward(self, x): - return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) \ No newline at end of file + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) diff --git a/src/netspresso_trainer/models/op/registry.py b/src/netspresso_trainer/models/op/registry.py index 4666e9f58..10c6f8cb8 100644 --- a/src/netspresso_trainer/models/op/registry.py +++ b/src/netspresso_trainer/models/op/registry.py @@ -16,4 +16,4 @@ 'silu': nn.SiLU, 'swish': nn.SiLU, 'hard_swish': nn.Hardswish, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/optimizers/__init__.py b/src/netspresso_trainer/optimizers/__init__.py index b1b64e76b..871ebc6e1 100644 --- a/src/netspresso_trainer/optimizers/__init__.py +++ b/src/netspresso_trainer/optimizers/__init__.py @@ -1 +1 @@ -from .builder import build_optimizer \ No newline at end of file +from .builder import build_optimizer diff --git a/src/netspresso_trainer/optimizers/builder.py b/src/netspresso_trainer/optimizers/builder.py index 3d2d2bf63..460af3fec 100644 --- a/src/netspresso_trainer/optimizers/builder.py +++ b/src/netspresso_trainer/optimizers/builder.py @@ -21,7 +21,7 @@ def build_optimizer( 'adam', 'adamw', 'adamax', 'adadelta', 'adagrad', 'rmsprop'] = opt.lower() assert opt_name in OPTIMIZER_DICT - + conf_optim = {'weight_decay': wd, 'lr': lr} if opt_name in ['sgd', 'nesterov', 'momentum', 'rmsprop']: @@ -32,7 +32,7 @@ def build_optimizer( conf_optim.update({'nesterov': True}) if opt_name in ['momentum']: conf_optim.update({'nesterov': False}) - + optimizer = OPTIMIZER_DICT[opt_name](parameters, **conf_optim) return optimizer diff --git a/src/netspresso_trainer/optimizers/registry.py b/src/netspresso_trainer/optimizers/registry.py index f4c3fc220..4f1ff7591 100644 --- a/src/netspresso_trainer/optimizers/registry.py +++ b/src/netspresso_trainer/optimizers/registry.py @@ -14,4 +14,4 @@ 'sgd': optim.SGD, 'nesterov': optim.SGD, 'momentum': optim.SGD, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/pipelines/builder.py b/src/netspresso_trainer/pipelines/builder.py index 4773261c8..18698855c 100644 --- a/src/netspresso_trainer/pipelines/builder.py +++ b/src/netspresso_trainer/pipelines/builder.py @@ -9,9 +9,9 @@ def build_pipeline(conf, task, model_name, model, devices, train_dataloader, eva task_ = "detection-two-stage" if conf.model.architecture.head.name in ["faster_rcnn"] else "detection-one-stage" task_pipeline = TASK_PIPELINE[task_] - + trainer = task_pipeline(conf, task, model_name, model, devices, train_dataloader, eval_dataloader, class_map, is_graphmodule_training=is_graphmodule_training, profile=profile) - return trainer \ No newline at end of file + return trainer diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 3d7e1ab54..5cdef182c 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -139,7 +139,7 @@ def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']) pred_on_image['post_labels'] = class_idx pred.append(pred_on_image) self.metric_factory.calc(pred, target=targets, phase=phase) - + def save_checkpoint(self, epoch: int): # Check whether the valid loss is minimum at this epoch @@ -211,9 +211,9 @@ def train_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices),} for box, label in zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -235,7 +235,7 @@ def train_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } @@ -247,9 +247,9 @@ def valid_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices)} for box, label in zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -269,7 +269,7 @@ def valid_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } diff --git a/src/netspresso_trainer/pipelines/registry.py b/src/netspresso_trainer/pipelines/registry.py index 61fb0dd2a..b0110bee7 100644 --- a/src/netspresso_trainer/pipelines/registry.py +++ b/src/netspresso_trainer/pipelines/registry.py @@ -13,4 +13,4 @@ 'segmentation': SegmentationPipeline, 'detection-two-stage': TwoStageDetectionPipeline, 'detection-one-stage': OneStageDetectionPipeline, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/schedulers/builder.py b/src/netspresso_trainer/schedulers/builder.py index ce566554b..df4b49f3a 100644 --- a/src/netspresso_trainer/schedulers/builder.py +++ b/src/netspresso_trainer/schedulers/builder.py @@ -15,8 +15,8 @@ def build_scheduler(optimizer, conf_training): 'total_iters': num_epochs, 'iters_per_phase': conf_training.iters_per_phase, # TODO: config for StepLR }) - + assert scheduler_name in SCHEDULER_DICT, f"{scheduler_name} not in scheduler dict!" lr_scheduler = SCHEDULER_DICT[scheduler_name](optimizer, **conf_sched) - + return lr_scheduler, num_epochs diff --git a/src/netspresso_trainer/schedulers/cosine_lr.py b/src/netspresso_trainer/schedulers/cosine_lr.py index aac4d3017..e24286a40 100644 --- a/src/netspresso_trainer/schedulers/cosine_lr.py +++ b/src/netspresso_trainer/schedulers/cosine_lr.py @@ -31,10 +31,10 @@ def get_lr(self): if not self._get_lr_called_within_step: warnings.warn("To get the last learning rate computed by the scheduler, " "please use `get_last_lr()`.", UserWarning, stacklevel=2) - + if self.last_epoch > self.T_max: return [group['lr'] for group in self.optimizer.param_groups] - + if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters: return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr) for base_lr in self.base_lrs] @@ -63,4 +63,4 @@ def _get_closed_form_lr(self): ) ) for base_lr in self.base_lrs - ] \ No newline at end of file + ] diff --git a/src/netspresso_trainer/schedulers/cosine_warm_restart.py b/src/netspresso_trainer/schedulers/cosine_warm_restart.py index 4e114142f..ba0f501f8 100644 --- a/src/netspresso_trainer/schedulers/cosine_warm_restart.py +++ b/src/netspresso_trainer/schedulers/cosine_warm_restart.py @@ -91,7 +91,7 @@ def get_reassigned_t_i(current_t_i, next_t_i, remain_epochs): return remain_epochs, remain_epochs return current_t_i, remain_epochs - + def _step_without_given_epoch(self) -> int: if self.last_epoch < 0: epoch = 0 @@ -105,7 +105,7 @@ def _step_without_given_epoch(self) -> int: self.T_i = self.T_i * self.T_mult self.T_i, self.remain_iters = self.get_reassigned_t_i(self.T_i, self.T_i * self.T_mult, self.remain_iters) return epoch - + def step(self, epoch=None): """Step could be called after every batch update @@ -139,7 +139,7 @@ def step(self, epoch=None): else: if epoch < 0: raise ValueError("Expected non-negative epoch, but got {}".format(epoch)) - + if epoch >= self.T_0: if self.T_mult == 1: self.T_cur = epoch % self.T_0 diff --git a/src/netspresso_trainer/schedulers/poly_lr.py b/src/netspresso_trainer/schedulers/poly_lr.py index 9c9b21c20..d7f624610 100644 --- a/src/netspresso_trainer/schedulers/poly_lr.py +++ b/src/netspresso_trainer/schedulers/poly_lr.py @@ -34,7 +34,7 @@ def get_lr(self): if self.last_epoch > self.total_iters: return [group["lr"] for group in self.optimizer.param_groups] - + if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters: return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr) for base_lr in self.base_lrs] @@ -45,7 +45,7 @@ def get_lr(self): return [self.min_lr + (group["lr"] - self.min_lr) * decay_factor for group in self.optimizer.param_groups] def _get_closed_form_lr(self): - decay_steps = self.total_iters - self.warmup_iters + decay_steps = self.total_iters - self.warmup_iters return [ ( min( @@ -54,4 +54,4 @@ def _get_closed_form_lr(self): ) ) for base_lr in self.base_lrs - ] \ No newline at end of file + ] diff --git a/src/netspresso_trainer/schedulers/registry.py b/src/netspresso_trainer/schedulers/registry.py index d389ae0d8..992a9d3cc 100644 --- a/src/netspresso_trainer/schedulers/registry.py +++ b/src/netspresso_trainer/schedulers/registry.py @@ -13,4 +13,4 @@ 'cosine_no_sgdr': CosineAnnealingLRWithCustomWarmUp, 'poly': PolynomialLRWithWarmUp, 'step': StepLR -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/schedulers/step_lr.py b/src/netspresso_trainer/schedulers/step_lr.py index 26776e7c7..ab97636fd 100644 --- a/src/netspresso_trainer/schedulers/step_lr.py +++ b/src/netspresso_trainer/schedulers/step_lr.py @@ -52,4 +52,4 @@ def get_lr(self): def _get_closed_form_lr(self): return [base_lr * self.gamma ** (self.last_epoch // self.step_size) - for base_lr in self.base_lrs] \ No newline at end of file + for base_lr in self.base_lrs] diff --git a/src/netspresso_trainer/trainer_cli.py b/src/netspresso_trainer/trainer_cli.py index 30f7fda32..e0fa0c640 100644 --- a/src/netspresso_trainer/trainer_cli.py +++ b/src/netspresso_trainer/trainer_cli.py @@ -13,17 +13,17 @@ def run_distributed_training_script(gpu_ids, data, augmentation, model, training, logging, environment, log_level): - + command = [ "--data", data, - "--augmentation", augmentation, + "--augmentation", augmentation, "--model", model, "--training", training, "--logging", logging, "--environment", environment, "--log_level", log_level, ] - + # Distributed training script command = [ 'python', '-m', 'torch.distributed.launch', @@ -46,10 +46,10 @@ def parse_gpu_ids(gpu_arg: str): """Parse comma-separated GPU IDs and return as a list of integers.""" try: gpu_ids = [int(id) for id in gpu_arg.split(',')] - + if len(gpu_ids) == 1: # Single GPU return gpu_ids[0] - + gpu_ids = sorted(gpu_ids) return gpu_ids except ValueError as e: @@ -61,7 +61,7 @@ def parse_args_netspresso(with_gpus=False): parser = argparse.ArgumentParser(description="Parser for NetsPresso configuration") # -------- User arguments ---------------------------------------- - + if with_gpus: parser.add_argument( '--gpus', type=parse_gpu_ids, default=0, @@ -111,7 +111,7 @@ def parse_args_netspresso(with_gpus=False): def set_arguments(data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str]) -> DictConfig: - + conf_data = OmegaConf.load(data) conf_augmentation = OmegaConf.load(augmentation) conf_model = OmegaConf.load(model) @@ -126,19 +126,19 @@ def set_arguments(data: Union[Path, str], augmentation: Union[Path, str], conf.merge_with(conf_training) conf.merge_with(conf_logging) conf.merge_with(conf_environment) - + return conf def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str], log_level: str = LOG_LEVEL): - + assert isinstance(gpus, (list, int)) gpu_ids_str = ','.join(map(str, gpus)) if isinstance(gpus, list) else str(gpus) os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids_str torch.cuda.empty_cache() # Reinitialize CUDA to apply the change - + if isinstance(gpus, int): conf = set_arguments(data, augmentation, model, training, logging, environment) train_common(conf, log_level=log_level) @@ -148,7 +148,7 @@ def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augment def train_cli() -> None: args_parsed = parse_args_netspresso(with_gpus=True) - + train_with_yaml_impl( gpus=args_parsed.gpus, data=args_parsed.data, @@ -163,7 +163,7 @@ def train_cli() -> None: def train_cli_without_additional_gpu_check() -> None: args_parsed = parse_args_netspresso(with_gpus=False) - + conf = set_arguments( data=args_parsed.data, augmentation=args_parsed.augmentation, @@ -177,6 +177,6 @@ def train_cli_without_additional_gpu_check() -> None: if __name__ == "__main__": - + # Execute by `run_distributed_training_script` - train_cli_without_additional_gpu_check() \ No newline at end of file + train_cli_without_additional_gpu_check() diff --git a/src/netspresso_trainer/trainer_inline.py b/src/netspresso_trainer/trainer_inline.py index 556194df8..65d2b7893 100644 --- a/src/netspresso_trainer/trainer_inline.py +++ b/src/netspresso_trainer/trainer_inline.py @@ -12,7 +12,7 @@ def set_struct_recursive(conf: DictConfig, value: bool) -> None: OmegaConf.set_struct(conf, value) - + for _, conf_value in conf.items(): if isinstance(conf_value, DictConfig): set_struct_recursive(conf_value, value) @@ -32,9 +32,9 @@ def train_with_config(config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str], log_level: str = LOG_LEVEL): - + gpus: Union[List, int] = parse_gpu_ids(gpus) - + train_with_yaml_impl( gpus=gpus, data=data, @@ -44,4 +44,4 @@ def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path, logging=logging, environment=environment, log_level=log_level - ) \ No newline at end of file + ) diff --git a/src/netspresso_trainer/utils/logger.py b/src/netspresso_trainer/utils/logger.py index bf296de13..6b320a09c 100644 --- a/src/netspresso_trainer/utils/logger.py +++ b/src/netspresso_trainer/utils/logger.py @@ -24,7 +24,7 @@ def _custom_logger(name: str, level: str, distributed: bool): else: fmt = '%(asctime)s | %(levelname)s\t\t| %(funcName)s:<%(filename)s>:%(lineno)s >>> %(message)s' logger = logging.getLogger(name) - + if not logger.hasHandlers(): handler = logging.StreamHandler() @@ -46,7 +46,7 @@ def set_logger(logger_name="netspresso_trainer", level: str = 'INFO', distribute print("Skipping timezone setting.") _level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = level.upper() _custom_logger(logger_name, _level, distributed) - + logger = logging.getLogger(logger_name) if _level == 'DEBUG': logger.setLevel(logging.DEBUG) diff --git a/src/netspresso_trainer/utils/stats.py b/src/netspresso_trainer/utils/stats.py index aacc385dc..326721d24 100644 --- a/src/netspresso_trainer/utils/stats.py +++ b/src/netspresso_trainer/utils/stats.py @@ -12,7 +12,7 @@ def get_params_and_macs(model: nn.Module, sample_input: torch.Tensor): sample_input = sample_input.to(get_device(model)) # From v0.0.9 macs, params = _params_and_macs_fvcore(model, sample_input) - + # # Before v0.0.9 # macs, params = _params_and_macs_thop(model, sample_input) @@ -25,4 +25,4 @@ def _params_and_macs_fvcore(model: nn.Module, sample_input: torch.Tensor): def _params_and_macs_thop(model: nn.Module, sample_input: torch.Tensor): macs, params = thop.profile(model, inputs=(sample_input,), verbose=False) - return macs, params \ No newline at end of file + return macs, params From 43f7ab9b6300aa0fec16dde59c349c083551129f Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:01:30 +0900 Subject: [PATCH 034/167] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae55350ca..0f11bee62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ No changes to highlight. ## Other Changes: -No changes to highlight. +- Update ruff rule (`W`) by `@deepkyu` in [PR 218](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/218) # v0.0.9 From 1a161fe3d5a94e6a61cc688bf3928ec25ec1af77 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:05:44 +0900 Subject: [PATCH 035/167] Merge branch '216-add-ruff-rule-wwarning' --- CHANGELOG.md | 2 +- pyproject.toml | 1 + src/netspresso_trainer/__init__.py | 2 +- src/netspresso_trainer/cfg/__init__.py | 16 +++--- src/netspresso_trainer/cfg/data.py | 2 +- src/netspresso_trainer/cfg/model.py | 6 +-- src/netspresso_trainer/cfg/training.py | 2 +- .../dataloaders/augmentation/__init__.py | 2 +- .../dataloaders/augmentation/custom.py | 10 ++-- src/netspresso_trainer/dataloaders/base.py | 22 ++++---- .../dataloaders/classification/dataset.py | 54 +++++++++---------- .../dataloaders/detection/dataset.py | 22 ++++---- .../dataloaders/detection/local.py | 14 ++--- .../dataloaders/registry.py | 2 +- .../dataloaders/utils/constants.py | 2 +- .../dataloaders/utils/misc.py | 2 +- src/netspresso_trainer/loggers/base.py | 40 +++++++------- src/netspresso_trainer/loggers/builder.py | 8 +-- src/netspresso_trainer/loggers/csv.py | 8 +-- src/netspresso_trainer/loggers/image.py | 16 +++--- src/netspresso_trainer/loggers/netspresso.py | 36 ++++++------- src/netspresso_trainer/loggers/registry.py | 2 +- src/netspresso_trainer/loggers/stdout.py | 10 ++-- src/netspresso_trainer/loggers/tensorboard.py | 12 ++--- src/netspresso_trainer/loggers/visualizer.py | 8 +-- .../losses/classification/__init__.py | 2 +- .../losses/classification/label_smooth.py | 2 +- .../losses/classification/soft_target.py | 2 +- src/netspresso_trainer/losses/common.py | 2 +- .../losses/detection/__init__.py | 2 +- .../losses/detection/fastrcnn.py | 16 +++--- .../losses/detection/yolox.py | 14 ++--- src/netspresso_trainer/losses/registry.py | 2 +- .../losses/segmentation/pidnet.py | 6 +-- src/netspresso_trainer/metrics/__init__.py | 2 +- .../metrics/detection/metric.py | 2 +- src/netspresso_trainer/metrics/registry.py | 2 +- .../models/op/base_metaformer.py | 48 ++++++++--------- src/netspresso_trainer/models/op/custom.py | 52 +++++++++--------- src/netspresso_trainer/models/op/depth.py | 2 +- src/netspresso_trainer/models/op/registry.py | 2 +- src/netspresso_trainer/optimizers/__init__.py | 2 +- src/netspresso_trainer/optimizers/builder.py | 4 +- src/netspresso_trainer/optimizers/registry.py | 2 +- src/netspresso_trainer/pipelines/builder.py | 4 +- src/netspresso_trainer/pipelines/detection.py | 18 +++---- src/netspresso_trainer/pipelines/registry.py | 2 +- src/netspresso_trainer/schedulers/builder.py | 4 +- .../schedulers/cosine_lr.py | 6 +-- .../schedulers/cosine_warm_restart.py | 6 +-- src/netspresso_trainer/schedulers/poly_lr.py | 6 +-- src/netspresso_trainer/schedulers/registry.py | 2 +- src/netspresso_trainer/schedulers/step_lr.py | 2 +- src/netspresso_trainer/trainer_cli.py | 28 +++++----- src/netspresso_trainer/trainer_inline.py | 8 +-- src/netspresso_trainer/utils/logger.py | 4 +- src/netspresso_trainer/utils/stats.py | 4 +- 57 files changed, 281 insertions(+), 280 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae55350ca..0f11bee62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ No changes to highlight. ## Other Changes: -No changes to highlight. +- Update ruff rule (`W`) by `@deepkyu` in [PR 218](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/218) # v0.0.9 diff --git a/pyproject.toml b/pyproject.toml index 303021ee6..ab66ec722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ extend-select = [ "I", "SIM", "INP001", + "W" ] ignore = [ diff --git a/src/netspresso_trainer/__init__.py b/src/netspresso_trainer/__init__.py index 24b54002f..0c4517b21 100644 --- a/src/netspresso_trainer/__init__.py +++ b/src/netspresso_trainer/__init__.py @@ -10,4 +10,4 @@ version = (Path(__file__).parent / "VERSION").read_text().strip() -__version__ = version \ No newline at end of file +__version__ = version diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 96e120e22..dd176a035 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -69,19 +69,19 @@ class TrainerConfig: training: Optional[ScheduleConfig] = None logging: LoggingConfig = field(default_factory=lambda: LoggingConfig()) environment: EnvironmentConfig = field(default_factory=lambda: EnvironmentConfig()) - + @property def epochs(self) -> int: return self.training.epochs - + @property def batch_size(self) -> int: return self.training.batch_size - + @property def num_workers(self) -> int: return self.environment.num_workers - + @epochs.setter def epochs(self, v: int) -> None: self.training.epochs = v @@ -89,18 +89,18 @@ def epochs(self, v: int) -> None: @batch_size.setter def batch_size(self, v: int) -> None: self.training.batch_size = v - + @num_workers.setter def num_workers(self, v: int) -> None: self.environment.num_workers = v - + def __post_init__(self): assert self.task in ['classification', 'segmentation', 'detection'] self.data.task = self.task self.model.task = self.task - + if self.auto: if self.augmentation is None: self.augmentation = AugmentationConfig() if self.training is None: - self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() \ No newline at end of file + self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() diff --git a/src/netspresso_trainer/cfg/data.py b/src/netspresso_trainer/cfg/data.py index 000624b1c..21e2abc54 100644 --- a/src/netspresso_trainer/cfg/data.py +++ b/src/netspresso_trainer/cfg/data.py @@ -262,4 +262,4 @@ class HuggingFaceSegmentationDatasetConfig(DatasetConfig): subset="full", features={"image": "image", "label": "artist"} ) -) \ No newline at end of file +) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index c9d476043..4203c47b8 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -25,10 +25,10 @@ class ArchitectureConfig: full: Optional[Dict[str, Any]] = None backbone: Optional[Dict[str, Any]] = None head: Optional[Dict[str, Any]] = None - + def __post_init__(self): assert bool(self.full) != bool(self.backbone), "Only one of full or backbone should be given." - + @dataclass class ModelConfig: task: str = MISSING @@ -71,7 +71,7 @@ class EfficientFormerArchitectureConfig(ArchitectureConfig): class MobileNetV3ArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "mobilenetv3_small", - + # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] "block_info": [ [ diff --git a/src/netspresso_trainer/cfg/training.py b/src/netspresso_trainer/cfg/training.py index a13be88fb..3c0c32ff6 100644 --- a/src/netspresso_trainer/cfg/training.py +++ b/src/netspresso_trainer/cfg/training.py @@ -32,4 +32,4 @@ class SegmentationScheduleConfig(ScheduleConfig): @dataclass class DetectionScheduleConfig(ScheduleConfig): - pass \ No newline at end of file + pass diff --git a/src/netspresso_trainer/dataloaders/augmentation/__init__.py b/src/netspresso_trainer/dataloaders/augmentation/__init__.py index 34ae87e36..624fcd12f 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/augmentation/__init__.py @@ -12,4 +12,4 @@ Resize, ToTensor, ) -from .registry import TRANSFORM_DICT \ No newline at end of file +from .registry import TRANSFORM_DICT diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index a40b4b495..2e0673727 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -29,7 +29,7 @@ def _get_transformed(self, image, mask, bbox, visualize_for_debug): for t in self.transforms: if visualize_for_debug and not t.visualize: continue - image, mask, bbox = t(image=image, mask=mask, bbox=bbox) + image, mask, bbox = t(image=image, mask=mask, bbox=bbox) return image, mask, bbox def __call__(self, image, mask=None, bbox=None, visualize_for_debug=False, **kwargs): @@ -103,7 +103,7 @@ def __init__(self, size, interpolation='bilinear', max_size=None, antialias=None # TODO: There is logic error in forward. If `size` is int, this specify edge for shorter one. # And, this is not match with bbox computing logic. - # Thus, automatically transform to sequence format for now, + # Thus, automatically transform to sequence format for now, # but this should be specified whether Resize receives sequence or int. if isinstance(size, int): size = [size, size] @@ -294,11 +294,11 @@ def __repr__(self): class RandomResizedCrop(T.RandomResizedCrop): visualize = True - def __init__(self, + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), - interpolation='bilinear', + interpolation='bilinear', antialias: Optional[bool]=None): interpolation = INVERSE_MODES_MAPPING[interpolation] super().__init__(size, scale, ratio, interpolation, antialias) @@ -379,4 +379,4 @@ def __call__(self, image, mask=None, bbox=None): return image, mask, bbox def __repr__(self): - return self.__class__.__name__ + "()" \ No newline at end of file + return self.__class__.__name__ + "()" diff --git a/src/netspresso_trainer/dataloaders/base.py b/src/netspresso_trainer/dataloaders/base.py index 5e1e8b116..e00bb5a1e 100644 --- a/src/netspresso_trainer/dataloaders/base.py +++ b/src/netspresso_trainer/dataloaders/base.py @@ -15,10 +15,10 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, split self.conf_data = conf_data self.conf_augmentation = conf_augmentation self.model_name = model_name - + self.transform = transform self.samples = samples - + self._root = conf_data.path.root self._idx_to_class = idx_to_class self._num_classes = len(self._idx_to_class) @@ -47,12 +47,12 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label - - + + class BaseHFDataset(data.Dataset): def __init__(self, conf_data, conf_augmentation, model_name, root, split, with_label): @@ -64,7 +64,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, root, split, with_l self._split = split self._with_label = with_label - def _load_dataset(self, root, subset_name=None, cache_dir=None): + def _load_dataset(self, root, subset_name=None, cache_dir=None): from datasets import load_dataset if cache_dir is not None: Path(cache_dir).mkdir(exist_ok=True, parents=True) @@ -94,7 +94,7 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label @@ -104,15 +104,15 @@ class BaseDataSampler(ABC): def __init__(self, conf_data, train_valid_split_ratio): self.conf_data = conf_data self.train_valid_split_ratio = train_valid_split_ratio - + @abstractmethod def load_data(self): raise NotImplementedError - + @abstractmethod def load_samples(self): raise NotImplementedError - + @abstractmethod def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index cd42daa2a..bed9088c5 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -16,21 +16,21 @@ logger = logging.getLogger("netspresso_trainer") VALID_IMG_EXTENSIONS = IMG_EXTENSIONS + tuple((x.upper() for x in IMG_EXTENSIONS)) - + def load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename: Optional[Union[str, Path]]=None, id_mapping: Optional[Dict[str, str]]=None): if map_or_filename is None: # may be labeled with directory - # dir -> + # dir -> dir_list = [x.name for x in Path(train_dir).iterdir() if x.is_dir()] dir_to_class = id_mapping if id_mapping is not None else {k: k for k in dir_list} # id_mapping or identity - + class_list = [dir_to_class[dir] for dir in dir_list] class_list = sorted(class_list, key=lambda k: natural_key(k)) _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(class_list)} idx_to_class = {v: k for k, v in _class_to_idx.items()} - + file_or_dir_to_idx = {dir: _class_to_idx[dir_to_class[dir]] for dir in dir_list} # dir -> idx return file_or_dir_to_idx, idx_to_class @@ -45,9 +45,9 @@ def load_class_map_with_id_mapping(root_dir, train_dir, reader = csv.DictReader(csvfile) file_class_list = [{column: str(row[column]).strip() for column in ['image_id', 'class']} for row in reader] - + class_stats = Counter([x['class'] for x in file_class_list]) - + _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(sorted(class_stats, key=lambda k: natural_key(k)))} idx_to_class = {v: k for k, v in _class_to_idx.items()} @@ -62,26 +62,26 @@ def is_file_dict(image_dir: Union[Path, str], file_or_dir_to_idx): file_or_dir: Path = image_dir / candidate_name if file_or_dir.exists(): return file_or_dir.is_file() - + file_candidates = list(image_dir.glob(f"{candidate_name}.*")) assert len(file_candidates) != 0, f"Unknown label format! Is there any something file like {file_or_dir} ?" - + return True class ClassficationDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(ClassficationDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, file_or_dir_to_idx, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] image_dir: Path = data_root / split_dir.image - + images_and_targets: List[Dict[str, Optional[Union[str, int]]]] = [] - + assert split in ['train', 'valid', 'test'], f"split should be either {['train', 'valid', 'test']}" if split in ['train', 'valid']: - + if is_file_dict(image_dir, file_or_dir_to_idx): file_to_idx = file_or_dir_to_idx for file in chain(image_dir.glob(f'*{ext}') for ext in VALID_IMG_EXTENSIONS): @@ -92,7 +92,7 @@ def load_data(self, file_or_dir_to_idx, split='train'): images_and_targets.append({'image': str(file), 'label': file_to_idx[file.stem]}) continue logger.debug(f"Found file wihtout label: {file}") - + else: dir_to_idx = file_or_dir_to_idx for dir_name, dir_idx in dir_to_idx.items(): @@ -103,24 +103,24 @@ def load_data(self, file_or_dir_to_idx, split='train'): else: # split == test for ext in VALID_IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} for file in image_dir.glob(f'*{ext}')]) - + images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None root_dir = Path(self.conf_data.path.root) train_dir = root_dir / self.conf_data.path.train.image id_mapping: Optional[dict] = dict(self.conf_data.id_mapping) if self.conf_data.id_mapping is not None else None file_or_dir_to_idx, idx_to_class = load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename=self.conf_data.path.train.label, id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(file_or_dir_to_idx, split='train') if exists_valid: valid_samples = self.load_data(file_or_dir_to_idx, split='valid') @@ -128,16 +128,16 @@ def load_samples(self): test_samples = self.load_data(file_or_dir_to_idx, split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): from datasets import ClassLabel, load_dataset - + cache_dir = self.conf_data.metadata.custom_cache_dir root = self.conf_data.metadata.repo subset_name = self.conf_data.metadata.subset @@ -145,23 +145,23 @@ def load_huggingface_samples(self): cache_dir = Path(cache_dir) Path(cache_dir).mkdir(exist_ok=True, parents=True) total_dataset = load_dataset(root, name=subset_name, cache_dir=cache_dir) - + label_feature_name = self.conf_data.metadata.features.label label_feature = total_dataset['train'].features[label_feature_name] if isinstance(label_feature, ClassLabel): labels: List[str] = label_feature.names else: labels = list({sample[label_feature_name] for sample in total_dataset['train']}) - + if isinstance(labels[0], int): # TODO: find class_map <-> idx and apply it (ex. using id_mapping) idx_to_class: Dict[int, int] = {k: k for k in labels} elif isinstance(labels[0], str): idx_to_class: Dict[int, str] = dict(enumerate(labels)) - + exists_valid = 'validation' in total_dataset exists_test = 'test' in total_dataset - + train_samples = total_dataset['train'] valid_samples = None if exists_valid: @@ -174,4 +174,4 @@ def load_huggingface_samples(self): splitted_datasets = train_samples.train_test_split(test_size=(1 - self.train_valid_split_ratio)) train_samples = splitted_datasets['train'] valid_samples = splitted_datasets['test'] - return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} \ No newline at end of file + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} diff --git a/src/netspresso_trainer/dataloaders/detection/dataset.py b/src/netspresso_trainer/dataloaders/detection/dataset.py index fd6dbcf2c..769843754 100644 --- a/src/netspresso_trainer/dataloaders/detection/dataset.py +++ b/src/netspresso_trainer/dataloaders/detection/dataset.py @@ -49,7 +49,7 @@ def detection_collate_fn(original_batch): class DetectionDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(DetectionDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] @@ -71,7 +71,7 @@ def load_data(self, split='train'): images = sorted(images, key=lambda k: natural_key(k)) labels = sorted(labels, key=lambda k: natural_key(k)) images_and_targets.extend([{'image': str(image), 'label': str(label)} for image, label in zip(images, labels)]) - + elif split == 'test': for ext in IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} @@ -79,21 +79,21 @@ def load_data(self, split='train'): images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) else: raise AssertionError(f"split should be either {['train', 'valid', 'test']}") - + return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None assert self.conf_data.id_mapping is not None id_mapping: Optional[list] = list(self.conf_data.id_mapping) idx_to_class = load_custom_class_map(id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(split='train') if exists_valid: valid_samples = self.load_data(split='valid') @@ -101,12 +101,12 @@ def load_samples(self): test_samples = self.load_data(split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/detection/local.py b/src/netspresso_trainer/dataloaders/detection/local.py index 2ba297627..3dbbe8427 100644 --- a/src/netspresso_trainer/dataloaders/detection/local.py +++ b/src/netspresso_trainer/dataloaders/detection/local.py @@ -23,13 +23,13 @@ def exist_name(candidate, folder_iterable): def get_label(label_file: Path): target = Path(label_file).read_text() - + try: target_array = np.array([list(map(float, box.split(' '))) for box in target.split('\n') if box.strip()]) except ValueError as e: print(target) raise e - + label, boxes = target_array[:, 0], target_array[:, 1:] label = label[..., np.newaxis] return label, boxes @@ -43,7 +43,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, conf_data, conf_augmentation, model_name, idx_to_class, split, samples, transform, with_label, **kwargs ) - + @staticmethod def xywhn2xyxy(original: np.ndarray, w: int, h: int, padw=0, padh=0): converted = original.copy() @@ -67,12 +67,12 @@ def __getitem__(self, index): if ann_path is None: out = self.transform(self.conf_augmentation)(image=img) return {'pixel_values': out['image'], 'name': img_path.name, 'org_img': org_img, 'org_shape': (h, w)} - + outputs = {} label, boxes_yolo = get_label(Path(ann_path)) boxes = self.xywhn2xyxy(boxes_yolo, w, h) - + out = self.transform(self.conf_augmentation)(image=img, bbox=np.concatenate((boxes, label), axis=-1)) assert out['bbox'].shape[-1] == 5 # ltrb + class_label outputs.update({'pixel_values': out['image'], 'bbox': out['bbox'][..., :4], @@ -83,6 +83,6 @@ def __getitem__(self, index): return outputs assert self._split in ['val', 'valid', 'test'] - # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 + # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 outputs.update({'org_shape': (h, w)}) - return outputs \ No newline at end of file + return outputs diff --git a/src/netspresso_trainer/dataloaders/registry.py b/src/netspresso_trainer/dataloaders/registry.py index 1ab25afea..7ff3cc424 100644 --- a/src/netspresso_trainer/dataloaders/registry.py +++ b/src/netspresso_trainer/dataloaders/registry.py @@ -31,4 +31,4 @@ 'classification': ClassficationDataSampler, 'segmentation': SegmentationDataSampler, 'detection': DetectionDataSampler -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/dataloaders/utils/constants.py b/src/netspresso_trainer/dataloaders/utils/constants.py index b017eb440..91d73f299 100644 --- a/src/netspresso_trainer/dataloaders/utils/constants.py +++ b/src/netspresso_trainer/dataloaders/utils/constants.py @@ -1,4 +1,4 @@ -DEFAULT_CROP_PCT = 0.95 #0.875 +DEFAULT_CROP_PCT = 0.95 #0.875 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) diff --git a/src/netspresso_trainer/dataloaders/utils/misc.py b/src/netspresso_trainer/dataloaders/utils/misc.py index 0fab867f2..3fb736217 100644 --- a/src/netspresso_trainer/dataloaders/utils/misc.py +++ b/src/netspresso_trainer/dataloaders/utils/misc.py @@ -19,4 +19,4 @@ def expand_to_chs(x, n): def natural_key(string_): """See http://www.codinghorror.com/blog/archives/001018.html""" - return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] \ No newline at end of file + return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] diff --git a/src/netspresso_trainer/loggers/base.py b/src/netspresso_trainer/loggers/base.py index dffa31d6c..6dd321da3 100644 --- a/src/netspresso_trainer/loggers/base.py +++ b/src/netspresso_trainer/loggers/base.py @@ -13,41 +13,41 @@ def __init__(self, model, result_dir): self.model = model self.csv_path = Path(result_dir) / CSV_FILENAME self.header: List = [] - + self._temp_row_dict = {} - + if self.csv_path.exists(): self.csv_path.unlink() - + self._epoch = None - + @property @abstractmethod def key_map(self) -> Dict[str, str]: raise NotImplementedError - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def update_header(self, header: List): assert len(header) != 0 self.header = header - + with open(self.csv_path, 'a') as f: f.write(",".join(self.header)) f.write("\n") def _clear_temp(self): self._temp_row_dict = {} - + def _update_with_list(self, data: List): if data is not None and len(data) != 0: with open(self.csv_path, 'a') as f: @@ -55,18 +55,18 @@ def _update_with_list(self, data: List): f.write("\n") self._clear_temp() return - + def _update_specific(self, data: Dict): for _key, _value in data.items(): if _key not in self.header: raise AssertionError(f"The given key ({_key}) is not in {self.header}!") if _key not in self._temp_row_dict: self._temp_row_dict[_key] = _value - + if set(self.header) == set(self._temp_row_dict.keys()): self._update_with_list([self._temp_row_dict[_col] for _col in self.header]) return - + def update(self, data=None, **kwargs): if isinstance(data, List): return self._update_with_list(data) @@ -74,9 +74,9 @@ def update(self, data=None, **kwargs): return self._update_specific(data) # if isinstance(data, type(None)): # return self._update_specific(kwargs) - + raise AssertionError(f"Type of data should be either List or Dict! Current: {type(data)}") - + def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'valid'] = 'train'): converted_dict = {} for k, v in scalar_dict.items(): @@ -84,25 +84,25 @@ def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'va continue record_key = self.key_map[f"{prefix}/{k}"] assert record_key in self.header, f"{record_key} not in {self.header}" - + converted_dict.update({record_key: v}) return converted_dict - + def __call__(self, train_losses, train_metrics, valid_losses=None, valid_metrics=None): assert len(self.header) != 0 assert len(self.key_map) != 0 - + csv_record_dict = {'epoch': self._epoch} converted_train_losses = self._convert_as_csv_record(train_losses, prefix='train') converted_train_metrics = self._convert_as_csv_record(train_metrics, prefix='train') csv_record_dict.update(converted_train_losses) csv_record_dict.update(converted_train_metrics) - + if valid_losses is not None: converted_valid_losses = self._convert_as_csv_record(valid_losses, prefix='valid') csv_record_dict.update(converted_valid_losses) if valid_metrics is not None: converted_valid_metrics = self._convert_as_csv_record(valid_metrics, prefix='valid') csv_record_dict.update(converted_valid_metrics) - + self.update(csv_record_dict) diff --git a/src/netspresso_trainer/loggers/builder.py b/src/netspresso_trainer/loggers/builder.py index 912044bb7..63f602536 100644 --- a/src/netspresso_trainer/loggers/builder.py +++ b/src/netspresso_trainer/loggers/builder.py @@ -62,16 +62,16 @@ def __init__( step_per_epoch=step_per_epoch, num_sample_images=num_sample_images) if self.use_tensorboard else None self.stdout_logger: Optional[StdOutLogger] = \ StdOutLogger(task=task, model=model, total_epochs=conf.training.epochs) if self.use_stdout else None - + self.netspresso_api_client = None if self.use_netspresso: from loggers.netspresso import ModelSearchServerHandler self.netspresso_api_client: Optional[ModelSearchServerHandler] = ModelSearchServerHandler(task=task, model=model) - + if task in VISUALIZER: pallete = conf.data.pallete if 'pallete' in conf.data else None self.label_converter = VISUALIZER[task](class_map=class_map, pallete=pallete) - + @property def result_dir(self): return self._result_dir @@ -117,7 +117,7 @@ def _convert_imagedict_as_readable(self, images_dict: Dict): for k, v in images_dict.items(): if k == 'images': continue - + # target, pred, bg_gt v = v[:self.num_sample_images] v_new: np.ndarray = magic_image_handler( diff --git a/src/netspresso_trainer/loggers/csv.py b/src/netspresso_trainer/loggers/csv.py index 943acb29a..d82f629ae 100644 --- a/src/netspresso_trainer/loggers/csv.py +++ b/src/netspresso_trainer/loggers/csv.py @@ -6,7 +6,7 @@ class ClassificationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(ClassificationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -14,7 +14,7 @@ def __init__(self, model, result_dir): 'train/Acc@1': 'train_accuracy', 'valid/Acc@1': 'valid_accuracy', } - + @property def key_map(self): return self._key_map @@ -24,7 +24,7 @@ class SegmentationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(SegmentationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -35,4 +35,4 @@ def __init__(self, model, result_dir): @property def key_map(self): - return self._key_map \ No newline at end of file + return self._key_map diff --git a/src/netspresso_trainer/loggers/image.py b/src/netspresso_trainer/loggers/image.py index 68f6c23e9..cf9bb0e14 100644 --- a/src/netspresso_trainer/loggers/image.py +++ b/src/netspresso_trainer/loggers/image.py @@ -12,32 +12,32 @@ def __init__(self, model, result_dir) -> None: self.save_dir: Path = Path(result_dir) / "result_image" self.save_dir.mkdir(exist_ok=True) self._epoch = None - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def save_ndarray_as_image(self, image_array: np.ndarray, filename: Union[str, Path], dataformats: Literal['HWC', 'CHW'] = 'HWC'): assert image_array.ndim == 3 if dataformats != 'HWC' and dataformats == 'CHW': image_array = image_array.transpose((1, 2, 0)) - + # HWC assert image_array.shape[-1] in [1, 3] Image.fromarray(image_array.astype(np.uint8)).save(filename) return True - + def save_result(self, image_dict: Dict, prefix='train'): prefix_dir: Path = self.save_dir / prefix prefix_dir.mkdir(exist_ok=True) - + for k, v in image_dict.items(): assert isinstance(v, np.ndarray) assert v.ndim in [3, 4], \ @@ -53,5 +53,5 @@ def __call__(self, train_images=None, valid_images=None): self.save_result(train_images, prefix='train') if valid_images is not None: self.save_result(valid_images, prefix='valid') - + diff --git a/src/netspresso_trainer/loggers/netspresso.py b/src/netspresso_trainer/loggers/netspresso.py index 8402c6f1f..589a7503d 100644 --- a/src/netspresso_trainer/loggers/netspresso.py +++ b/src/netspresso_trainer/loggers/netspresso.py @@ -7,7 +7,7 @@ logger = logging.getLogger("netspresso_trainer") -MONGODB_TEMP_URI = "" +MONGODB_TEMP_URI = "" class ModelSearchServerHandler: @@ -19,27 +19,27 @@ def __init__(self, task, model, mongodb_uri: str=MONGODB_TEMP_URI) -> None: logger.debug("Pinged your deployment. You successfully connected to MongoDB!") except Exception as e: raise e - + self._db = client['custom-training-board']['trainer-all-in-one'] self._session_id = None - + self._create_session(title=f"[{task}]{model}") - - + + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def _is_ready(self): return self._session_id is not None - + def _append(self, scalar_dict, mode='train'): assert self._is_ready() meta_string = f"{mode}/" if mode is not None else "" @@ -48,38 +48,38 @@ def _append(self, scalar_dict, mode='train'): '$currentDate': {'lastModified': True }} result = self._db.update_one({'_id': self._session_id}, contents, upsert=True) return result - + def _create_session(self, title: str ="test") -> ObjectId: example_document = { "title": title } document = self._db.insert_one(example_document) self._session_id = document.inserted_id return self._session_id - + def create_session(self, title: str="test") -> ObjectId: return self._create_session(title=title) - + def log_scalar(self, key, value, mode='train'): result = self._append({key: value}, mode=mode) return result - + def log_scalars_with_dict(self, scalar_dict, mode='train'): result = self._append(scalar_dict, mode=mode) return result - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time, ) -> None: - + self.log_scalars_with_dict(train_losses, mode='train') self.log_scalars_with_dict(train_metrics, mode='train') - + if valid_losses is not None: self.log_scalars_with_dict(valid_losses, mode='valid') if valid_metrics is not None: self.log_scalars_with_dict(valid_metrics, mode='valid') - + if learning_rate is not None: self.log_scalar('learning_rate', learning_rate, mode='misc') if elapsed_time is not None: - self.log_scalar('elapsed_time', elapsed_time, mode='misc') \ No newline at end of file + self.log_scalar('elapsed_time', elapsed_time, mode='misc') diff --git a/src/netspresso_trainer/loggers/registry.py b/src/netspresso_trainer/loggers/registry.py index 1ba8aad6c..b8c485900 100644 --- a/src/netspresso_trainer/loggers/registry.py +++ b/src/netspresso_trainer/loggers/registry.py @@ -9,4 +9,4 @@ VISUALIZER = { 'segmentation': SegmentationVisualizer, 'detection': DetectionVisualizer, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/loggers/stdout.py b/src/netspresso_trainer/loggers/stdout.py index aa0e99ff0..7e3d653da 100644 --- a/src/netspresso_trainer/loggers/stdout.py +++ b/src/netspresso_trainer/loggers/stdout.py @@ -11,21 +11,21 @@ def __init__(self, task, model, total_epochs=None) -> None: self.task = task self.model_name = model self.total_epochs = total_epochs if total_epochs is not None else "???" - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time): logger.info(f"Epoch: {self._epoch} / {self.total_epochs}") - + if learning_rate is not None: logger.info(f"learning rate: {learning_rate:.7f}") if elapsed_time is not None: diff --git a/src/netspresso_trainer/loggers/tensorboard.py b/src/netspresso_trainer/loggers/tensorboard.py index 3ebc9c60f..e905b0ddb 100644 --- a/src/netspresso_trainer/loggers/tensorboard.py +++ b/src/netspresso_trainer/loggers/tensorboard.py @@ -70,14 +70,14 @@ def log_image(self, key, value: Union[np.ndarray, torch.Tensor], mode='train'): def log_images_with_dict(self, image_dict, mode='train'): for k, v in image_dict.items(): self._log_image(k, v, mode) - + def _get_rasterized_hparam(self, hparams): if not isinstance(hparams, dict): stem = hparams if not isinstance(hparams, (int, float, str, bool, torch.Tensor)): return str(stem) return stem - + rasterized_dict = {} for key, value in hparams.items(): if isinstance(value, dict): @@ -90,15 +90,15 @@ def _get_rasterized_hparam(self, hparams): return rasterized_dict def log_hparams(self, hp_omegaconf: Union[Dict, List], final_metrics=None): - + if final_metrics is None: final_metrics = {} final_metrics = {f"hparams_metrics/{k}": v for k, v in final_metrics.items()} - + hp_dict = OmegaConf.to_container(hp_omegaconf, resolve=True) hp_for_log = self._get_rasterized_hparam(hp_dict) - - exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) + + exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) self.tensorboard.file_writer.add_summary(exp) self.tensorboard.file_writer.add_summary(ssi) self.tensorboard.file_writer.add_summary(sei) diff --git a/src/netspresso_trainer/loggers/visualizer.py b/src/netspresso_trainer/loggers/visualizer.py index 3d11934bd..349dcd688 100644 --- a/src/netspresso_trainer/loggers/visualizer.py +++ b/src/netspresso_trainer/loggers/visualizer.py @@ -55,7 +55,7 @@ def _convert(self, gray_image): return color_image def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): - + return_images = [] for image, result in zip(images, results): image = image.copy() @@ -75,12 +75,12 @@ def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): text_w, text_h = text_size image = cv2.rectangle(image, (x1, y1-5-text_h), (x1+text_w, y1), color=color, thickness=-1) image = cv2.putText(image, str(class_name), (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) - + return_images.append(image[np.newaxis, ...]) return_images = np.concatenate(return_images, axis=0) return return_images - - + + class SegmentationVisualizer: def __init__(self, class_map, pallete=None): n = len(class_map) diff --git a/src/netspresso_trainer/losses/classification/__init__.py b/src/netspresso_trainer/losses/classification/__init__.py index f072e4cc1..6f89bba8f 100644 --- a/src/netspresso_trainer/losses/classification/__init__.py +++ b/src/netspresso_trainer/losses/classification/__init__.py @@ -1,2 +1,2 @@ from .label_smooth import LabelSmoothingCrossEntropy -from .soft_target import SoftTargetCrossEntropy \ No newline at end of file +from .soft_target import SoftTargetCrossEntropy diff --git a/src/netspresso_trainer/losses/classification/label_smooth.py b/src/netspresso_trainer/losses/classification/label_smooth.py index 61e2e3773..495d13543 100644 --- a/src/netspresso_trainer/losses/classification/label_smooth.py +++ b/src/netspresso_trainer/losses/classification/label_smooth.py @@ -19,4 +19,4 @@ def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: nll_loss = nll_loss.squeeze(1) smooth_loss = -logprobs.mean(dim=-1) loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() \ No newline at end of file + return loss.mean() diff --git a/src/netspresso_trainer/losses/classification/soft_target.py b/src/netspresso_trainer/losses/classification/soft_target.py index 2dfc8cd07..8f25bc9c8 100644 --- a/src/netspresso_trainer/losses/classification/soft_target.py +++ b/src/netspresso_trainer/losses/classification/soft_target.py @@ -12,4 +12,4 @@ def __init__(self): def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] loss = torch.sum(-target * F.log_softmax(pred, dim=-1), dim=-1) - return loss.mean() \ No newline at end of file + return loss.mean() diff --git a/src/netspresso_trainer/losses/common.py b/src/netspresso_trainer/losses/common.py index eda3b8ba4..5c7b4e2ad 100644 --- a/src/netspresso_trainer/losses/common.py +++ b/src/netspresso_trainer/losses/common.py @@ -13,4 +13,4 @@ def __init__(self, ignore_index, **kwargs) -> None: def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] loss = self.loss_fn(pred, target) - return loss \ No newline at end of file + return loss diff --git a/src/netspresso_trainer/losses/detection/__init__.py b/src/netspresso_trainer/losses/detection/__init__.py index 24d6942e2..2d5f5d05b 100644 --- a/src/netspresso_trainer/losses/detection/__init__.py +++ b/src/netspresso_trainer/losses/detection/__init__.py @@ -1,2 +1,2 @@ from .fastrcnn import RoiHeadLoss, RPNLoss -from .yolox import YOLOXLoss \ No newline at end of file +from .yolox import YOLOXLoss diff --git a/src/netspresso_trainer/losses/detection/fastrcnn.py b/src/netspresso_trainer/losses/detection/fastrcnn.py index 976a6098f..28b2ab18c 100644 --- a/src/netspresso_trainer/losses/detection/fastrcnn.py +++ b/src/netspresso_trainer/losses/detection/fastrcnn.py @@ -12,7 +12,7 @@ class RoiHeadLoss(nn.Module): def __init__(self) -> None: super().__init__() - + @staticmethod def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: class_logits, box_regression, labels, regression_targets =\ @@ -43,10 +43,10 @@ def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_classifier": classification_loss, "loss_box_reg": box_loss } - + # TODO: return as dict return sum(losses.values()) - + class RPNLoss(nn.Module): def __init__(self, box_fg_iou_thresh=0.5, @@ -54,7 +54,7 @@ def __init__(self, box_batch_size_per_image=512, box_positive_fraction=0.25) -> None: super().__init__() - + self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) self.box_similarity = box_ops.box_iou self.proposal_matcher = det_utils.Matcher( @@ -63,7 +63,7 @@ def __init__(self, allow_low_quality_matches=True, ) self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(box_batch_size_per_image, box_positive_fraction) - + def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[str, Tensor]] ) -> Tuple[List[Tensor], List[Tensor]]: @@ -100,7 +100,7 @@ def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[s labels.append(labels_per_image) matched_gt_boxes.append(matched_gt_boxes_per_image) return labels, matched_gt_boxes - + def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: List[Tensor], regression_targets: List[Tensor] ) -> Tuple[Tensor, Tensor]: """ @@ -137,7 +137,7 @@ def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: Li objectness_loss = F.binary_cross_entropy_with_logits(objectness[sampled_inds], labels[sampled_inds]) return objectness_loss, box_loss - + def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: anchors, objectness, pred_bbox_deltas = out['anchors'], out['objectness'], out['pred_bbox_deltas'] labels, matched_gt_boxes = self._assign_targets_to_anchors(anchors, target) @@ -150,4 +150,4 @@ def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_rpn_box_reg": loss_rpn_box_reg, } # TODO: return as dict - return sum(losses.values()) \ No newline at end of file + return sum(losses.values()) diff --git a/src/netspresso_trainer/losses/detection/yolox.py b/src/netspresso_trainer/losses/detection/yolox.py index f593cc0a2..7fcd8d534 100644 --- a/src/netspresso_trainer/losses/detection/yolox.py +++ b/src/netspresso_trainer/losses/detection/yolox.py @@ -47,7 +47,7 @@ def __init__(self, **kwargs) -> None: super(YOLOXLoss, self).__init__() self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") self.iou_loss = IOUloss(reduction="none") - + def forward(self, out: List, target: Dict) -> torch.Tensor: x_shifts = [] @@ -90,10 +90,10 @@ def forward(self, out: List, target: Dict) -> torch.Tensor: [], dtype=out[0].dtype, ) - + # TODO: return as dict return total_loss - + def get_losses( self, imgs, @@ -263,7 +263,7 @@ def get_losses( #loss_l1, num_fg / max(num_gts, 1), ) - + @torch.no_grad() def get_assignments( self, @@ -354,7 +354,7 @@ def get_assignments( matched_gt_inds, num_fg, ) - + def get_geometry_constraint( self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, ): @@ -385,7 +385,7 @@ def get_geometry_constraint( geometry_relation = is_in_centers[:, anchor_filter] return anchor_filter, geometry_relation - + def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): matching_matrix = torch.zeros_like(cost, dtype=torch.uint8) @@ -419,7 +419,7 @@ def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): fg_mask_inboxes ] return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds - + def get_output_and_grid(self, output, k, stride, dtype): grid = self.grids[k] diff --git a/src/netspresso_trainer/losses/registry.py b/src/netspresso_trainer/losses/registry.py index 61d8f554d..82f60e869 100644 --- a/src/netspresso_trainer/losses/registry.py +++ b/src/netspresso_trainer/losses/registry.py @@ -15,4 +15,4 @@ 'yolox_loss': YOLOXLoss, } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/losses/segmentation/pidnet.py b/src/netspresso_trainer/losses/segmentation/pidnet.py index 2bdc7cab0..736ab018c 100644 --- a/src/netspresso_trainer/losses/segmentation/pidnet.py +++ b/src/netspresso_trainer/losses/segmentation/pidnet.py @@ -26,7 +26,7 @@ def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): self.boundary_aware = False def _forward(self, out: torch.Tensor, target: torch.Tensor): - + return self.loss_fn(out, target) def forward(self, out: Dict, target: torch.Tensor): @@ -36,7 +36,7 @@ def forward(self, out: Dict, target: torch.Tensor): filler = torch.ones_like(target) * self.ignore_index bd_label = torch.where(torch.sigmoid(extra_d[:, 0, :, :]) > 0.8, target, filler) return self._forward(pred, bd_label) - + pred, extra_p = out['pred'], out['extra_p'] score = [extra_p, pred] return sum([w * self._forward(x, target) for (w, x) in zip(BALANCE_WEIGHTS, score)]) @@ -45,7 +45,7 @@ class PIDNetBoundaryAwareCrossEntropy(PIDNetCrossEntropy): def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): super().__init__(ignore_index, weight) self.boundary_aware = True - + # class OhemCrossEntropy(nn.Module): # def __init__(self, ignore_label=-1, thres=0.7, min_kept=100000, weight=None): # super(OhemCrossEntropy, self).__init__() diff --git a/src/netspresso_trainer/metrics/__init__.py b/src/netspresso_trainer/metrics/__init__.py index 330ddc644..923da55ed 100644 --- a/src/netspresso_trainer/metrics/__init__.py +++ b/src/netspresso_trainer/metrics/__init__.py @@ -1 +1 @@ -from .builder import build_metrics \ No newline at end of file +from .builder import build_metrics diff --git a/src/netspresso_trainer/metrics/detection/metric.py b/src/netspresso_trainer/metrics/detection/metric.py index 17a83845d..93e2070fd 100644 --- a/src/netspresso_trainer/metrics/detection/metric.py +++ b/src/netspresso_trainer/metrics/detection/metric.py @@ -167,7 +167,7 @@ def average_precisions_per_class( class DetectionMetric(BaseMetric): metric_names: List[str] = ['map50', 'map75', 'map50_95'] primary_metric: str = 'map50_95' - + def __init__(self, **kwargs): super().__init__() diff --git a/src/netspresso_trainer/metrics/registry.py b/src/netspresso_trainer/metrics/registry.py index 381ab64fe..73603e133 100644 --- a/src/netspresso_trainer/metrics/registry.py +++ b/src/netspresso_trainer/metrics/registry.py @@ -11,4 +11,4 @@ 'detection': DetectionMetric } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/models/op/base_metaformer.py b/src/netspresso_trainer/models/op/base_metaformer.py index 5a23d5cb6..65237e741 100644 --- a/src/netspresso_trainer/models/op/base_metaformer.py +++ b/src/netspresso_trainer/models/op/base_metaformer.py @@ -53,7 +53,7 @@ def __init__( attention_bias_resolution = 16, ) -> None: super().__init__() - + attention_hidden_size = attention_hidden_size if attention_hidden_size is not None else hidden_size value_hidden_size = value_hidden_size if value_hidden_size is not None else attention_hidden_size @@ -62,17 +62,17 @@ def __init__( f"The hidden size {attention_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + if value_hidden_size % num_attention_heads != 0: raise ValueError( f"The hidden size {value_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + self.num_attention_heads = num_attention_heads self.attention_head_size = int(attention_hidden_size / num_attention_heads) self.value_attention_head_size = int(value_hidden_size / num_attention_heads) - + self.head_size = self.num_attention_heads * self.attention_head_size self.value_head_size = self.num_attention_heads * self.value_attention_head_size self.attention_scale = attention_scale if attention_scale is not None \ @@ -82,7 +82,7 @@ def __init__( self.query = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.key = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.value = nn.Linear(hidden_size, self.value_head_size, bias=use_qkv_bias) # ... x C -> ... x C_v - + self.linear = nn.Linear(self.value_head_size, hidden_size) # ... x C_v -> ... x C self.dropout = nn.Dropout(attention_dropout_prob) @@ -118,14 +118,14 @@ def __init__( # torch.zeros(self.num_attention_heads, len(attention_offsets))) # self.register_buffer('attention_bias_idxs_seg', # torch.LongTensor(idxs).view(N, N)) - + self.use_cross_attention = use_cross_attention def transpose_for_scores(self, x: Tensor, attention_head_size: int) -> Tensor: new_x_shape = x.size()[:-1] + (self.num_attention_heads, attention_head_size) x = x.view(new_x_shape) return x.permute(0, 2, 1, 3) - + def sequence_reduce(self, x: Tensor, height: int, width: int) -> Tensor: """SegFormer """ @@ -167,7 +167,7 @@ def forward( """ mixed_query_layer = self.query(query_states) # B x S_s x C_qk - + if not self.use_cross_attention: # Self-attention key_value_states = query_states # B x S_t(=S_s) x C_qk if self.use_sequence_reduction: @@ -180,7 +180,7 @@ def forward( attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) # B x {head} x S_s x S_t attention_scores = attention_scores / self.attention_scale # B x {head} x S_s x S_t - + if self.use_attention_bias: bias = self.attention_biases[:, self.attention_bias_idxs] bias = nn.functional.interpolate(bias.unsqueeze(0), size=(attention_scores.size(-2), attention_scores.size(-1)), mode='bicubic') @@ -199,15 +199,15 @@ def forward( context_layer = context_layer.permute(0, 2, 1, 3).contiguous() # B x S_s x {head} x C_vsplit new_context_layer_shape = context_layer.size()[:-2] + (self.value_head_size,) context_layer = context_layer.view(new_context_layer_shape) # B x S_s x C_v - + context_layer = self.linear(context_layer) # B x S_s x C context_layer = self.dropout(context_layer) # B x S_s x C if self.output_with_attentions: return (context_layer, attention_probs) - + return context_layer # B x S_s x C - + class ChannelMLP(nn.Module): def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_activation_type='silu'): super().__init__() @@ -218,7 +218,7 @@ def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_a self.ffn.add_module('dense2', nn.Linear(in_features=intermediate_size, out_features=hidden_size, bias=True)) self.dropout = nn.Dropout(p=hidden_dropout_prob) - + def forward(self, x): x = self.ffn(x) x = self.dropout(x) @@ -231,20 +231,20 @@ def __init__(self, hidden_size, layer_norm_eps) -> None: self.layernorm_after = nn.LayerNorm(hidden_size) self.token_mixer = nn.Identity() # MultiHeadAttention() self.channel_mlp = nn.Identity() # ChannelMLP() - + def forward(self, x): out_token_mixer = self.layernorm_before(x) out_token_mixer = self.token_mixer(out_token_mixer) - + out_token_mixer = out_token_mixer + x - + out_final = self.layernorm_after(out_token_mixer) out_final = self.channel_mlp(out_final) - + out_final = out_final + out_token_mixer - + return out_final - + class MetaFormerEncoder(nn.Module): def __init__(self) -> None: super().__init__() @@ -252,7 +252,7 @@ def __init__(self) -> None: # self.blocks = nn.Sequential( # *[MetaFormerBlock(hidden_size, layer_norm_eps) for _ in range(num_layers)] # ) - + def forward(self, x): x = self.blocks(x) return x @@ -262,7 +262,7 @@ def __init__(self, hidden_sizes) -> None: super().__init__() self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes - + self.patch_embed = nn.Identity() self.encoder = MetaFormerEncoder() self.norm = nn.Identity() @@ -270,14 +270,14 @@ def __init__(self, hidden_sizes) -> None: @property def feature_dim(self): return self._feature_dim - + @property def intermediate_features_dim(self): return self._intermediate_features_dim - + def forward(self, x: FXTensorType): x = self.patch_embed(x) x = self.encoder(x) x = self.norm(x) feat = torch.mean(x, dim=1) - return BackboneOutput(last_feature=feat) \ No newline at end of file + return BackboneOutput(last_feature=feat) diff --git a/src/netspresso_trainer/models/op/custom.py b/src/netspresso_trainer/models/op/custom.py index 787156255..7dd1752cd 100644 --- a/src/netspresso_trainer/models/op/custom.py +++ b/src/netspresso_trainer/models/op/custom.py @@ -296,9 +296,9 @@ def __init__( # project layers.append( ConvLayer( - in_channels=hidden_channels, - out_channels=out_channels, - kernel_size=1, + in_channels=hidden_channels, + out_channels=out_channels, + kernel_size=1, norm_type=norm_type, use_act=False ) @@ -365,7 +365,7 @@ def __init__( self.patch_dim = patch_dim self.register_buffer("pe", pos_encoding) - + def forward_patch_last( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: @@ -385,8 +385,8 @@ def forward_others( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: # seq_length should be the second last dim - - # @deepkyu: [fx tracing] Always `indices` is None + + # @deepkyu: [fx tracing] Always `indices` is None # if indices is None: # x = x + self.pe[..., : x.shape[-2], :] # else: @@ -396,10 +396,10 @@ def forward_others( # pe = self.pe.expand(repeat_size) # selected_pe = torch.gather(pe, index=indices, dim=-2) # x = x + selected_pe - + # x = x + self.pe[..., :seq_index, :] x = x + tensor_slice(self.pe, dim=1, index=x.shape[-2]) - + return x def forward(self, x, indices: Optional[Tensor] = None, *args, **kwargs) -> Tensor: @@ -480,7 +480,7 @@ def forward(self, x: Tensor) -> Tensor: # dims = [-3, -2, -1] # else: # raise NotImplementedError("Currently 2D and 3D global pooling supported") - + return self._global_pool(x, dims=(-2, -1)) # def profile_module(self, input: Tensor) -> Tuple[Tensor, float, float]: @@ -497,9 +497,9 @@ class Focus(nn.Module): def __init__(self, in_channels, out_channels, ksize=1, stride=1, act_type="silu"): super().__init__() self.conv = ConvLayer(in_channels=in_channels * 4, - out_channels=out_channels, - kernel_size=ksize, - stride=stride, + out_channels=out_channels, + kernel_size=ksize, + stride=stride, act_type=act_type) def forward(self, x): @@ -542,25 +542,25 @@ def __init__( # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() hidden_channels = int(out_channels * expansion) # hidden channels - self.conv1 = ConvLayer(in_channels=in_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, - kernel_size=1, + kernel_size=1, stride=1, act_type=act_type) self.conv2 = ConvLayer(in_channels=in_channels, - out_channels=hidden_channels, - kernel_size=1, + out_channels=hidden_channels, + kernel_size=1, stride=1, act_type=act_type) - self.conv3 = ConvLayer(in_channels=2 * hidden_channels, - out_channels=out_channels, - kernel_size=1, + self.conv3 = ConvLayer(in_channels=2 * hidden_channels, + out_channels=out_channels, + kernel_size=1, stride=1, act_type=act_type) - + block = DarknetBlock module_list = [ block( - in_channels=hidden_channels, - out_channels=hidden_channels, + in_channels=hidden_channels, + out_channels=hidden_channels, shortcut=shortcut, expansion=1.0, act_type=act_type @@ -585,7 +585,7 @@ def __init__( ): super().__init__() hidden_channels = in_channels // 2 - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) self.m = nn.ModuleList( [ @@ -594,7 +594,7 @@ def __init__( ] ) conv2_channels = hidden_channels * (len(kernel_sizes) + 1) - self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, kernel_size=1, stride=1, act_type=act_type) def forward(self, x): @@ -618,9 +618,9 @@ def __init__( ): super().__init__() hidden_channels = int(out_channels * expansion) - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) - self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, kernel_size=3, stride=1, act_type=act_type) self.use_add = shortcut and in_channels == out_channels diff --git a/src/netspresso_trainer/models/op/depth.py b/src/netspresso_trainer/models/op/depth.py index 9c626bb78..276527d39 100644 --- a/src/netspresso_trainer/models/op/depth.py +++ b/src/netspresso_trainer/models/op/depth.py @@ -31,4 +31,4 @@ def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): self.scale_by_keep = scale_by_keep def forward(self, x): - return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) \ No newline at end of file + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) diff --git a/src/netspresso_trainer/models/op/registry.py b/src/netspresso_trainer/models/op/registry.py index 4666e9f58..10c6f8cb8 100644 --- a/src/netspresso_trainer/models/op/registry.py +++ b/src/netspresso_trainer/models/op/registry.py @@ -16,4 +16,4 @@ 'silu': nn.SiLU, 'swish': nn.SiLU, 'hard_swish': nn.Hardswish, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/optimizers/__init__.py b/src/netspresso_trainer/optimizers/__init__.py index b1b64e76b..871ebc6e1 100644 --- a/src/netspresso_trainer/optimizers/__init__.py +++ b/src/netspresso_trainer/optimizers/__init__.py @@ -1 +1 @@ -from .builder import build_optimizer \ No newline at end of file +from .builder import build_optimizer diff --git a/src/netspresso_trainer/optimizers/builder.py b/src/netspresso_trainer/optimizers/builder.py index 3d2d2bf63..460af3fec 100644 --- a/src/netspresso_trainer/optimizers/builder.py +++ b/src/netspresso_trainer/optimizers/builder.py @@ -21,7 +21,7 @@ def build_optimizer( 'adam', 'adamw', 'adamax', 'adadelta', 'adagrad', 'rmsprop'] = opt.lower() assert opt_name in OPTIMIZER_DICT - + conf_optim = {'weight_decay': wd, 'lr': lr} if opt_name in ['sgd', 'nesterov', 'momentum', 'rmsprop']: @@ -32,7 +32,7 @@ def build_optimizer( conf_optim.update({'nesterov': True}) if opt_name in ['momentum']: conf_optim.update({'nesterov': False}) - + optimizer = OPTIMIZER_DICT[opt_name](parameters, **conf_optim) return optimizer diff --git a/src/netspresso_trainer/optimizers/registry.py b/src/netspresso_trainer/optimizers/registry.py index f4c3fc220..4f1ff7591 100644 --- a/src/netspresso_trainer/optimizers/registry.py +++ b/src/netspresso_trainer/optimizers/registry.py @@ -14,4 +14,4 @@ 'sgd': optim.SGD, 'nesterov': optim.SGD, 'momentum': optim.SGD, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/pipelines/builder.py b/src/netspresso_trainer/pipelines/builder.py index 4773261c8..18698855c 100644 --- a/src/netspresso_trainer/pipelines/builder.py +++ b/src/netspresso_trainer/pipelines/builder.py @@ -9,9 +9,9 @@ def build_pipeline(conf, task, model_name, model, devices, train_dataloader, eva task_ = "detection-two-stage" if conf.model.architecture.head.name in ["faster_rcnn"] else "detection-one-stage" task_pipeline = TASK_PIPELINE[task_] - + trainer = task_pipeline(conf, task, model_name, model, devices, train_dataloader, eval_dataloader, class_map, is_graphmodule_training=is_graphmodule_training, profile=profile) - return trainer \ No newline at end of file + return trainer diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 3d7e1ab54..5cdef182c 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -139,7 +139,7 @@ def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']) pred_on_image['post_labels'] = class_idx pred.append(pred_on_image) self.metric_factory.calc(pred, target=targets, phase=phase) - + def save_checkpoint(self, epoch: int): # Check whether the valid loss is minimum at this epoch @@ -211,9 +211,9 @@ def train_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices),} for box, label in zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -235,7 +235,7 @@ def train_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } @@ -247,9 +247,9 @@ def valid_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices)} for box, label in zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -269,7 +269,7 @@ def valid_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } diff --git a/src/netspresso_trainer/pipelines/registry.py b/src/netspresso_trainer/pipelines/registry.py index 61fb0dd2a..b0110bee7 100644 --- a/src/netspresso_trainer/pipelines/registry.py +++ b/src/netspresso_trainer/pipelines/registry.py @@ -13,4 +13,4 @@ 'segmentation': SegmentationPipeline, 'detection-two-stage': TwoStageDetectionPipeline, 'detection-one-stage': OneStageDetectionPipeline, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/schedulers/builder.py b/src/netspresso_trainer/schedulers/builder.py index ce566554b..df4b49f3a 100644 --- a/src/netspresso_trainer/schedulers/builder.py +++ b/src/netspresso_trainer/schedulers/builder.py @@ -15,8 +15,8 @@ def build_scheduler(optimizer, conf_training): 'total_iters': num_epochs, 'iters_per_phase': conf_training.iters_per_phase, # TODO: config for StepLR }) - + assert scheduler_name in SCHEDULER_DICT, f"{scheduler_name} not in scheduler dict!" lr_scheduler = SCHEDULER_DICT[scheduler_name](optimizer, **conf_sched) - + return lr_scheduler, num_epochs diff --git a/src/netspresso_trainer/schedulers/cosine_lr.py b/src/netspresso_trainer/schedulers/cosine_lr.py index aac4d3017..e24286a40 100644 --- a/src/netspresso_trainer/schedulers/cosine_lr.py +++ b/src/netspresso_trainer/schedulers/cosine_lr.py @@ -31,10 +31,10 @@ def get_lr(self): if not self._get_lr_called_within_step: warnings.warn("To get the last learning rate computed by the scheduler, " "please use `get_last_lr()`.", UserWarning, stacklevel=2) - + if self.last_epoch > self.T_max: return [group['lr'] for group in self.optimizer.param_groups] - + if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters: return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr) for base_lr in self.base_lrs] @@ -63,4 +63,4 @@ def _get_closed_form_lr(self): ) ) for base_lr in self.base_lrs - ] \ No newline at end of file + ] diff --git a/src/netspresso_trainer/schedulers/cosine_warm_restart.py b/src/netspresso_trainer/schedulers/cosine_warm_restart.py index 4e114142f..ba0f501f8 100644 --- a/src/netspresso_trainer/schedulers/cosine_warm_restart.py +++ b/src/netspresso_trainer/schedulers/cosine_warm_restart.py @@ -91,7 +91,7 @@ def get_reassigned_t_i(current_t_i, next_t_i, remain_epochs): return remain_epochs, remain_epochs return current_t_i, remain_epochs - + def _step_without_given_epoch(self) -> int: if self.last_epoch < 0: epoch = 0 @@ -105,7 +105,7 @@ def _step_without_given_epoch(self) -> int: self.T_i = self.T_i * self.T_mult self.T_i, self.remain_iters = self.get_reassigned_t_i(self.T_i, self.T_i * self.T_mult, self.remain_iters) return epoch - + def step(self, epoch=None): """Step could be called after every batch update @@ -139,7 +139,7 @@ def step(self, epoch=None): else: if epoch < 0: raise ValueError("Expected non-negative epoch, but got {}".format(epoch)) - + if epoch >= self.T_0: if self.T_mult == 1: self.T_cur = epoch % self.T_0 diff --git a/src/netspresso_trainer/schedulers/poly_lr.py b/src/netspresso_trainer/schedulers/poly_lr.py index 9c9b21c20..d7f624610 100644 --- a/src/netspresso_trainer/schedulers/poly_lr.py +++ b/src/netspresso_trainer/schedulers/poly_lr.py @@ -34,7 +34,7 @@ def get_lr(self): if self.last_epoch > self.total_iters: return [group["lr"] for group in self.optimizer.param_groups] - + if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters: return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr) for base_lr in self.base_lrs] @@ -45,7 +45,7 @@ def get_lr(self): return [self.min_lr + (group["lr"] - self.min_lr) * decay_factor for group in self.optimizer.param_groups] def _get_closed_form_lr(self): - decay_steps = self.total_iters - self.warmup_iters + decay_steps = self.total_iters - self.warmup_iters return [ ( min( @@ -54,4 +54,4 @@ def _get_closed_form_lr(self): ) ) for base_lr in self.base_lrs - ] \ No newline at end of file + ] diff --git a/src/netspresso_trainer/schedulers/registry.py b/src/netspresso_trainer/schedulers/registry.py index d389ae0d8..992a9d3cc 100644 --- a/src/netspresso_trainer/schedulers/registry.py +++ b/src/netspresso_trainer/schedulers/registry.py @@ -13,4 +13,4 @@ 'cosine_no_sgdr': CosineAnnealingLRWithCustomWarmUp, 'poly': PolynomialLRWithWarmUp, 'step': StepLR -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/schedulers/step_lr.py b/src/netspresso_trainer/schedulers/step_lr.py index 26776e7c7..ab97636fd 100644 --- a/src/netspresso_trainer/schedulers/step_lr.py +++ b/src/netspresso_trainer/schedulers/step_lr.py @@ -52,4 +52,4 @@ def get_lr(self): def _get_closed_form_lr(self): return [base_lr * self.gamma ** (self.last_epoch // self.step_size) - for base_lr in self.base_lrs] \ No newline at end of file + for base_lr in self.base_lrs] diff --git a/src/netspresso_trainer/trainer_cli.py b/src/netspresso_trainer/trainer_cli.py index 30f7fda32..e0fa0c640 100644 --- a/src/netspresso_trainer/trainer_cli.py +++ b/src/netspresso_trainer/trainer_cli.py @@ -13,17 +13,17 @@ def run_distributed_training_script(gpu_ids, data, augmentation, model, training, logging, environment, log_level): - + command = [ "--data", data, - "--augmentation", augmentation, + "--augmentation", augmentation, "--model", model, "--training", training, "--logging", logging, "--environment", environment, "--log_level", log_level, ] - + # Distributed training script command = [ 'python', '-m', 'torch.distributed.launch', @@ -46,10 +46,10 @@ def parse_gpu_ids(gpu_arg: str): """Parse comma-separated GPU IDs and return as a list of integers.""" try: gpu_ids = [int(id) for id in gpu_arg.split(',')] - + if len(gpu_ids) == 1: # Single GPU return gpu_ids[0] - + gpu_ids = sorted(gpu_ids) return gpu_ids except ValueError as e: @@ -61,7 +61,7 @@ def parse_args_netspresso(with_gpus=False): parser = argparse.ArgumentParser(description="Parser for NetsPresso configuration") # -------- User arguments ---------------------------------------- - + if with_gpus: parser.add_argument( '--gpus', type=parse_gpu_ids, default=0, @@ -111,7 +111,7 @@ def parse_args_netspresso(with_gpus=False): def set_arguments(data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str]) -> DictConfig: - + conf_data = OmegaConf.load(data) conf_augmentation = OmegaConf.load(augmentation) conf_model = OmegaConf.load(model) @@ -126,19 +126,19 @@ def set_arguments(data: Union[Path, str], augmentation: Union[Path, str], conf.merge_with(conf_training) conf.merge_with(conf_logging) conf.merge_with(conf_environment) - + return conf def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str], log_level: str = LOG_LEVEL): - + assert isinstance(gpus, (list, int)) gpu_ids_str = ','.join(map(str, gpus)) if isinstance(gpus, list) else str(gpus) os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids_str torch.cuda.empty_cache() # Reinitialize CUDA to apply the change - + if isinstance(gpus, int): conf = set_arguments(data, augmentation, model, training, logging, environment) train_common(conf, log_level=log_level) @@ -148,7 +148,7 @@ def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augment def train_cli() -> None: args_parsed = parse_args_netspresso(with_gpus=True) - + train_with_yaml_impl( gpus=args_parsed.gpus, data=args_parsed.data, @@ -163,7 +163,7 @@ def train_cli() -> None: def train_cli_without_additional_gpu_check() -> None: args_parsed = parse_args_netspresso(with_gpus=False) - + conf = set_arguments( data=args_parsed.data, augmentation=args_parsed.augmentation, @@ -177,6 +177,6 @@ def train_cli_without_additional_gpu_check() -> None: if __name__ == "__main__": - + # Execute by `run_distributed_training_script` - train_cli_without_additional_gpu_check() \ No newline at end of file + train_cli_without_additional_gpu_check() diff --git a/src/netspresso_trainer/trainer_inline.py b/src/netspresso_trainer/trainer_inline.py index 556194df8..65d2b7893 100644 --- a/src/netspresso_trainer/trainer_inline.py +++ b/src/netspresso_trainer/trainer_inline.py @@ -12,7 +12,7 @@ def set_struct_recursive(conf: DictConfig, value: bool) -> None: OmegaConf.set_struct(conf, value) - + for _, conf_value in conf.items(): if isinstance(conf_value, DictConfig): set_struct_recursive(conf_value, value) @@ -32,9 +32,9 @@ def train_with_config(config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str], training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str], log_level: str = LOG_LEVEL): - + gpus: Union[List, int] = parse_gpu_ids(gpus) - + train_with_yaml_impl( gpus=gpus, data=data, @@ -44,4 +44,4 @@ def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path, logging=logging, environment=environment, log_level=log_level - ) \ No newline at end of file + ) diff --git a/src/netspresso_trainer/utils/logger.py b/src/netspresso_trainer/utils/logger.py index bf296de13..6b320a09c 100644 --- a/src/netspresso_trainer/utils/logger.py +++ b/src/netspresso_trainer/utils/logger.py @@ -24,7 +24,7 @@ def _custom_logger(name: str, level: str, distributed: bool): else: fmt = '%(asctime)s | %(levelname)s\t\t| %(funcName)s:<%(filename)s>:%(lineno)s >>> %(message)s' logger = logging.getLogger(name) - + if not logger.hasHandlers(): handler = logging.StreamHandler() @@ -46,7 +46,7 @@ def set_logger(logger_name="netspresso_trainer", level: str = 'INFO', distribute print("Skipping timezone setting.") _level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = level.upper() _custom_logger(logger_name, _level, distributed) - + logger = logging.getLogger(logger_name) if _level == 'DEBUG': logger.setLevel(logging.DEBUG) diff --git a/src/netspresso_trainer/utils/stats.py b/src/netspresso_trainer/utils/stats.py index aacc385dc..326721d24 100644 --- a/src/netspresso_trainer/utils/stats.py +++ b/src/netspresso_trainer/utils/stats.py @@ -12,7 +12,7 @@ def get_params_and_macs(model: nn.Module, sample_input: torch.Tensor): sample_input = sample_input.to(get_device(model)) # From v0.0.9 macs, params = _params_and_macs_fvcore(model, sample_input) - + # # Before v0.0.9 # macs, params = _params_and_macs_thop(model, sample_input) @@ -25,4 +25,4 @@ def _params_and_macs_fvcore(model: nn.Module, sample_input: torch.Tensor): def _params_and_macs_thop(model: nn.Module, sample_input: torch.Tensor): macs, params = thop.profile(model, inputs=(sample_input,), verbose=False) - return macs, params \ No newline at end of file + return macs, params From 96acca4755d728b0e437e7554ccfc68671fa8005 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:09:42 +0900 Subject: [PATCH 036/167] #217 fix not to call alias func --- train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index e2b937d0c..8b4e0c04b 100644 --- a/train.py +++ b/train.py @@ -1,7 +1,7 @@ from netspresso_trainer import train_cli def train_with_inline_cfg(): - from netspresso_trainer import TrainerConfig, train, export_config_as_yaml + from netspresso_trainer import TrainerConfig, train_with_config, export_config_as_yaml from netspresso_trainer.cfg import ClassificationResNetModelConfig, ExampleBeansDataset """ @@ -38,7 +38,7 @@ def train_with_inline_cfg(): print(export_config_as_yaml(cfg)) - train(cfg, log_level='INFO') + train_with_config(cfg, log_level='INFO') def train_with_inline_yaml(): from netspresso_trainer import train_with_yaml From dea5b3d2ddcd8bc26c6963e776a1378617050a5b Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:17:22 +0900 Subject: [PATCH 037/167] hotfix: default augmentation config per task --- src/netspresso_trainer/cfg/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index dd176a035..3c13d8ef9 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -53,6 +53,12 @@ ) from .training import ClassificationScheduleConfig, DetectionScheduleConfig, ScheduleConfig, SegmentationScheduleConfig +_AUGMENTATION_CONFIG_TYPE_DICT = { + 'classification': ClassificationAugmentationConfig, + 'segmentation': SegmentationAugmentationConfig, + 'detection': DetectionAugmentationConfig +} + _TRAINING_CONFIG_TYPE_DICT = { 'classification': ClassificationScheduleConfig, 'segmentation': SegmentationScheduleConfig, @@ -101,6 +107,6 @@ def __post_init__(self): if self.auto: if self.augmentation is None: - self.augmentation = AugmentationConfig() + self.augmentation = _AUGMENTATION_CONFIG_TYPE_DICT[self.task]() if self.training is None: self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() From 1a7b688be6e7690a38c774a7fed4e101aa2c35fb Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:17:51 +0900 Subject: [PATCH 038/167] #217 add gpu variable for train_with_config --- src/netspresso_trainer/trainer_inline.py | 11 ++++++++++- train.py | 8 +++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/netspresso_trainer/trainer_inline.py b/src/netspresso_trainer/trainer_inline.py index 65d2b7893..30bac8afd 100644 --- a/src/netspresso_trainer/trainer_inline.py +++ b/src/netspresso_trainer/trainer_inline.py @@ -3,6 +3,7 @@ from typing import List, Literal, Union from omegaconf import DictConfig, OmegaConf +import torch from netspresso_trainer.cfg import TrainerConfig from netspresso_trainer.trainer_cli import parse_gpu_ids, train_with_yaml_impl @@ -23,9 +24,17 @@ def export_config_as_yaml(config: TrainerConfig) -> str: return OmegaConf.to_yaml(conf) -def train_with_config(config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO') -> None: +def train_with_config(gpus: str, config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO') -> None: + + gpus: Union[List, int] = parse_gpu_ids(gpus) + assert isinstance(gpus, int), f"Currently, only single-GPU training is supported in this API. Your gpu(s): {gpus}" + + os.environ['CUDA_VISIBLE_DEVICES'] = str(gpus) + torch.cuda.empty_cache() # Reinitialize CUDA to apply the change + conf: DictConfig = OmegaConf.create(config) set_struct_recursive(conf, False) + train_common(conf, log_level=log_level) diff --git a/train.py b/train.py index 8b4e0c04b..7c286a26b 100644 --- a/train.py +++ b/train.py @@ -38,7 +38,9 @@ def train_with_inline_cfg(): print(export_config_as_yaml(cfg)) - train_with_config(cfg, log_level='INFO') + train_with_config(gpus="1", + config=cfg, + log_level='INFO') def train_with_inline_yaml(): from netspresso_trainer import train_with_yaml @@ -52,10 +54,10 @@ def train_with_inline_yaml(): if __name__ == '__main__': - train_cli() + # train_cli() # With inline yaml # train_with_inline_yaml() # With inline pythonic config - # train_with_inline_cfg() \ No newline at end of file + train_with_inline_cfg() \ No newline at end of file From 778d48dfbac208a2d3bc85551f13df4e0abf1f1c Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:19:58 +0900 Subject: [PATCH 039/167] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f11bee62..30027b1e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## New Features: -No changes to highlight. +- Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) ## Bug Fixes: From a296153158b9c2c8e73dad57f58b389a581b2e63 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 3 Nov 2023 12:20:01 +0900 Subject: [PATCH 040/167] ruff fix --- src/netspresso_trainer/trainer_inline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/netspresso_trainer/trainer_inline.py b/src/netspresso_trainer/trainer_inline.py index 30bac8afd..d291bce4d 100644 --- a/src/netspresso_trainer/trainer_inline.py +++ b/src/netspresso_trainer/trainer_inline.py @@ -2,8 +2,8 @@ from pathlib import Path from typing import List, Literal, Union -from omegaconf import DictConfig, OmegaConf import torch +from omegaconf import DictConfig, OmegaConf from netspresso_trainer.cfg import TrainerConfig from netspresso_trainer.trainer_cli import parse_gpu_ids, train_with_yaml_impl @@ -25,16 +25,16 @@ def export_config_as_yaml(config: TrainerConfig) -> str: def train_with_config(gpus: str, config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO') -> None: - + gpus: Union[List, int] = parse_gpu_ids(gpus) assert isinstance(gpus, int), f"Currently, only single-GPU training is supported in this API. Your gpu(s): {gpus}" - + os.environ['CUDA_VISIBLE_DEVICES'] = str(gpus) torch.cuda.empty_cache() # Reinitialize CUDA to apply the change conf: DictConfig = OmegaConf.create(config) set_struct_recursive(conf, False) - + train_common(conf, log_level=log_level) From a5ba9db00e5af400ac93bf44c308cfb7a368d9e2 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 17:07:50 +0900 Subject: [PATCH 041/167] Add RandomMixup --- .../dataloaders/augmentation/custom.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 2e0673727..29234b630 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -5,6 +5,7 @@ import numpy as np import PIL.Image as Image import torch +from torch.nn import functional as F_torch import torchvision.transforms as T import torchvision.transforms.functional as F from torchvision.transforms.functional import InterpolationMode @@ -349,6 +350,79 @@ def __repr__(self): return format_string +class RandomMixup: + """ + Given a batch of input images and labels, this class randomly applies the + `MixUp transformation `_ + + Args: + opts (argparse.Namespace): Arguments + num_classes (int): Number of classes in the dataset + """ + + def __init__(self, num_classes: int, alpha, p=1.0, inplace=False): + if not (num_classes > 0): + raise ValueError("Please provide a valid positive value for the num_classes.") + if not (alpha > 0): + raise ValueError("Alpha param can't be zero.") + if not (0.0 < p <= 1.0): + raise ValueError("MixUp probability should be between 0 and 1, where 1 is inclusive") + + self.num_classes = num_classes + self.alpha = alpha + self.p = p + self.inplace = inplace + + def _apply_mixup_transform(self, image_tensor, target_tensor): + if image_tensor.ndim != 4: + print(f"Batch ndim should be 4. Got {image_tensor.ndim}") + if target_tensor.ndim != 1: + print(f"Target ndim should be 1. Got {target_tensor.ndim}") + if not image_tensor.is_floating_point(): + print(f"Batch datatype should be a float tensor. Got {image_tensor.dtype}.") + if target_tensor.dtype != torch.int64: + print(f"Target datatype should be torch.int64. Got {target_tensor.dtype}") + + if not self.inplace: + image_tensor = image_tensor.clone() + target_tensor = target_tensor.clone() + + if target_tensor.ndim == 1: + target_tensor = F_torch.one_hot( + target_tensor, num_classes=self.num_classes + ).to(dtype=image_tensor.dtype) + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = image_tensor.roll(1, 0) + target_rolled = target_tensor.roll(1, 0) + + # Implemented as on mixup paper, page 3. + lambda_param = float( + torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0] + ) + batch_rolled.mul_(1.0 - lambda_param) + image_tensor.mul_(lambda_param).add_(batch_rolled) + + target_rolled.mul_(1.0 - lambda_param) + target_tensor.mul_(lambda_param).add_(target_rolled) + return image_tensor, target_tensor + + def __call__(self, samples, targets): + if torch.rand(1).item() >= self.p: + return samples, targets + + mixup_samples, mixup_targets = self._apply_mixup_transform( + image_tensor=samples, target_tensor=targets + ) + + return mixup_samples, mixup_targets + + def __repr__(self) -> str: + return "{}(num_classes={}, p={}, alpha={}, inplace={})".format( + self.__class__.__name__, self.num_classes, self.p, self.alpha, self.inplace + ) + + class Normalize: visualize = False From d350500774a714ea510dca9662918bcb953ceefe Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 17:08:36 +0900 Subject: [PATCH 042/167] Add classification_mix_collate_fn --- .../dataloaders/augmentation/registry.py | 4 ++-- src/netspresso_trainer/dataloaders/builder.py | 20 ++++++++++++++++++- .../dataloaders/classification/__init__.py | 2 +- .../dataloaders/classification/dataset.py | 19 ++++++++++++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index c724aafa6..88176046c 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -1,7 +1,6 @@ from typing import Callable, Dict -from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize - +from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize, RandomMixup TRANSFORM_DICT: Dict[str, Callable] = { 'colorjitter': ColorJitter, 'pad': Pad, @@ -10,4 +9,5 @@ 'randomhorizontalflip': RandomHorizontalFlip, 'randomverticalflip': RandomVerticalFlip, 'resize': Resize, + 'mixup': RandomMixup, } diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index acba53526..e324bc1e7 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -1,8 +1,11 @@ +from functools import partial import logging import os from pathlib import Path from typing import Dict, List, Optional, Type, Union +from .augmentation.registry import TRANSFORM_DICT +from .classification import classification_mix_collate_fn from .detection import detection_collate_fn from .registry import CREATE_TRANSFORM, CUSTOM_DATASET, DATA_SAMPLER, HUGGINGFACE_DATASET from .utils.loader import create_loader @@ -100,7 +103,22 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_dataset, profile=False): if task == 'classification': - collate_fn = None + if hasattr(conf.augmentation, 'mix_transform'): + mix_transforms = [] + for mix_transform_conf in conf.augmentation.mix_transform: + name = mix_transform_conf.name.lower() + + mix_kwargs = list(mix_transform_conf.keys()) + mix_kwargs.remove('name') + mix_kwargs = {k:mix_transform_conf[k] for k in mix_kwargs} + mix_kwargs['num_classes'] = train_dataset.num_classes + + transform = TRANSFORM_DICT[name](**mix_kwargs) + mix_transforms.append(transform) + + collate_fn = partial(classification_mix_collate_fn, mix_transforms=mix_transforms) + else: + collate_fn = None train_loader = create_loader( train_dataset, diff --git a/src/netspresso_trainer/dataloaders/classification/__init__.py b/src/netspresso_trainer/dataloaders/classification/__init__.py index d7d9a63f8..8198cd832 100644 --- a/src/netspresso_trainer/dataloaders/classification/__init__.py +++ b/src/netspresso_trainer/dataloaders/classification/__init__.py @@ -1,3 +1,3 @@ -from .dataset import ClassficationDataSampler +from .dataset import ClassficationDataSampler, classification_mix_collate_fn from .huggingface import ClassificationHFDataset from .local import ClassificationCustomDataset diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index bed9088c5..efb21a16c 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -3,6 +3,7 @@ from collections import Counter from itertools import chain from pathlib import Path +import random from typing import Dict, List, Optional, Tuple, Union import torch @@ -68,6 +69,24 @@ def is_file_dict(image_dir: Union[Path, str], file_or_dir_to_idx): return True + +def classification_mix_collate_fn(original_batch, mix_transforms): + images = [] + target = [] + for data_sample in original_batch: + images.append(data_sample[0]) + target.append(data_sample[1]) + + images = torch.stack(images, dim=0) + target = torch.tensor(target, dtype=torch.long) + + _mix_transform = random.choice(mix_transforms) + images, target = _mix_transform(images, target) + + outputs = (images, target) + return outputs + + class ClassficationDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(ClassficationDataSampler, self).__init__(conf_data, train_valid_split_ratio) From f71a46908c41e0672050b1deb50ea9f397258c36 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 17:37:11 +0900 Subject: [PATCH 043/167] Match encoding --- src/netspresso_trainer/dataloaders/builder.py | 12 +++++++----- .../dataloaders/classification/__init__.py | 2 +- .../dataloaders/classification/dataset.py | 16 ++++++++++++++++ .../pipelines/classification.py | 4 ++++ 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index e324bc1e7..ea4114a45 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -5,7 +5,7 @@ from typing import Dict, List, Optional, Type, Union from .augmentation.registry import TRANSFORM_DICT -from .classification import classification_mix_collate_fn +from .classification import classification_mix_collate_fn, classification_onehot_collate_fn from .detection import detection_collate_fn from .registry import CREATE_TRANSFORM, CUSTOM_DATASET, DATA_SAMPLER, HUGGINGFACE_DATASET from .utils.loader import create_loader @@ -116,9 +116,11 @@ def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_datas transform = TRANSFORM_DICT[name](**mix_kwargs) mix_transforms.append(transform) - collate_fn = partial(classification_mix_collate_fn, mix_transforms=mix_transforms) + train_collate_fn = partial(classification_mix_collate_fn, mix_transforms=mix_transforms) + eval_collate_fn = partial(classification_onehot_collate_fn, num_classes=train_dataset.num_classes) else: - collate_fn = None + train_collate_fn = None + eval_collate_fn = None train_loader = create_loader( train_dataset, @@ -129,7 +131,7 @@ def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_datas is_training=True, num_workers=conf.environment.num_workers if not profile else 1, distributed=conf.distributed, - collate_fn=collate_fn, + collate_fn=train_collate_fn, pin_memory=False, world_size=conf.world_size, rank=conf.rank, @@ -145,7 +147,7 @@ def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_datas is_training=False, num_workers=conf.environment.num_workers if not profile else 1, distributed=conf.distributed, - collate_fn=None, + collate_fn=eval_collate_fn, pin_memory=False, world_size=conf.world_size, rank=conf.rank, diff --git a/src/netspresso_trainer/dataloaders/classification/__init__.py b/src/netspresso_trainer/dataloaders/classification/__init__.py index 8198cd832..8618218e3 100644 --- a/src/netspresso_trainer/dataloaders/classification/__init__.py +++ b/src/netspresso_trainer/dataloaders/classification/__init__.py @@ -1,3 +1,3 @@ -from .dataset import ClassficationDataSampler, classification_mix_collate_fn +from .dataset import ClassficationDataSampler, classification_mix_collate_fn, classification_onehot_collate_fn from .huggingface import ClassificationHFDataset from .local import ClassificationCustomDataset diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index efb21a16c..abf1cb40c 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -8,6 +8,7 @@ import torch from omegaconf import DictConfig +from torch.nn import functional as F from torch.utils.data import random_split from ..base import BaseDataSampler @@ -87,6 +88,21 @@ def classification_mix_collate_fn(original_batch, mix_transforms): return outputs +def classification_onehot_collate_fn(original_batch, num_classes): + images = [] + target = [] + for data_sample in original_batch: + images.append(data_sample[0]) + target.append(data_sample[1]) + + images = torch.stack(images, dim=0) + target = torch.tensor(target, dtype=torch.long) + target = F.one_hot(target, num_classes=num_classes).to(dtype=images.dtype) + + outputs = (images, target) + return outputs + + class ClassficationDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(ClassficationDataSampler, self).__init__(conf_data, train_valid_split_ratio) diff --git a/src/netspresso_trainer/pipelines/classification.py b/src/netspresso_trainer/pipelines/classification.py index 709da58bd..a557612bb 100644 --- a/src/netspresso_trainer/pipelines/classification.py +++ b/src/netspresso_trainer/pipelines/classification.py @@ -28,6 +28,8 @@ def train_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='train') + if target.dim() > 1: # Soft label to label number + target = torch.argmax(target, dim=-1) self.metric_factory.calc(out['pred'], target, phase='train') self.loss_factory.backward() @@ -44,6 +46,8 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='valid') + if target.dim() > 1: # Soft label to label number + target = torch.argmax(target, dim=-1) self.metric_factory.calc(out['pred'], target, phase='valid') if self.conf.distributed: From 0028ea0e37ba84370c806e7da0363219c2f1a256 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 17:48:50 +0900 Subject: [PATCH 044/167] Fix PIDNet model dataclass task field --- src/netspresso_trainer/cfg/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 4203c47b8..ad4a3aa12 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -251,7 +251,7 @@ class ClassificationMobileViTModelConfig(ModelConfig): @dataclass class PIDNetModelConfig(ModelConfig): - task: str = "classification" + task: str = "segmentation" checkpoint: Optional[Union[Path, str]] = "./weights/pidnet/pidnet_s.pth" architecture: ArchitectureConfig = field(default_factory=lambda: PIDNetArchitectureConfig()) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ From 83cd3aaf4d1e40a45aa1d882c5eb6ae3bd3f3833 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 3 Nov 2023 17:51:22 +0900 Subject: [PATCH 045/167] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30027b1e6..18206af99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ ## Bug Fixes: -No changes to highlight. +- Fix PIDNet model dataclass task field by `@illian01` in [PR 220](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/220) ## Breaking Changes: From 732ab6a345e0495ef86759184fa1c5bfe24dcb39 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 10:46:34 +0900 Subject: [PATCH 046/167] Add cutmix --- .../dataloaders/augmentation/custom.py | 88 +++++++++++++++++++ .../dataloaders/augmentation/registry.py | 3 +- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 29234b630..f1f8a92e6 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -1,5 +1,6 @@ import random from collections.abc import Sequence +import math from typing import Dict, Optional import numpy as np @@ -423,6 +424,93 @@ def __repr__(self) -> str: ) +class RandomCutmix: + """ + Given a batch of input images and labels, this class randomly applies the + `CutMix transformation `_ + + Args: + opts (argparse.Namespace): Arguments + num_classes (int): Number of classes in the dataset + """ + + def __init__(self, num_classes, alpha, p=1.0, inplace=False): + if not (num_classes > 0): + raise ValueError("Please provide a valid positive value for the num_classes.") + if not (alpha > 0): + raise ValueError("Alpha param can't be zero.") + if not (0.0 < p <= 1.0): + raise ValueError("CutMix probability should be between 0 and 1, where 1 is inclusive") + + self.num_classes = num_classes + self.alpha = alpha + self.p = p + self.inplace = inplace + + def _apply_cutmix_transform(self, image_tensor, target_tensor): + if image_tensor.ndim != 4: + print(f"Batch ndim should be 4. Got {image_tensor.ndim}") + if target_tensor.ndim != 1: + print(f"Target ndim should be 1. Got {target_tensor.ndim}") + if not image_tensor.is_floating_point(): + print(f"Batch dtype should be a float tensor. Got {image_tensor.dtype}.") + if target_tensor.dtype != torch.int64: + print(f"Target dtype should be torch.int64. Got {target_tensor.dtype}") + + if not self.inplace: + image_tensor = image_tensor.clone() + target_tensor = target_tensor.clone() + + if target_tensor.ndim == 1: + target_tensor = F_torch.one_hot( + target_tensor, num_classes=self.num_classes + ).to(dtype=image_tensor.dtype) + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = image_tensor.roll(1, 0) + target_rolled = target_tensor.roll(1, 0) + + # Implemented as on cutmix paper, page 12 (with minor corrections on typos). + lambda_param = float( + torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0] + ) + W, H = F.get_image_size(image_tensor) + + r_x = torch.randint(W, (1,)) + r_y = torch.randint(H, (1,)) + + r = 0.5 * math.sqrt(1.0 - lambda_param) + r_w_half = int(r * W) + r_h_half = int(r * H) + + x1 = int(torch.clamp(r_x - r_w_half, min=0)) + y1 = int(torch.clamp(r_y - r_h_half, min=0)) + x2 = int(torch.clamp(r_x + r_w_half, max=W)) + y2 = int(torch.clamp(r_y + r_h_half, max=H)) + + image_tensor[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2] + lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H)) + + target_rolled.mul_(1.0 - lambda_param) + target_tensor.mul_(lambda_param).add_(target_rolled) + return image_tensor, target_tensor + + def __call__(self, samples, targets) -> Dict: + if torch.rand(1).item() >= self.p: + return samples, targets + + mixup_samples, mixup_targets = self._apply_cutmix_transform( + image_tensor=samples, target_tensor=targets + ) + + return mixup_samples, mixup_targets + + def __repr__(self) -> str: + return "{}(num_classes={}, p={}, alpha={}, inplace={})".format( + self.__class__.__name__, self.num_classes, self.p, self.alpha, self.inplace + ) + + class Normalize: visualize = False diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index 88176046c..f279315f4 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -1,6 +1,6 @@ from typing import Callable, Dict -from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize, RandomMixup +from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize, RandomMixup, RandomCutmix TRANSFORM_DICT: Dict[str, Callable] = { 'colorjitter': ColorJitter, 'pad': Pad, @@ -10,4 +10,5 @@ 'randomverticalflip': RandomVerticalFlip, 'resize': Resize, 'mixup': RandomMixup, + 'cutmix': RandomCutmix } From 18402e904589ea40d4921e2419f9241e098a6fed Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 10:47:32 +0900 Subject: [PATCH 047/167] Raise ValueError when dim is not match --- .../dataloaders/augmentation/custom.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index f1f8a92e6..a5f541f87 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -376,13 +376,13 @@ def __init__(self, num_classes: int, alpha, p=1.0, inplace=False): def _apply_mixup_transform(self, image_tensor, target_tensor): if image_tensor.ndim != 4: - print(f"Batch ndim should be 4. Got {image_tensor.ndim}") + raise ValueError(f"Batch ndim should be 4. Got {image_tensor.ndim}") if target_tensor.ndim != 1: - print(f"Target ndim should be 1. Got {target_tensor.ndim}") + raise ValueError(f"Target ndim should be 1. Got {target_tensor.ndim}") if not image_tensor.is_floating_point(): - print(f"Batch datatype should be a float tensor. Got {image_tensor.dtype}.") + raise ValueError(f"Batch datatype should be a float tensor. Got {image_tensor.dtype}.") if target_tensor.dtype != torch.int64: - print(f"Target datatype should be torch.int64. Got {target_tensor.dtype}") + raise ValueError(f"Target datatype should be torch.int64. Got {target_tensor.dtype}") if not self.inplace: image_tensor = image_tensor.clone() @@ -449,13 +449,13 @@ def __init__(self, num_classes, alpha, p=1.0, inplace=False): def _apply_cutmix_transform(self, image_tensor, target_tensor): if image_tensor.ndim != 4: - print(f"Batch ndim should be 4. Got {image_tensor.ndim}") + raise ValueError(f"Batch ndim should be 4. Got {image_tensor.ndim}") if target_tensor.ndim != 1: - print(f"Target ndim should be 1. Got {target_tensor.ndim}") + raise ValueError(f"Target ndim should be 1. Got {target_tensor.ndim}") if not image_tensor.is_floating_point(): - print(f"Batch dtype should be a float tensor. Got {image_tensor.dtype}.") + raise ValueError(f"Batch dtype should be a float tensor. Got {image_tensor.dtype}.") if target_tensor.dtype != torch.int64: - print(f"Target dtype should be torch.int64. Got {target_tensor.dtype}") + raise ValueError(f"Target dtype should be torch.int64. Got {target_tensor.dtype}") if not self.inplace: image_tensor = image_tensor.clone() From 94a695119e005dda64cb34ba7f2d8acdb6a69657 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 10:58:23 +0900 Subject: [PATCH 048/167] Change field to mix_transforms --- src/netspresso_trainer/dataloaders/builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index ea4114a45..3d8892901 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -103,9 +103,9 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_dataset, profile=False): if task == 'classification': - if hasattr(conf.augmentation, 'mix_transform'): + if hasattr(conf.augmentation, 'mix_transforms'): mix_transforms = [] - for mix_transform_conf in conf.augmentation.mix_transform: + for mix_transform_conf in conf.augmentation.mix_transforms: name = mix_transform_conf.name.lower() mix_kwargs = list(mix_transform_conf.keys()) From 01e42145d4effa7b53f8054c736ff1e752933531 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 11:02:45 +0900 Subject: [PATCH 049/167] Ruff fix --- .../dataloaders/augmentation/custom.py | 8 ++++---- .../dataloaders/augmentation/registry.py | 13 ++++++++++++- src/netspresso_trainer/dataloaders/builder.py | 2 +- .../dataloaders/classification/dataset.py | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index a5f541f87..3f0447b7f 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -1,14 +1,14 @@ +import math import random from collections.abc import Sequence -import math from typing import Dict, Optional import numpy as np import PIL.Image as Image import torch -from torch.nn import functional as F_torch import torchvision.transforms as T import torchvision.transforms.functional as F +from torch.nn import functional as F_torch from torchvision.transforms.functional import InterpolationMode BBOX_CROP_KEEP_THRESHOLD = 0.2 @@ -368,7 +368,7 @@ def __init__(self, num_classes: int, alpha, p=1.0, inplace=False): raise ValueError("Alpha param can't be zero.") if not (0.0 < p <= 1.0): raise ValueError("MixUp probability should be between 0 and 1, where 1 is inclusive") - + self.num_classes = num_classes self.alpha = alpha self.p = p @@ -441,7 +441,7 @@ def __init__(self, num_classes, alpha, p=1.0, inplace=False): raise ValueError("Alpha param can't be zero.") if not (0.0 < p <= 1.0): raise ValueError("CutMix probability should be between 0 and 1, where 1 is inclusive") - + self.num_classes = num_classes self.alpha = alpha self.p = p diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index f279315f4..0251b4de8 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -1,6 +1,17 @@ from typing import Callable, Dict -from .custom import ColorJitter, Pad, RandomCrop, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, Resize, RandomMixup, RandomCutmix +from .custom import ( + ColorJitter, + Pad, + RandomCrop, + RandomCutmix, + RandomHorizontalFlip, + RandomMixup, + RandomResizedCrop, + RandomVerticalFlip, + Resize, +) + TRANSFORM_DICT: Dict[str, Callable] = { 'colorjitter': ColorJitter, 'pad': Pad, diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index 3d8892901..23595e5ce 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -1,6 +1,6 @@ -from functools import partial import logging import os +from functools import partial from pathlib import Path from typing import Dict, List, Optional, Type, Union diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index abf1cb40c..298aeb0df 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -1,9 +1,9 @@ import csv import logging +import random from collections import Counter from itertools import chain from pathlib import Path -import random from typing import Dict, List, Optional, Tuple, Union import torch From 8b445393744948fab055f8caed717e3da3edc61e Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 11:03:40 +0900 Subject: [PATCH 050/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30027b1e6..27d18345c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features: - Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) +- Support augmentation for classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) ## Bug Fixes: From e1deac5aeaf6fb0e96297ac8248f3601d220a0cc Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 11:15:28 +0900 Subject: [PATCH 051/167] Add cutmix and mixup reference --- src/netspresso_trainer/dataloaders/augmentation/custom.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 3f0447b7f..00fdc5145 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -353,6 +353,9 @@ def __repr__(self): class RandomMixup: """ + Based on the RandomCutmix implementation of ml_cvnets. + https://github.com/apple/ml-cvnets/blob/77717569ab4a852614dae01f010b32b820cb33bb/data/transforms/image_torch.py + Given a batch of input images and labels, this class randomly applies the `MixUp transformation `_ @@ -426,6 +429,9 @@ def __repr__(self) -> str: class RandomCutmix: """ + Based on the RandomCutmix implementation of ml_cvnets. + https://github.com/apple/ml-cvnets/blob/77717569ab4a852614dae01f010b32b820cb33bb/data/transforms/image_torch.py + Given a batch of input images and labels, this class randomly applies the `CutMix transformation `_ From 0a10a722837e517d8b4ebddab095a42e278f1e54 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 11:16:15 +0900 Subject: [PATCH 052/167] Fix typo --- src/netspresso_trainer/dataloaders/augmentation/custom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 00fdc5145..8ecf07f6d 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -353,7 +353,7 @@ def __repr__(self): class RandomMixup: """ - Based on the RandomCutmix implementation of ml_cvnets. + Based on the RandomMixup implementation of ml_cvnets. https://github.com/apple/ml-cvnets/blob/77717569ab4a852614dae01f010b32b820cb33bb/data/transforms/image_torch.py Given a batch of input images and labels, this class randomly applies the From a022b5f93ce8a4514cce02b6e529a255214a9a67 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 14:09:24 +0900 Subject: [PATCH 053/167] Add visualize class attribute --- src/netspresso_trainer/dataloaders/augmentation/custom.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 8ecf07f6d..4078569cd 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -363,6 +363,7 @@ class RandomMixup: opts (argparse.Namespace): Arguments num_classes (int): Number of classes in the dataset """ + visualize = False def __init__(self, num_classes: int, alpha, p=1.0, inplace=False): if not (num_classes > 0): @@ -439,6 +440,7 @@ class RandomCutmix: opts (argparse.Namespace): Arguments num_classes (int): Number of classes in the dataset """ + visualize = False def __init__(self, num_classes, alpha, p=1.0, inplace=False): if not (num_classes > 0): From 25b6e103671de9263981d238c70e38d48726ead2 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 14:19:50 +0900 Subject: [PATCH 054/167] Change recipe field to transforms --- config/augmentation/classification.yaml | 2 +- config/augmentation/detection.yaml | 2 +- config/augmentation/segmentation.yaml | 2 +- config/augmentation/template/common.yaml | 2 +- src/netspresso_trainer/dataloaders/augmentation/transforms.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index 115988d3b..e0bbd0502 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -1,6 +1,6 @@ augmentation: img_size: &img_size 256 - recipe: + transforms: - name: randomresizedcrop size: *img_size diff --git a/config/augmentation/detection.yaml b/config/augmentation/detection.yaml index 099dd3655..110226185 100644 --- a/config/augmentation/detection.yaml +++ b/config/augmentation/detection.yaml @@ -1,6 +1,6 @@ augmentation: img_size: &img_size 512 - recipe: + transforms: - name: resize size: *img_size diff --git a/config/augmentation/segmentation.yaml b/config/augmentation/segmentation.yaml index 97b8630f7..d878f5af1 100644 --- a/config/augmentation/segmentation.yaml +++ b/config/augmentation/segmentation.yaml @@ -1,6 +1,6 @@ augmentation: img_size: &img_size 512 - recipe: + transforms: - name: randomresizedcrop size: *img_size diff --git a/config/augmentation/template/common.yaml b/config/augmentation/template/common.yaml index af96c0926..899363fe6 100644 --- a/config/augmentation/template/common.yaml +++ b/config/augmentation/template/common.yaml @@ -1,6 +1,6 @@ augmentation: img_size: &img_size ~ - recipe: + transforms: - name: randomresizedcrop size: ~ diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index c96809881..35336ee66 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -31,7 +31,7 @@ def generate_edge(label: np.ndarray) -> Image.Image: def transforms_custom_train(conf_augmentation): assert conf_augmentation.img_size > 32 preprocess = [] - for augment in conf_augmentation.recipe: + for augment in conf_augmentation.transforms: name = augment.name.lower() augment_kwargs = list(augment.keys()) augment_kwargs.remove('name') @@ -58,7 +58,7 @@ def transforms_custom_eval(conf_augmentation): def train_transforms_pidnet(conf_augmentation): preprocess = [] - for augment in conf_augmentation.recipe: + for augment in conf_augmentation.transforms: name = augment.name.lower() augment_kwargs = list(augment.keys()) augment_kwargs.remove('name') From becea4b11290f0ae0d6fb079597f18e002da7339 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 14:22:14 +0900 Subject: [PATCH 055/167] Change recipe field to transforms for cfg class --- src/netspresso_trainer/cfg/augmentation.py | 8 ++++---- tools/config_test.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index fb51d02d6..3b5e098eb 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -15,7 +15,7 @@ class Transform: @dataclass class AugmentationConfig: img_size: int = DEFAULT_IMG_SIZE - recipe: List[Transform] = field(default_factory=lambda: [ + transforms: List[Transform] = field(default_factory=lambda: [ Transform() ]) @@ -72,7 +72,7 @@ class Resize(Transform): @dataclass class ClassificationAugmentationConfig(AugmentationConfig): img_size: int = 256 - recipe: List[Transform] = field(default_factory=lambda: [ + transforms: List[Transform] = field(default_factory=lambda: [ RandomResizedCrop(size=256), RandomHorizontalFlip() ]) @@ -81,7 +81,7 @@ class ClassificationAugmentationConfig(AugmentationConfig): @dataclass class SegmentationAugmentationConfig(AugmentationConfig): img_size: int = 512 - recipe: List[Transform] = field(default_factory=lambda: [ + transforms: List[Transform] = field(default_factory=lambda: [ RandomResizedCrop(size=512), RandomHorizontalFlip(), ColorJitter() @@ -91,6 +91,6 @@ class SegmentationAugmentationConfig(AugmentationConfig): @dataclass class DetectionAugmentationConfig(AugmentationConfig): img_size: int = 512 - recipe: List[Transform] = field(default_factory=lambda: [ + transforms: List[Transform] = field(default_factory=lambda: [ Resize(size=512) ]) diff --git a/tools/config_test.py b/tools/config_test.py index 10e345388..81f7bc5d6 100644 --- a/tools/config_test.py +++ b/tools/config_test.py @@ -35,12 +35,12 @@ # OK: update value of subclass in the main dataclass cfg_new: TrainerConfig = deepcopy(cfg) - cfg_new.augmentation.recipe[-1].saturation = 0.0 + cfg_new.augmentation.transforms[-1].saturation = 0.0 # print(OmegaConf.to_yaml(OmegaConf.structured(cfg_new))) # OK: update value from OmegaConf Config config_new: TrainerConfig = deepcopy(config) - cfg_new.augmentation.recipe[-1].hue = 0.5 + cfg_new.augmentation.transforms[-1].hue = 0.5 # print(OmegaConf.to_yaml(config_new)) From 05b5286238c0bfb9be75f2775f83951889ab7ed3 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 14:49:38 +0900 Subject: [PATCH 056/167] Fix default augmentation for classification --- config/augmentation/classification.yaml | 4 ++++ config/augmentation/template/common.yaml | 3 ++- src/netspresso_trainer/cfg/augmentation.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index e0bbd0502..0648b74b4 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -8,3 +8,7 @@ augmentation: - name: randomhorizontalflip p: 0.5 + mix_transforms: + - + name: cutmix + alpha: 1.0 diff --git a/config/augmentation/template/common.yaml b/config/augmentation/template/common.yaml index 899363fe6..881cb816d 100644 --- a/config/augmentation/template/common.yaml +++ b/config/augmentation/template/common.yaml @@ -23,4 +23,5 @@ augmentation: size: ~ - name: pad - padding: ~ \ No newline at end of file + padding: ~ + mix_transforms: ~ \ No newline at end of file diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index 3b5e098eb..1a0d5730e 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -18,6 +18,7 @@ class AugmentationConfig: transforms: List[Transform] = field(default_factory=lambda: [ Transform() ]) + mix_transforms: Optional[List[Transform]] = None @dataclass @@ -69,6 +70,20 @@ class Resize(Transform): interpolation: Optional[str] = 'bilinear' +@dataclass +class RandomMixup(Transform): + name: str = 'mixup' + alpha: float = 0.2 + p: float = 1.0 + + +@dataclass +class RandomCutmix(Transform): + name: str = 'cutmix' + alpha: float = 1.0 + p: float = 1.0 + + @dataclass class ClassificationAugmentationConfig(AugmentationConfig): img_size: int = 256 @@ -76,6 +91,9 @@ class ClassificationAugmentationConfig(AugmentationConfig): RandomResizedCrop(size=256), RandomHorizontalFlip() ]) + mix_transforms: List[Transform] = field(default_factory=lambda: [ + RandomCutmix(), + ]) @dataclass From 1f28816bfcb1843acaffdbd5dd7ae4f0e023f61c Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 15:32:31 +0900 Subject: [PATCH 057/167] Init postprocessor --- src/netspresso_trainer/postprocessors/__init__.py | 0 src/netspresso_trainer/postprocessors/builder.py | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 src/netspresso_trainer/postprocessors/__init__.py create mode 100644 src/netspresso_trainer/postprocessors/builder.py diff --git a/src/netspresso_trainer/postprocessors/__init__.py b/src/netspresso_trainer/postprocessors/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py new file mode 100644 index 000000000..01583f28c --- /dev/null +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -0,0 +1,2 @@ +def build_postprocessor(task: str, conf_model): + pass \ No newline at end of file From c0c2d124e9d40d4ab18a8832ce80c590512eaaa8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 16:28:23 +0900 Subject: [PATCH 058/167] Apply postprocessor on classification task --- .../metrics/classification/metric.py | 5 ++--- src/netspresso_trainer/pipelines/base.py | 2 ++ .../pipelines/classification.py | 8 +++++--- .../postprocessors/__init__.py | 1 + .../postprocessors/builder.py | 6 +++++- .../postprocessors/classification.py | 18 ++++++++++++++++++ .../postprocessors/register.py | 7 +++++++ 7 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 src/netspresso_trainer/postprocessors/classification.py create mode 100644 src/netspresso_trainer/postprocessors/register.py diff --git a/src/netspresso_trainer/metrics/classification/metric.py b/src/netspresso_trainer/metrics/classification/metric.py index efa4dc29f..ad18b3709 100644 --- a/src/netspresso_trainer/metrics/classification/metric.py +++ b/src/netspresso_trainer/metrics/classification/metric.py @@ -8,11 +8,10 @@ @torch.no_grad() -def accuracy_topk(output, target): +def accuracy_topk(pred, target): """Computes the accuracy over the k top predictions for the specified values of k""" - maxk = min(TOPK_MAX, output.size()[1]) batch_size = target.size(0) - _, pred = output.topk(maxk, 1, True, True) + maxk = pred.size(-1) pred = pred.t() correct = pred.eq(target.reshape(1, -1).expand_as(pred)) return lambda topk: correct[:min(topk, maxk)].reshape(-1).float().sum(0) * 100. / batch_size diff --git a/src/netspresso_trainer/pipelines/base.py b/src/netspresso_trainer/pipelines/base.py index f43fb2dd1..3982f9979 100644 --- a/src/netspresso_trainer/pipelines/base.py +++ b/src/netspresso_trainer/pipelines/base.py @@ -21,6 +21,7 @@ from ..utils.onnx import save_onnx from ..utils.record import Timer, TrainingSummary from ..utils.stats import get_params_and_macs +from ..postprocessors import build_postprocessor logger = logging.getLogger("netspresso_trainer") @@ -87,6 +88,7 @@ def set_train(self): self.scheduler, _ = build_scheduler(self.optimizer, self.conf.training) self.loss_factory = build_losses(self.conf.model, ignore_index=self.ignore_index) self.metric_factory = build_metrics(self.task, self.conf.model, ignore_index=self.ignore_index, num_classes=self.num_classes) + self.postprocessor = build_postprocessor(self.task, self.conf.model) resume_optimizer_checkpoint = self.conf.model.resume_optimizer_checkpoint if resume_optimizer_checkpoint is not None: resume_optimizer_checkpoint = Path(resume_optimizer_checkpoint) diff --git a/src/netspresso_trainer/pipelines/classification.py b/src/netspresso_trainer/pipelines/classification.py index 709da58bd..dff9dc9f1 100644 --- a/src/netspresso_trainer/pipelines/classification.py +++ b/src/netspresso_trainer/pipelines/classification.py @@ -28,7 +28,8 @@ def train_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='train') - self.metric_factory.calc(out['pred'], target, phase='train') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='train') self.loss_factory.backward() self.optimizer.step() @@ -44,7 +45,8 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='valid') - self.metric_factory.calc(out['pred'], target, phase='valid') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='valid') if self.conf.distributed: torch.distributed.barrier() @@ -55,7 +57,7 @@ def test_step(self, batch): images = images.to(self.devices) out = self.model(images.unsqueeze(0)) - _, pred = out['pred'].topk(1, 1, True, True) + pred = self.postprocessor(out) if self.conf.distributed: torch.distributed.barrier() diff --git a/src/netspresso_trainer/postprocessors/__init__.py b/src/netspresso_trainer/postprocessors/__init__.py index e69de29bb..d5ad68226 100644 --- a/src/netspresso_trainer/postprocessors/__init__.py +++ b/src/netspresso_trainer/postprocessors/__init__.py @@ -0,0 +1 @@ +from .builder import build_postprocessor \ No newline at end of file diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index 01583f28c..8fa3749bb 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -1,2 +1,6 @@ +from .register import POSTPROCESSOR_DICT + + def build_postprocessor(task: str, conf_model): - pass \ No newline at end of file + head_name = conf_model.architecture.head.name + return POSTPROCESSOR_DICT[head_name]() \ No newline at end of file diff --git a/src/netspresso_trainer/postprocessors/classification.py b/src/netspresso_trainer/postprocessors/classification.py new file mode 100644 index 000000000..f3b753790 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/classification.py @@ -0,0 +1,18 @@ +from typing import Optional + +from ..models.utils import ModelOutput + +TOPK_MAX = 20 + + +class TopK(): + def __init__(self): + pass + + def __call__(self, outputs: ModelOutput, k: Optional[int]=None): + pred = outputs['pred'] + maxk = min(TOPK_MAX, pred.size()[1]) + if k: + k = min(k, maxk) + _, pred = pred.topk(maxk, 1, True, True) + return pred diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py new file mode 100644 index 000000000..6a39b1f69 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/register.py @@ -0,0 +1,7 @@ +from typing import Dict, Type + +from .classification import TopK + +POSTPROCESSOR_DICT = { + 'fc': TopK, +} \ No newline at end of file From 89ee672350f127937b13bc5e74ec8927805e5c31 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 16:28:42 +0900 Subject: [PATCH 059/167] Remove unused --- .../metrics/classification/metric.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/netspresso_trainer/metrics/classification/metric.py b/src/netspresso_trainer/metrics/classification/metric.py index ad18b3709..a41263135 100644 --- a/src/netspresso_trainer/metrics/classification/metric.py +++ b/src/netspresso_trainer/metrics/classification/metric.py @@ -24,17 +24,6 @@ class ClassificationMetric(BaseMetric): def __init__(self, **kwargs): super().__init__() - @torch.no_grad() - @staticmethod - def accuracy_topk(output, target): - """Computes the accuracy over the k top predictions for the specified values of k""" - maxk = min(TOPK_MAX, output.size()[1]) - batch_size = target.size(0) - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.reshape(1, -1).expand_as(pred)) - return lambda topk: correct[:min(topk, maxk)].reshape(-1).float().sum(0) * 100. / batch_size - def calibrate(self, pred, target, **kwargs): result_dict = {k: 0. for k in self.metric_names} topk_callable = accuracy_topk(pred, target) From 145e7e5c74bc53de6d8af7ae7622a20ef8433497 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 17:16:03 +0900 Subject: [PATCH 060/167] Apply postprocessor on segmentation task --- .../metrics/segmentation/metric.py | 3 +-- src/netspresso_trainer/pipelines/segmentation.py | 10 ++++++---- src/netspresso_trainer/postprocessors/register.py | 2 ++ .../postprocessors/segmentation.py | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 src/netspresso_trainer/postprocessors/segmentation.py diff --git a/src/netspresso_trainer/metrics/segmentation/metric.py b/src/netspresso_trainer/metrics/segmentation/metric.py index fea6398ed..65ca19a88 100644 --- a/src/netspresso_trainer/metrics/segmentation/metric.py +++ b/src/netspresso_trainer/metrics/segmentation/metric.py @@ -47,8 +47,7 @@ def calibrate(self, pred, target, **kwargs): result_dict = {k: AverageMeter(k) for k in self.metric_names} B = pred.size(0) - output_seg = torch.max(pred, dim=1)[1] # argmax - metrics = self.intersection_and_union_gpu(output_seg, target) + metrics = self.intersection_and_union_gpu(pred, target) result_dict['iou'].update(sum(metrics['intersection']) / (sum(metrics['union']) + 1e-10), n=B) result_dict['pixel_acc'].update(sum(metrics['intersection']) / (sum(metrics['target']) + 1e-10), n=B) diff --git a/src/netspresso_trainer/pipelines/segmentation.py b/src/netspresso_trainer/pipelines/segmentation.py index 1da5af678..782d4952b 100644 --- a/src/netspresso_trainer/pipelines/segmentation.py +++ b/src/netspresso_trainer/pipelines/segmentation.py @@ -41,7 +41,8 @@ def train_step(self, batch): self.optimizer.step() out = {k: v.detach() for k, v in out.items()} - self.metric_factory.calc(out['pred'], target, phase='train') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='train') if self.conf.distributed: torch.distributed.barrier() @@ -62,7 +63,8 @@ def valid_step(self, batch): else: self.loss_factory.calc(out, target, phase='valid') - self.metric_factory.calc(out['pred'], target, phase='valid') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='valid') if self.conf.distributed: torch.distributed.barrier() @@ -87,9 +89,9 @@ def test_step(self, batch): out = self.model(images.unsqueeze(0)) - output_seg = torch.max(out['pred'], dim=1)[1] # argmax + pred = self.postprocessor(out) - return output_seg + return pred def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']): pass diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 6a39b1f69..0c8b7297b 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -1,7 +1,9 @@ from typing import Dict, Type from .classification import TopK +from .segmentation import SegmentationArgMax POSTPROCESSOR_DICT = { 'fc': TopK, + 'all_mlp_decoder': SegmentationArgMax } \ No newline at end of file diff --git a/src/netspresso_trainer/postprocessors/segmentation.py b/src/netspresso_trainer/postprocessors/segmentation.py new file mode 100644 index 000000000..e1b2420bc --- /dev/null +++ b/src/netspresso_trainer/postprocessors/segmentation.py @@ -0,0 +1,15 @@ +from typing import Any, Optional + +import torch + +from ..models.utils import ModelOutput + + +class SegmentationArgMax: + def __init__(self): + pass + + def __call__(self, outputs: ModelOutput): + pred = outputs['pred'] + pred = torch.max(pred, dim=1)[1] # argmax + return pred \ No newline at end of file From bb44c71f5b98a83ee04098cc4a8a9bbb828d7516 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 17:32:43 +0900 Subject: [PATCH 061/167] Rename postprocessor modules --- src/netspresso_trainer/postprocessors/classification.py | 2 +- src/netspresso_trainer/postprocessors/register.py | 8 ++++---- src/netspresso_trainer/postprocessors/segmentation.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/classification.py b/src/netspresso_trainer/postprocessors/classification.py index f3b753790..cfc2d0a66 100644 --- a/src/netspresso_trainer/postprocessors/classification.py +++ b/src/netspresso_trainer/postprocessors/classification.py @@ -5,7 +5,7 @@ TOPK_MAX = 20 -class TopK(): +class ClassificationPostprocessor(): def __init__(self): pass diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 0c8b7297b..306f2ead4 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -1,9 +1,9 @@ from typing import Dict, Type -from .classification import TopK -from .segmentation import SegmentationArgMax +from .classification import ClassificationPostprocessor +from .segmentation import SegmentationPostprocessor POSTPROCESSOR_DICT = { - 'fc': TopK, - 'all_mlp_decoder': SegmentationArgMax + 'fc': ClassificationPostprocessor, + 'all_mlp_decoder': SegmentationPostprocessor } \ No newline at end of file diff --git a/src/netspresso_trainer/postprocessors/segmentation.py b/src/netspresso_trainer/postprocessors/segmentation.py index e1b2420bc..c9e6e1ae8 100644 --- a/src/netspresso_trainer/postprocessors/segmentation.py +++ b/src/netspresso_trainer/postprocessors/segmentation.py @@ -5,7 +5,7 @@ from ..models.utils import ModelOutput -class SegmentationArgMax: +class SegmentationPostprocessor: def __init__(self): pass From 55bc2056b896f9abfc727b56938b4734d22f48e9 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 17:47:25 +0900 Subject: [PATCH 062/167] Fix classification postprocessing error --- src/netspresso_trainer/pipelines/classification.py | 2 +- src/netspresso_trainer/postprocessors/classification.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/pipelines/classification.py b/src/netspresso_trainer/pipelines/classification.py index dff9dc9f1..4646f6010 100644 --- a/src/netspresso_trainer/pipelines/classification.py +++ b/src/netspresso_trainer/pipelines/classification.py @@ -57,7 +57,7 @@ def test_step(self, batch): images = images.to(self.devices) out = self.model(images.unsqueeze(0)) - pred = self.postprocessor(out) + pred = self.postprocessor(out, k=1) if self.conf.distributed: torch.distributed.barrier() diff --git a/src/netspresso_trainer/postprocessors/classification.py b/src/netspresso_trainer/postprocessors/classification.py index cfc2d0a66..4929fe37d 100644 --- a/src/netspresso_trainer/postprocessors/classification.py +++ b/src/netspresso_trainer/postprocessors/classification.py @@ -13,6 +13,6 @@ def __call__(self, outputs: ModelOutput, k: Optional[int]=None): pred = outputs['pred'] maxk = min(TOPK_MAX, pred.size()[1]) if k: - k = min(k, maxk) + maxk = min(k, maxk) _, pred = pred.topk(maxk, 1, True, True) return pred From 9c1805b51e9612417591dce736235c3ddbb224ea Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 18:06:41 +0900 Subject: [PATCH 063/167] Wrap yolox output with ModelOutput --- src/netspresso_trainer/losses/detection/yolox.py | 1 + .../models/heads/detection/experimental/yolo_head.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/losses/detection/yolox.py b/src/netspresso_trainer/losses/detection/yolox.py index 7fcd8d534..76263d6f6 100644 --- a/src/netspresso_trainer/losses/detection/yolox.py +++ b/src/netspresso_trainer/losses/detection/yolox.py @@ -50,6 +50,7 @@ def __init__(self, **kwargs) -> None: def forward(self, out: List, target: Dict) -> torch.Tensor: + out = out['pred'] x_shifts = [] y_shifts = [] expanded_strides = [] diff --git a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py b/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py index 9376a2d5a..10f1346ea 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py @@ -6,6 +6,7 @@ import torch.nn as nn from ....op.custom import ConvLayer +from ....utils import ModelOutput from .fpn import PAFPN @@ -134,7 +135,7 @@ def forward(self, xin): outputs.append(output) - return outputs + return ModelOutput(pred=outputs) def yolo_head(num_classes, intermediate_features_dim, **kwargs): From ffa1c3a22fa046424e88382a3bae1ba2668ccba3 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 18:07:08 +0900 Subject: [PATCH 064/167] Move yolox postprocessor --- src/netspresso_trainer/pipelines/detection.py | 80 +---------------- .../postprocessors/detection.py | 85 +++++++++++++++++++ .../postprocessors/register.py | 4 +- 3 files changed, 92 insertions(+), 77 deletions(-) create mode 100644 src/netspresso_trainer/postprocessors/detection.py diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 5cdef182c..0fe4a6fd9 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -224,9 +224,8 @@ def train_step(self, batch): self.loss_factory.backward() self.optimizer.step() - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, dtype=out['pred'][0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out['pred']], + num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -257,9 +256,8 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, targets, phase='valid') - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, dtype=out['pred'][0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out['pred']], + num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -309,73 +307,3 @@ def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']) pred_on_image['post_labels'] = class_idx pred.append(pred_on_image) self.metric_factory.calc(pred, target=targets, phase=phase) - - # TODO: Temporary defined in pipeline, it will be moved to postprocessor module. - def decode_outputs(self, outputs, dtype, stage_strides): - hw = [x.shape[-2:] for x in outputs] - # [batch, n_anchors_all, num_classes + 5] - outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) - outputs[..., 4:] = outputs[..., 4:].sigmoid() - - grids = [] - strides = [] - for (hsize, wsize), stride in zip(hw, stage_strides): - yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') - grid = torch.stack((xv, yv), 2).view(1, -1, 2) - grids.append(grid) - shape = grid.shape[:2] - strides.append(torch.full((*shape, 1), stride)) - - grids = torch.cat(grids, dim=1).type(dtype) - strides = torch.cat(strides, dim=1).type(dtype) - - outputs = torch.cat([ - (outputs[..., 0:2] + grids) * strides, - torch.exp(outputs[..., 2:4]) * strides, - outputs[..., 4:] - ], dim=-1) - return outputs - - # TODO: Temporary defined in pipeline, it will be moved to postprocessor module. - def postprocess(self, prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): - box_corner = prediction.new(prediction.shape) - box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 - box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 - box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 - box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 - prediction[:, :, :4] = box_corner[:, :, :4] - - output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] - for i, image_pred in enumerate(prediction): - - # If none are remaining => process next image - if not image_pred.size(0): - continue - # Get score and class with highest confidence - class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) - - conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() - # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) - detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) - detections = detections[conf_mask] - if not detections.size(0): - continue - - if class_agnostic: - nms_out_index = torchvision.ops.nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - nms_thre, - ) - else: - nms_out_index = torchvision.ops.batched_nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - detections[:, 6], - nms_thre, - ) - - detections = detections[nms_out_index] - output[i] = torch.cat((output[i], detections)) - - return output diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py new file mode 100644 index 000000000..f7fed2fdb --- /dev/null +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -0,0 +1,85 @@ +import torch +import torchvision + +from ..models.utils import ModelOutput + + +class DetectionPostprocessor: + def __init__(self): + pass + + def __call__(self, outputs: ModelOutput, dtype, stage_strides, num_classes, + conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): + pred = outputs['pred'] + + pred = self.decode_outputs(pred, dtype=dtype, stage_strides=stage_strides) + pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) + return pred + + def decode_outputs(self, outputs, dtype, stage_strides): + hw = [x.shape[-2:] for x in outputs] + # [batch, n_anchors_all, num_classes + 5] + outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) + outputs[..., 4:] = outputs[..., 4:].sigmoid() + + grids = [] + strides = [] + for (hsize, wsize), stride in zip(hw, stage_strides): + yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') + grid = torch.stack((xv, yv), 2).view(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + strides.append(torch.full((*shape, 1), stride)) + + grids = torch.cat(grids, dim=1).type(dtype) + strides = torch.cat(strides, dim=1).type(dtype) + + outputs = torch.cat([ + (outputs[..., 0:2] + grids) * strides, + torch.exp(outputs[..., 2:4]) * strides, + outputs[..., 4:] + ], dim=-1) + return outputs + + def postprocess(self, prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] + for i, image_pred in enumerate(prediction): + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get score and class with highest confidence + class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + detections = detections[conf_mask] + if not detections.size(0): + continue + + if class_agnostic: + nms_out_index = torchvision.ops.nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + nms_thre, + ) + else: + nms_out_index = torchvision.ops.batched_nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + detections[:, 6], + nms_thre, + ) + + detections = detections[nms_out_index] + output[i] = torch.cat((output[i], detections)) + + return output diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 306f2ead4..cb7291fea 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -2,8 +2,10 @@ from .classification import ClassificationPostprocessor from .segmentation import SegmentationPostprocessor +from .detection import DetectionPostprocessor POSTPROCESSOR_DICT = { 'fc': ClassificationPostprocessor, - 'all_mlp_decoder': SegmentationPostprocessor + 'all_mlp_decoder': SegmentationPostprocessor, + 'yolo_head': DetectionPostprocessor, } \ No newline at end of file From 5d6124383c7143a177c553ce7efec8404c5c53f2 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 6 Nov 2023 18:16:07 +0900 Subject: [PATCH 065/167] Refactoring input param --- src/netspresso_trainer/pipelines/detection.py | 10 +++------- src/netspresso_trainer/postprocessors/detection.py | 5 +++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 0fe4a6fd9..52f90e2d1 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -224,8 +224,7 @@ def train_step(self, batch): self.loss_factory.backward() self.optimizer.step() - pred = self.postprocessor(out, dtype=out['pred'][0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out['pred']], - num_classes=self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -256,8 +255,7 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, targets, phase='valid') - pred = self.postprocessor(out, dtype=out['pred'][0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out['pred']], - num_classes=self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -280,9 +278,7 @@ def test_step(self, batch): out = self.model(images.unsqueeze(0)) - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) results = [(p[:, :4].detach().cpu().numpy(), p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index f7fed2fdb..97a1c3b7a 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -8,9 +8,10 @@ class DetectionPostprocessor: def __init__(self): pass - def __call__(self, outputs: ModelOutput, dtype, stage_strides, num_classes, - conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): + def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): pred = outputs['pred'] + dtype = pred[0].type() + stage_strides= [original_shape[-1] // o.shape[-1] for o in pred] pred = self.decode_outputs(pred, dtype=dtype, stage_strides=stage_strides) pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) From 230f514e2eaaabff0537d797aae9aa8375c427fd Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 11:22:43 +0900 Subject: [PATCH 066/167] [hotfix] Fix classification mix_transforms bug --- src/netspresso_trainer/dataloaders/builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index 23595e5ce..3910ec450 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -103,7 +103,8 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_dataset, profile=False): if task == 'classification': - if hasattr(conf.augmentation, 'mix_transforms'): + conf_mix_transform = getattr(conf.augmentation, 'mix_transforms', None) + if conf_mix_transform: mix_transforms = [] for mix_transform_conf in conf.augmentation.mix_transforms: name = mix_transform_conf.name.lower() From b207dbdfb64edb73c596cf55f7ca48c14557e52d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 13:15:44 +0900 Subject: [PATCH 067/167] Rename YOLOXPostprocessor --- src/netspresso_trainer/postprocessors/detection.py | 2 +- src/netspresso_trainer/postprocessors/register.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 97a1c3b7a..f67ad6f97 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -4,7 +4,7 @@ from ..models.utils import ModelOutput -class DetectionPostprocessor: +class YOLOXPostprocessor: def __init__(self): pass diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index cb7291fea..990f2d83b 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -2,10 +2,10 @@ from .classification import ClassificationPostprocessor from .segmentation import SegmentationPostprocessor -from .detection import DetectionPostprocessor +from .detection import YOLOXPostprocessor POSTPROCESSOR_DICT = { 'fc': ClassificationPostprocessor, 'all_mlp_decoder': SegmentationPostprocessor, - 'yolo_head': DetectionPostprocessor, + 'yolo_head': YOLOXPostprocessor, } \ No newline at end of file From 2f22333dc338e7d76afd40c3d1a0e22b0b178b21 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 13:27:48 +0900 Subject: [PATCH 068/167] Temporary return None for 2-stage detector --- src/netspresso_trainer/postprocessors/builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index 8fa3749bb..89b818946 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -3,4 +3,6 @@ def build_postprocessor(task: str, conf_model): head_name = conf_model.architecture.head.name + if head_name not in POSTPROCESSOR_DICT: + return None return POSTPROCESSOR_DICT[head_name]() \ No newline at end of file From 9c023db5e95f8547ef97625621228546533d2d59 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 15:55:45 +0900 Subject: [PATCH 069/167] Handle exploding value of yolox postprocessor --- src/netspresso_trainer/postprocessors/detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index f67ad6f97..683b24843 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -37,7 +37,7 @@ def decode_outputs(self, outputs, dtype, stage_strides): outputs = torch.cat([ (outputs[..., 0:2] + grids) * strides, - torch.exp(outputs[..., 2:4]) * strides, + torch.clamp(torch.exp(outputs[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), outputs[..., 4:] ], dim=-1) return outputs From 605e119cdd6bd2332233ba4c1d65241473ba7d5f Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 17:35:21 +0900 Subject: [PATCH 070/167] Ruff fix --- src/netspresso_trainer/pipelines/base.py | 2 +- src/netspresso_trainer/postprocessors/__init__.py | 2 +- src/netspresso_trainer/postprocessors/builder.py | 2 +- src/netspresso_trainer/postprocessors/detection.py | 2 +- src/netspresso_trainer/postprocessors/register.py | 4 ++-- src/netspresso_trainer/postprocessors/segmentation.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/netspresso_trainer/pipelines/base.py b/src/netspresso_trainer/pipelines/base.py index 3982f9979..58dd7aa6c 100644 --- a/src/netspresso_trainer/pipelines/base.py +++ b/src/netspresso_trainer/pipelines/base.py @@ -15,13 +15,13 @@ from ..losses import build_losses from ..metrics import build_metrics from ..optimizers import build_optimizer +from ..postprocessors import build_postprocessor from ..schedulers import build_scheduler from ..utils.fx import save_graphmodule from ..utils.logger import yaml_for_logging from ..utils.onnx import save_onnx from ..utils.record import Timer, TrainingSummary from ..utils.stats import get_params_and_macs -from ..postprocessors import build_postprocessor logger = logging.getLogger("netspresso_trainer") diff --git a/src/netspresso_trainer/postprocessors/__init__.py b/src/netspresso_trainer/postprocessors/__init__.py index d5ad68226..5fddd44e1 100644 --- a/src/netspresso_trainer/postprocessors/__init__.py +++ b/src/netspresso_trainer/postprocessors/__init__.py @@ -1 +1 @@ -from .builder import build_postprocessor \ No newline at end of file +from .builder import build_postprocessor diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index 89b818946..a60a8652e 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -5,4 +5,4 @@ def build_postprocessor(task: str, conf_model): head_name = conf_model.architecture.head.name if head_name not in POSTPROCESSOR_DICT: return None - return POSTPROCESSOR_DICT[head_name]() \ No newline at end of file + return POSTPROCESSOR_DICT[head_name]() diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 683b24843..772f81066 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -12,7 +12,7 @@ def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thres pred = outputs['pred'] dtype = pred[0].type() stage_strides= [original_shape[-1] // o.shape[-1] for o in pred] - + pred = self.decode_outputs(pred, dtype=dtype, stage_strides=stage_strides) pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) return pred diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 990f2d83b..35f6e08f3 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -1,11 +1,11 @@ from typing import Dict, Type from .classification import ClassificationPostprocessor -from .segmentation import SegmentationPostprocessor from .detection import YOLOXPostprocessor +from .segmentation import SegmentationPostprocessor POSTPROCESSOR_DICT = { 'fc': ClassificationPostprocessor, 'all_mlp_decoder': SegmentationPostprocessor, 'yolo_head': YOLOXPostprocessor, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/postprocessors/segmentation.py b/src/netspresso_trainer/postprocessors/segmentation.py index c9e6e1ae8..0f5f7f22b 100644 --- a/src/netspresso_trainer/postprocessors/segmentation.py +++ b/src/netspresso_trainer/postprocessors/segmentation.py @@ -12,4 +12,4 @@ def __init__(self): def __call__(self, outputs: ModelOutput): pred = outputs['pred'] pred = torch.max(pred, dim=1)[1] # argmax - return pred \ No newline at end of file + return pred From 5b8bf3ad75f6899bcbd24b82b766874b2d26cd6a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 7 Nov 2023 17:36:14 +0900 Subject: [PATCH 071/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccb868afd..25bce0ac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ## Breaking Changes: - Enable dataset augmentation customizing by `@illian01` in [PR 201](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/201) +- Add postprocessor module by `@illian01` in [PR 223](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/223) ## Other Changes: From 5ab291187704239ca0e5007ae645be526001d863 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 14:26:36 +0900 Subject: [PATCH 072/167] Revert detection postprocessor name --- src/netspresso_trainer/postprocessors/detection.py | 2 +- src/netspresso_trainer/postprocessors/register.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 772f81066..5b4b2a213 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -4,7 +4,7 @@ from ..models.utils import ModelOutput -class YOLOXPostprocessor: +class DetectionPostprocessor: def __init__(self): pass diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 35f6e08f3..adf1c7387 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -1,11 +1,11 @@ from typing import Dict, Type from .classification import ClassificationPostprocessor -from .detection import YOLOXPostprocessor +from .detection import DetectionPostprocessor from .segmentation import SegmentationPostprocessor POSTPROCESSOR_DICT = { 'fc': ClassificationPostprocessor, 'all_mlp_decoder': SegmentationPostprocessor, - 'yolo_head': YOLOXPostprocessor, + 'yolo_head': DetectionPostprocessor, } From fa529f8f3d12d4abf4e3470e359fe1b1ef24ceae Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 14:27:44 +0900 Subject: [PATCH 073/167] Add init parameter conf_model --- src/netspresso_trainer/postprocessors/builder.py | 2 +- src/netspresso_trainer/postprocessors/classification.py | 2 +- src/netspresso_trainer/postprocessors/detection.py | 2 +- src/netspresso_trainer/postprocessors/segmentation.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index a60a8652e..9b4e45b67 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -5,4 +5,4 @@ def build_postprocessor(task: str, conf_model): head_name = conf_model.architecture.head.name if head_name not in POSTPROCESSOR_DICT: return None - return POSTPROCESSOR_DICT[head_name]() + return POSTPROCESSOR_DICT[head_name](conf_model) diff --git a/src/netspresso_trainer/postprocessors/classification.py b/src/netspresso_trainer/postprocessors/classification.py index 4929fe37d..379693256 100644 --- a/src/netspresso_trainer/postprocessors/classification.py +++ b/src/netspresso_trainer/postprocessors/classification.py @@ -6,7 +6,7 @@ class ClassificationPostprocessor(): - def __init__(self): + def __init__(self, conf_model): pass def __call__(self, outputs: ModelOutput, k: Optional[int]=None): diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 5b4b2a213..2154d14c5 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -5,7 +5,7 @@ class DetectionPostprocessor: - def __init__(self): + def __init__(self, conf_model): pass def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): diff --git a/src/netspresso_trainer/postprocessors/segmentation.py b/src/netspresso_trainer/postprocessors/segmentation.py index 0f5f7f22b..f40334c1c 100644 --- a/src/netspresso_trainer/postprocessors/segmentation.py +++ b/src/netspresso_trainer/postprocessors/segmentation.py @@ -6,7 +6,7 @@ class SegmentationPostprocessor: - def __init__(self): + def __init__(self, conf_model): pass def __call__(self, outputs: ModelOutput): From 838226feb20c89ef80830046eec95fca937ba814 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 14:46:42 +0900 Subject: [PATCH 074/167] Change detection postprocessor architecture --- .../postprocessors/detection.py | 147 +++++++++--------- 1 file changed, 77 insertions(+), 70 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 2154d14c5..64bfbb89b 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -4,9 +4,84 @@ from ..models.utils import ModelOutput +def decode_outputs(outputs, dtype, stage_strides): + hw = [x.shape[-2:] for x in outputs] + # [batch, n_anchors_all, num_classes + 5] + outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) + outputs[..., 4:] = outputs[..., 4:].sigmoid() + + grids = [] + strides = [] + for (hsize, wsize), stride in zip(hw, stage_strides): + yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') + grid = torch.stack((xv, yv), 2).view(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + strides.append(torch.full((*shape, 1), stride)) + + grids = torch.cat(grids, dim=1).type(dtype) + strides = torch.cat(strides, dim=1).type(dtype) + + outputs = torch.cat([ + (outputs[..., 0:2] + grids) * strides, + torch.clamp(torch.exp(outputs[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), + outputs[..., 4:] + ], dim=-1) + return outputs + + +def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] + for i, image_pred in enumerate(prediction): + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get score and class with highest confidence + class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + detections = detections[conf_mask] + if not detections.size(0): + continue + + if class_agnostic: + nms_out_index = torchvision.ops.nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + nms_thre, + ) + else: + nms_out_index = torchvision.ops.batched_nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + detections[:, 6], + nms_thre, + ) + + detections = detections[nms_out_index] + output[i] = torch.cat((output[i], detections)) + + return output + + class DetectionPostprocessor: def __init__(self, conf_model): - pass + HEAD_POSTPROCESS_MAPPING = { + 'yolo_head': [decode_outputs, postprocess] + } + + head_name = conf_model.architecture.head.name + self.decode_outputs, self.postprocess = HEAD_POSTPROCESS_MAPPING[head_name] def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): pred = outputs['pred'] @@ -15,72 +90,4 @@ def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thres pred = self.decode_outputs(pred, dtype=dtype, stage_strides=stage_strides) pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) - return pred - - def decode_outputs(self, outputs, dtype, stage_strides): - hw = [x.shape[-2:] for x in outputs] - # [batch, n_anchors_all, num_classes + 5] - outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) - outputs[..., 4:] = outputs[..., 4:].sigmoid() - - grids = [] - strides = [] - for (hsize, wsize), stride in zip(hw, stage_strides): - yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') - grid = torch.stack((xv, yv), 2).view(1, -1, 2) - grids.append(grid) - shape = grid.shape[:2] - strides.append(torch.full((*shape, 1), stride)) - - grids = torch.cat(grids, dim=1).type(dtype) - strides = torch.cat(strides, dim=1).type(dtype) - - outputs = torch.cat([ - (outputs[..., 0:2] + grids) * strides, - torch.clamp(torch.exp(outputs[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), - outputs[..., 4:] - ], dim=-1) - return outputs - - def postprocess(self, prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): - box_corner = prediction.new(prediction.shape) - box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 - box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 - box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 - box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 - prediction[:, :, :4] = box_corner[:, :, :4] - - output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] - for i, image_pred in enumerate(prediction): - - # If none are remaining => process next image - if not image_pred.size(0): - continue - # Get score and class with highest confidence - class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) - - conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() - # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) - detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) - detections = detections[conf_mask] - if not detections.size(0): - continue - - if class_agnostic: - nms_out_index = torchvision.ops.nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - nms_thre, - ) - else: - nms_out_index = torchvision.ops.batched_nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - detections[:, 6], - nms_thre, - ) - - detections = detections[nms_out_index] - output[i] = torch.cat((output[i], detections)) - - return output + return pred \ No newline at end of file From 6b92ab2bd21c7893b2b1b1c9acd345d876653b4b Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 14:55:10 +0900 Subject: [PATCH 075/167] Move box operation --- src/netspresso_trainer/postprocessors/detection.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 64bfbb89b..747e0aa7a 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -27,17 +27,17 @@ def decode_outputs(outputs, dtype, stage_strides): torch.clamp(torch.exp(outputs[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), outputs[..., 4:] ], dim=-1) + + box_corner = outputs.new(outputs.shape) + box_corner[:, :, 0] = outputs[:, :, 0] - outputs[:, :, 2] / 2 + box_corner[:, :, 1] = outputs[:, :, 1] - outputs[:, :, 3] / 2 + box_corner[:, :, 2] = outputs[:, :, 0] + outputs[:, :, 2] / 2 + box_corner[:, :, 3] = outputs[:, :, 1] + outputs[:, :, 3] / 2 + outputs[:, :, :4] = box_corner[:, :, :4] return outputs def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): - box_corner = prediction.new(prediction.shape) - box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 - box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 - box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 - box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 - prediction[:, :, :4] = box_corner[:, :, :4] - output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] for i, image_pred in enumerate(prediction): From ef8c3b34fda22fc0db94d9c375d0b3d43cb18a05 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 14:55:35 +0900 Subject: [PATCH 076/167] Rename yolox box decode function --- src/netspresso_trainer/postprocessors/detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 747e0aa7a..3f6cec74b 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -4,7 +4,7 @@ from ..models.utils import ModelOutput -def decode_outputs(outputs, dtype, stage_strides): +def yolox_decode_outputs(outputs, dtype, stage_strides): hw = [x.shape[-2:] for x in outputs] # [batch, n_anchors_all, num_classes + 5] outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) @@ -77,7 +77,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn class DetectionPostprocessor: def __init__(self, conf_model): HEAD_POSTPROCESS_MAPPING = { - 'yolo_head': [decode_outputs, postprocess] + 'yolo_head': [yolox_decode_outputs, postprocess] } head_name = conf_model.architecture.head.name From 5ab8d98a1e54c065301a1b6a6087ed97f9d07c6d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 15:03:31 +0900 Subject: [PATCH 077/167] Refactoring for output decoder --- .../postprocessors/detection.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 3f6cec74b..0ab8b2484 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -4,11 +4,14 @@ from ..models.utils import ModelOutput -def yolox_decode_outputs(outputs, dtype, stage_strides): - hw = [x.shape[-2:] for x in outputs] +def yolox_decode_outputs(pred, original_shape): + dtype = pred[0].type() + stage_strides= [original_shape[-1] // o.shape[-1] for o in pred] + + hw = [x.shape[-2:] for x in pred] # [batch, n_anchors_all, num_classes + 5] - outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) - outputs[..., 4:] = outputs[..., 4:].sigmoid() + pred = torch.cat([x.flatten(start_dim=2) for x in pred], dim=2).permute(0, 2, 1) + pred[..., 4:] = pred[..., 4:].sigmoid() grids = [] strides = [] @@ -22,19 +25,19 @@ def yolox_decode_outputs(outputs, dtype, stage_strides): grids = torch.cat(grids, dim=1).type(dtype) strides = torch.cat(strides, dim=1).type(dtype) - outputs = torch.cat([ - (outputs[..., 0:2] + grids) * strides, - torch.clamp(torch.exp(outputs[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), - outputs[..., 4:] + pred = torch.cat([ + (pred[..., 0:2] + grids) * strides, + torch.clamp(torch.exp(pred[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), + pred[..., 4:] ], dim=-1) - box_corner = outputs.new(outputs.shape) - box_corner[:, :, 0] = outputs[:, :, 0] - outputs[:, :, 2] / 2 - box_corner[:, :, 1] = outputs[:, :, 1] - outputs[:, :, 3] / 2 - box_corner[:, :, 2] = outputs[:, :, 0] + outputs[:, :, 2] / 2 - box_corner[:, :, 3] = outputs[:, :, 1] + outputs[:, :, 3] / 2 - outputs[:, :, :4] = box_corner[:, :, :4] - return outputs + box_corner = pred.new(pred.shape) + box_corner[:, :, 0] = pred[:, :, 0] - pred[:, :, 2] / 2 + box_corner[:, :, 1] = pred[:, :, 1] - pred[:, :, 3] / 2 + box_corner[:, :, 2] = pred[:, :, 0] + pred[:, :, 2] / 2 + box_corner[:, :, 3] = pred[:, :, 1] + pred[:, :, 3] / 2 + pred[:, :, :4] = box_corner[:, :, :4] + return pred def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): @@ -85,9 +88,7 @@ def __init__(self, conf_model): def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): pred = outputs['pred'] - dtype = pred[0].type() - stage_strides= [original_shape[-1] // o.shape[-1] for o in pred] - pred = self.decode_outputs(pred, dtype=dtype, stage_strides=stage_strides) + pred = self.decode_outputs(pred, original_shape) pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) return pred \ No newline at end of file From a3cc3ba9d7e84eb388869a8579b962f6c9f30b87 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 15:09:13 +0900 Subject: [PATCH 078/167] Add conditional processing --- src/netspresso_trainer/postprocessors/detection.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 0ab8b2484..21ec49a23 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -89,6 +89,8 @@ def __init__(self, conf_model): def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): pred = outputs['pred'] - pred = self.decode_outputs(pred, original_shape) - pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) + if self.decode_outputs: + pred = self.decode_outputs(pred, original_shape) + if self.postprocess: + pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) return pred \ No newline at end of file From f7d7768c83b65b1eeac974193aa64660bd9b554d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 15:12:19 +0900 Subject: [PATCH 079/167] Rename nms --- src/netspresso_trainer/postprocessors/detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 21ec49a23..873121416 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -40,7 +40,7 @@ def yolox_decode_outputs(pred, original_shape): return pred -def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): +def nms(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] for i, image_pred in enumerate(prediction): @@ -80,7 +80,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn class DetectionPostprocessor: def __init__(self, conf_model): HEAD_POSTPROCESS_MAPPING = { - 'yolo_head': [yolox_decode_outputs, postprocess] + 'yolo_head': [yolox_decode_outputs, nms] } head_name = conf_model.architecture.head.name From 103b93f72ca055211a008989cf8ea433bd300c4e Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 15:16:19 +0900 Subject: [PATCH 080/167] Rename yolo_head to yolox_head --- config/model/yolox/yolox-detection.yaml | 2 +- src/netspresso_trainer/models/heads/detection/__init__.py | 2 +- .../detection/experimental/{yolo_head.py => yolox_head.py} | 2 +- src/netspresso_trainer/models/registry.py | 4 ++-- src/netspresso_trainer/postprocessors/detection.py | 2 +- src/netspresso_trainer/postprocessors/register.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) rename src/netspresso_trainer/models/heads/detection/experimental/{yolo_head.py => yolox_head.py} (98%) diff --git a/config/model/yolox/yolox-detection.yaml b/config/model/yolox/yolox-detection.yaml index a5502fba8..0f3c2e13b 100644 --- a/config/model/yolox/yolox-detection.yaml +++ b/config/model/yolox/yolox-detection.yaml @@ -11,7 +11,7 @@ model: dep_mul: 0.33 wid_mul: 0.5 head: - name: yolo_head + name: yolox_head losses: - criterion: yolox_loss weight: ~ \ No newline at end of file diff --git a/src/netspresso_trainer/models/heads/detection/__init__.py b/src/netspresso_trainer/models/heads/detection/__init__.py index 455a00ee2..8d362011d 100644 --- a/src/netspresso_trainer/models/heads/detection/__init__.py +++ b/src/netspresso_trainer/models/heads/detection/__init__.py @@ -1,2 +1,2 @@ from .experimental.faster_rcnn import faster_rcnn -from .experimental.yolo_head import yolo_head \ No newline at end of file +from .experimental.yolox_head import yolox_head \ No newline at end of file diff --git a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py similarity index 98% rename from src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py rename to src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py index 10f1346ea..dfcf6fdcb 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py @@ -138,7 +138,7 @@ def forward(self, xin): return ModelOutput(pred=outputs) -def yolo_head(num_classes, intermediate_features_dim, **kwargs): +def yolox_head(num_classes, intermediate_features_dim, **kwargs): configuration = { 'act_type': 'silu', } diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index 59ee6cda2..c43ed0f47 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -6,7 +6,7 @@ from .backbones import cspdarknet, efficientformer, mobilenetv3_small, mobilevit, resnet50, segformer, vit from .full import pidnet from .heads.classification import fc -from .heads.detection import faster_rcnn, yolo_head +from .heads.detection import faster_rcnn, yolox_head from .heads.segmentation import all_mlp_decoder MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { @@ -28,7 +28,7 @@ }, 'detection': { 'faster_rcnn': faster_rcnn, - 'yolo_head': yolo_head + 'yolox_head': yolox_head }, } diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 873121416..1216c189b 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -80,7 +80,7 @@ def nms(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=Fa class DetectionPostprocessor: def __init__(self, conf_model): HEAD_POSTPROCESS_MAPPING = { - 'yolo_head': [yolox_decode_outputs, nms] + 'yolox_head': [yolox_decode_outputs, nms] } head_name = conf_model.architecture.head.name diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index adf1c7387..0c705efe6 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -7,5 +7,5 @@ POSTPROCESSOR_DICT = { 'fc': ClassificationPostprocessor, 'all_mlp_decoder': SegmentationPostprocessor, - 'yolo_head': DetectionPostprocessor, + 'yolox_head': DetectionPostprocessor, } From fb33fde7a829a8a6eb51c2b85211fc6d7e324e55 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 8 Nov 2023 15:18:43 +0900 Subject: [PATCH 081/167] Ruff fix --- src/netspresso_trainer/postprocessors/detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py index 1216c189b..8e710ac7e 100644 --- a/src/netspresso_trainer/postprocessors/detection.py +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -93,4 +93,4 @@ def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thres pred = self.decode_outputs(pred, original_shape) if self.postprocess: pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) - return pred \ No newline at end of file + return pred From d4d4ea120c44a98243fbc8b1963668c6fbdbf5ee Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 11:29:56 +0900 Subject: [PATCH 082/167] Set CrossEntropy parameter like nn.CrossEntropy --- src/netspresso_trainer/losses/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/losses/common.py b/src/netspresso_trainer/losses/common.py index 5c7b4e2ad..6e8506fb2 100644 --- a/src/netspresso_trainer/losses/common.py +++ b/src/netspresso_trainer/losses/common.py @@ -6,9 +6,9 @@ class CrossEntropyLoss(nn.Module): - def __init__(self, ignore_index, **kwargs) -> None: + def __init__(self, ignore_index=-100, label_smoothing=0.0, **kwargs) -> None: super(CrossEntropyLoss, self).__init__() - self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index, **kwargs) + self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index, label_smoothing=label_smoothing, **kwargs) def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] From d084a56945ad48cef41ed9d75b24954d49db0caf Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 11:33:29 +0900 Subject: [PATCH 083/167] Remove variants of CrossEntropyLoss --- .../losses/classification/label_smooth.py | 22 ------------------- .../losses/classification/soft_target.py | 15 ------------- src/netspresso_trainer/losses/registry.py | 3 --- 3 files changed, 40 deletions(-) delete mode 100644 src/netspresso_trainer/losses/classification/label_smooth.py delete mode 100644 src/netspresso_trainer/losses/classification/soft_target.py diff --git a/src/netspresso_trainer/losses/classification/label_smooth.py b/src/netspresso_trainer/losses/classification/label_smooth.py deleted file mode 100644 index 495d13543..000000000 --- a/src/netspresso_trainer/losses/classification/label_smooth.py +++ /dev/null @@ -1,22 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class LabelSmoothingCrossEntropy(nn.Module): - """ NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.1): - super(LabelSmoothingCrossEntropy, self).__init__() - assert smoothing < 1.0 - self.smoothing = smoothing - self.confidence = 1. - smoothing - - def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - pred = out['pred'] - logprobs = F.log_softmax(pred, dim=-1) - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() diff --git a/src/netspresso_trainer/losses/classification/soft_target.py b/src/netspresso_trainer/losses/classification/soft_target.py deleted file mode 100644 index 8f25bc9c8..000000000 --- a/src/netspresso_trainer/losses/classification/soft_target.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Dict - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class SoftTargetCrossEntropy(nn.Module): # cutmix/mixup augmentation - def __init__(self): - super(SoftTargetCrossEntropy, self).__init__() - - def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: - pred = out['pred'] - loss = torch.sum(-target * F.log_softmax(pred, dim=-1), dim=-1) - return loss.mean() diff --git a/src/netspresso_trainer/losses/registry.py b/src/netspresso_trainer/losses/registry.py index 82f60e869..7e45b3833 100644 --- a/src/netspresso_trainer/losses/registry.py +++ b/src/netspresso_trainer/losses/registry.py @@ -1,12 +1,9 @@ -from .classification import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy from .common import CrossEntropyLoss from .detection import RoiHeadLoss, RPNLoss, YOLOXLoss from .segmentation import BoundaryLoss, PIDNetBoundaryAwareCrossEntropy, PIDNetCrossEntropy LOSS_DICT = { 'cross_entropy': CrossEntropyLoss, - 'soft_target_cross_entropy': SoftTargetCrossEntropy, - 'label_smoothing_cross_entropy': LabelSmoothingCrossEntropy, 'pidnet_cross_entropy': PIDNetCrossEntropy, 'boundary_loss': BoundaryLoss, 'pidnet_cross_entropy_with_boundary': PIDNetBoundaryAwareCrossEntropy, From 9542008f26c0a59f876ebb0b1bc76a42a4a6efb8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 11:34:45 +0900 Subject: [PATCH 084/167] Update classification model config --- .../efficientformer/efficientformer-l1-classification.yaml | 4 ++-- .../model/mobilenetv3/mobilenetv3-small-classification.yaml | 4 ++-- config/model/mobilevit/mobilevit-s-classification.yaml | 4 ++-- config/model/resnet/resnet50-classification.yaml | 4 ++-- config/model/segformer/segformer-classification.yaml | 4 ++-- config/model/vit/vit-classification.yaml | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/config/model/efficientformer/efficientformer-l1-classification.yaml b/config/model/efficientformer/efficientformer-l1-classification.yaml index 681794cc1..55aeb99e3 100644 --- a/config/model/efficientformer/efficientformer-l1-classification.yaml +++ b/config/model/efficientformer/efficientformer-l1-classification.yaml @@ -31,6 +31,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml index 273988282..0f5f958f9 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml @@ -27,6 +27,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/mobilevit/mobilevit-s-classification.yaml b/config/model/mobilevit/mobilevit-s-classification.yaml index 17bcc1cfc..c3cb1cdfb 100644 --- a/config/model/mobilevit/mobilevit-s-classification.yaml +++ b/config/model/mobilevit/mobilevit-s-classification.yaml @@ -29,6 +29,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 10c9bc992..53944ca72 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -13,6 +13,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/segformer/segformer-classification.yaml b/config/model/segformer/segformer-classification.yaml index e669de24a..134920cfa 100644 --- a/config/model/segformer/segformer-classification.yaml +++ b/config/model/segformer/segformer-classification.yaml @@ -23,6 +23,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index 2fa9a0a87..b4d07acf7 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -18,6 +18,6 @@ model: head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file From 4dcaff43ba29d47fbf18b086963490e09fbba433 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 11:37:58 +0900 Subject: [PATCH 085/167] Remove missed lines --- src/netspresso_trainer/losses/classification/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/netspresso_trainer/losses/classification/__init__.py b/src/netspresso_trainer/losses/classification/__init__.py index 6f89bba8f..e69de29bb 100644 --- a/src/netspresso_trainer/losses/classification/__init__.py +++ b/src/netspresso_trainer/losses/classification/__init__.py @@ -1,2 +0,0 @@ -from .label_smooth import LabelSmoothingCrossEntropy -from .soft_target import SoftTargetCrossEntropy From bfb73d62dda94cc54897356637debdac108b8e0f Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 12:04:07 +0900 Subject: [PATCH 086/167] Set CrossEntropy parameter like nn.CrossEntropy --- src/netspresso_trainer/losses/common.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/losses/common.py b/src/netspresso_trainer/losses/common.py index 6e8506fb2..fa709c006 100644 --- a/src/netspresso_trainer/losses/common.py +++ b/src/netspresso_trainer/losses/common.py @@ -1,14 +1,17 @@ -from typing import Dict +from typing import Dict, Optional import torch import torch.nn as nn import torch.nn.functional as F +from torch import Tensor class CrossEntropyLoss(nn.Module): - def __init__(self, ignore_index=-100, label_smoothing=0.0, **kwargs) -> None: + def __init__(self, weight: Optional[Tensor]=None, size_average=None, ignore_index: int=-100, + reduce=None, label_smoothing: float=0.0): super(CrossEntropyLoss, self).__init__() - self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index, label_smoothing=label_smoothing, **kwargs) + self.loss_fn = nn.CrossEntropyLoss(weight=weight, size_average=size_average, ignore_index=ignore_index, + reduce=reduce, reduction='mean', label_smoothing=label_smoothing) def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] From 61743349607fb4feb5df88ce3ec7019579f7f286 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 12:16:57 +0900 Subject: [PATCH 087/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25bce0ac2..446e57776 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ ## Other Changes: - Update ruff rule (`W`) by `@deepkyu` in [PR 218](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/218) +- Integrate classification loss modules by `@illian01` in [PR 226](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/226) # v0.0.9 From 6791364a08279a92c75beb3eda114aab40d79425 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 17:17:27 +0900 Subject: [PATCH 088/167] Modify resnet to stage format --- .../models/backbones/experimental/resnet.py | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 77baeb5f1..d3208939c 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -27,11 +27,10 @@ def __init__( self, task: str, block: Literal['basicblock', 'bottleneck'], - layers: List[int], + stages_info: List[Dict], zero_init_residual: bool = False, groups: int = 1, width_per_group: int = 64, - replace_stride_with_dilation: Optional[List[bool]] = None, norm_layer: Optional[str] = None, expansion: Optional[int] = None, **kwargs @@ -48,13 +47,9 @@ def __init__( self.inplanes = 64 self.dilation = 1 - if replace_stride_with_dilation is None: - # each element in the tuple indicates if we should replace - # the 2x2 stride with a dilated convolution instead - replace_stride_with_dilation = [False, False, False] - if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + for i in range(1, len(stages_info)): + if 'replace_stride_with_dilation' not in stages_info[i]: + stages_info[i]['replace_stride_with_dilation'] = False self.groups = groups self.base_width = width_per_group @@ -64,22 +59,23 @@ def __init__( self.conv1 = ConvLayer(in_channels=3, out_channels=self.inplanes, kernel_size=7, stride=2, padding=3, bias=False, norm_type='batch_norm', act_type='relu') - - planes = [64, 128, 256, 512] self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, planes[0], layers[0], expansion=expansion) - self.layer2 = self._make_layer(block, planes[1], layers[1], stride=2, - dilate=replace_stride_with_dilation[0], - expansion=expansion) - self.layer3 = self._make_layer(block, planes[2], layers[2], stride=2, - dilate=replace_stride_with_dilation[1], - expansion=expansion) - self.layer4 = self._make_layer(block, planes[3], layers[3], stride=2, - dilate=replace_stride_with_dilation[2], - expansion=expansion) + + stages: List[nn.Module] = [] + + first_stage = stages_info[0] + layer = self._make_layer(block, first_stage['plane'], first_stage['layers'], expansion=expansion) + stages.append(layer) + for stage in stages_info[1:]: + layer = self._make_layer(block, stage['plane'], stage['layers'], stride=2, + dilate=stage['replace_stride_with_dilation'], + expansion=expansion) + stages.append(layer) + + self.stages = nn.ModuleList(stages) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - hidden_sizes = [h * 4 for h in planes] + hidden_sizes = [stage['plane'] * expansion for stage in stages_info] self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes @@ -134,8 +130,8 @@ def forward(self, x: Tensor): x = self.maxpool(x) all_hidden_states = () if self.use_intermediate_features else None - for layer in [self.layer1, self.layer2, self.layer3, self.layer4]: - x = layer(x) + for stage in self.stages: + x = stage(x) if self.use_intermediate_features: all_hidden_states = all_hidden_states + (x,) From 1b1dea8f738719aa56b2eb24d077221c2e5d4d1f Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 17:18:23 +0900 Subject: [PATCH 089/167] Update resnet50 config --- config/model/resnet/resnet50-classification.yaml | 16 +++++++++++++++- config/model/resnet/resnet50-segmentation.yaml | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 10c9bc992..a4dc3fe06 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -9,7 +9,21 @@ model: backbone: name: resnet50 block: bottleneck - layers: [3, 4, 6, 3] + norm_layer: batch_norm + groups: 1 + width_per_group: 64 + stages_info: + - plane: 64 + layers: 3 + - plane: 128 + layers: 4 + replace_stride_with_dilation: False + - plane: 256 + layers: 6 + replace_stride_with_dilation: False + - plane: 512 + layers: 3 + replace_stride_with_dilation: False head: name: fc losses: diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 3f83f708d..3aa8f46ae 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -10,7 +10,21 @@ model: backbone: name: resnet50 block: bottleneck - layers: [3, 4, 6, 3] + norm_layer: batch_norm + groups: 1 + width_per_group: 64 + stages_info: + - plane: 64 + layers: 3 + - plane: 128 + layers: 4 + replace_stride_with_dilation: False + - plane: 256 + layers: 6 + replace_stride_with_dilation: False + - plane: 512 + layers: 3 + replace_stride_with_dilation: False head: name: all_mlp_decoder losses: From 38da703a94b1246afb3ae5c3467398c4341ee98c Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 18:43:29 +0900 Subject: [PATCH 090/167] Add general_info --- config/model/resnet/resnet50-classification.yaml | 9 +++++---- .../models/backbones/experimental/resnet.py | 15 +++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index a4dc3fe06..302e58637 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -8,10 +8,11 @@ model: full: ~ # auto backbone: name: resnet50 - block: bottleneck - norm_layer: batch_norm - groups: 1 - width_per_group: 64 + general_info: + block: bottleneck + norm_layer: batch_norm + groups: 1 + width_per_group: 64 stages_info: - plane: 64 layers: 3 diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index d3208939c..9cae623fc 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -26,16 +26,19 @@ class ResNet(nn.Module): def __init__( self, task: str, - block: Literal['basicblock', 'bottleneck'], + general_info: List[Dict], stages_info: List[Dict], - zero_init_residual: bool = False, - groups: int = 1, - width_per_group: int = 64, - norm_layer: Optional[str] = None, - expansion: Optional[int] = None, **kwargs ) -> None: super(ResNet, self).__init__() + # Mandatory fields + block: Literal['basicblock', 'bottleneck'] = general_info['block'] + # Fields with defaults + zero_init_residual: bool = general_info['zero_init_residual'] if 'zero_init_residual' in general_info else False + groups: int = general_info['groups'] if 'groups' in general_info else 1 + width_per_group: int = general_info['width_per_group'] if 'width_per_group' in general_info else 64 + norm_layer: Optional[str] = general_info['norm_layer'] if 'norm_layer' in general_info else None + expansion: Optional[int] = general_info['expansion'] if 'expansion' in general_info else None self.task = task.lower() block = BLOCK_FROM_LITERAL[block.lower()] From fdb0a382ce491e0fc8d843b1c76bfade478ec751 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 9 Nov 2023 18:45:40 +0900 Subject: [PATCH 091/167] Modify segmentation config --- config/model/resnet/resnet50-segmentation.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 3aa8f46ae..964731c51 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -9,10 +9,11 @@ model: name: ~ # auto backbone: name: resnet50 - block: bottleneck - norm_layer: batch_norm - groups: 1 - width_per_group: 64 + general_info: + block: bottleneck + norm_layer: batch_norm + groups: 1 + width_per_group: 64 stages_info: - plane: 64 layers: 3 From a594ed3b1f8b328978a5edd05b7217df817e963e Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 13 Nov 2023 17:47:44 +0900 Subject: [PATCH 092/167] Change keyword info to params --- .../model/resnet/resnet50-classification.yaml | 4 +-- .../model/resnet/resnet50-segmentation.yaml | 4 +-- .../models/backbones/experimental/resnet.py | 28 +++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 2708dbc26..9bf1946a4 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -8,12 +8,12 @@ model: full: ~ # auto backbone: name: resnet50 - general_info: + params: block: bottleneck norm_layer: batch_norm groups: 1 width_per_group: 64 - stages_info: + stage_params: - plane: 64 layers: 3 - plane: 128 diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 964731c51..286a6cad0 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -9,12 +9,12 @@ model: name: ~ # auto backbone: name: resnet50 - general_info: + params: block: bottleneck norm_layer: batch_norm groups: 1 width_per_group: 64 - stages_info: + stage_params: - plane: 64 layers: 3 - plane: 128 diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 9cae623fc..fd4ffe863 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -26,19 +26,19 @@ class ResNet(nn.Module): def __init__( self, task: str, - general_info: List[Dict], - stages_info: List[Dict], + params: List[Dict], + stage_params: List[Dict], **kwargs ) -> None: super(ResNet, self).__init__() # Mandatory fields - block: Literal['basicblock', 'bottleneck'] = general_info['block'] + block: Literal['basicblock', 'bottleneck'] = params['block'] # Fields with defaults - zero_init_residual: bool = general_info['zero_init_residual'] if 'zero_init_residual' in general_info else False - groups: int = general_info['groups'] if 'groups' in general_info else 1 - width_per_group: int = general_info['width_per_group'] if 'width_per_group' in general_info else 64 - norm_layer: Optional[str] = general_info['norm_layer'] if 'norm_layer' in general_info else None - expansion: Optional[int] = general_info['expansion'] if 'expansion' in general_info else None + zero_init_residual: bool = params['zero_init_residual'] if 'zero_init_residual' in params else False + groups: int = params['groups'] if 'groups' in params else 1 + width_per_group: int = params['width_per_group'] if 'width_per_group' in params else 64 + norm_layer: Optional[str] = params['norm_layer'] if 'norm_layer' in params else None + expansion: Optional[int] = params['expansion'] if 'expansion' in params else None self.task = task.lower() block = BLOCK_FROM_LITERAL[block.lower()] @@ -50,9 +50,9 @@ def __init__( self.inplanes = 64 self.dilation = 1 - for i in range(1, len(stages_info)): - if 'replace_stride_with_dilation' not in stages_info[i]: - stages_info[i]['replace_stride_with_dilation'] = False + for i in range(1, len(stage_params)): + if 'replace_stride_with_dilation' not in stage_params[i]: + stage_params[i]['replace_stride_with_dilation'] = False self.groups = groups self.base_width = width_per_group @@ -66,10 +66,10 @@ def __init__( stages: List[nn.Module] = [] - first_stage = stages_info[0] + first_stage = stage_params[0] layer = self._make_layer(block, first_stage['plane'], first_stage['layers'], expansion=expansion) stages.append(layer) - for stage in stages_info[1:]: + for stage in stage_params[1:]: layer = self._make_layer(block, stage['plane'], stage['layers'], stride=2, dilate=stage['replace_stride_with_dilation'], expansion=expansion) @@ -78,7 +78,7 @@ def __init__( self.stages = nn.ModuleList(stages) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - hidden_sizes = [stage['plane'] * expansion for stage in stages_info] + hidden_sizes = [stage['plane'] * expansion for stage in stage_params] self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes From 19402ac0b6d41e5d2b638004bc078c04cf1d108f Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 10:52:58 +0900 Subject: [PATCH 093/167] Change MobileNetV3 config architecture --- .../mobilenetv3-small-classification.yaml | 46 ++++++++++++++----- .../mobilenetv3-small-segmentation.yaml | 46 ++++++++++++++----- .../backbones/experimental/mobilenetv3.py | 34 +++++--------- 3 files changed, 80 insertions(+), 46 deletions(-) diff --git a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml index 0f5f958f9..a58793c54 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml @@ -8,22 +8,44 @@ model: full: ~ # auto backbone: name: mobilenetv3_small - block_info: # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] + params: ~ + stage_params: - - - [16, 3, 16, 16, True, "relu", 2, 1] + in_channels: [16] + kernel: [3] + expanded_channels: [16] + out_channels: [16] + use_se: [True] + activation: ["relu"] + stride: [2] + dilation: [1] - - - [16, 3, 72, 24, False, "relu", 2, 1] - - [24, 3, 88, 24, False, "relu", 1, 1] + in_channels: [16, 24] + kernel: [3, 3] + expanded_channels: [72, 88] + out_channels: [24, 24] + use_se: [False, False] + activation: ["relu", "relu"] + stride: [2, 1] + dilation: [1, 1] - - - [24, 5, 96, 40, True, "hard_swish", 2, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 120, 48, True, "hard_swish", 1, 1] - - [48, 5, 144, 48, True, "hard_swish", 1, 1] + in_channels: [24, 40, 40, 40, 48] + kernel: [5, 5, 5, 5, 5] + expanded_channels: [96, 240, 240, 120, 144] + out_channels: [40, 40, 40, 48, 48] + use_se: [True, True, True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1, 1, 1] + dilation: [1, 1, 1, 1, 1] - - - [48, 5, 288, 96, True, "hard_swish", 2, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] + in_channels: [48, 96, 96] + kernel: [5, 5, 5] + expanded_channels: [288, 576, 576] + out_channels: [96, 96, 96] + use_se: [True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1] + dilation: [1, 1, 1] head: name: fc losses: diff --git a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml index 58aceec88..6a9f1cdc5 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml @@ -8,22 +8,44 @@ model: full: ~ # auto backbone: name: mobilenetv3_small - block_info: # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] + params: ~ + stage_params: - - - [16, 3, 16, 16, True, "relu", 2, 1] + in_channels: [16] + kernel: [3] + expanded_channels: [16] + out_channels: [16] + use_se: [True] + activation: ["relu"] + stride: [2] + dilation: [1] - - - [16, 3, 72, 24, False, "relu", 2, 1] - - [24, 3, 88, 24, False, "relu", 1, 1] + in_channels: [16, 24] + kernel: [3, 3] + expanded_channels: [72, 88] + out_channels: [24, 24] + use_se: [False, False] + activation: ["relu", "relu"] + stride: [2, 1] + dilation: [1, 1] - - - [24, 5, 96, 40, True, "hard_swish", 2, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 120, 48, True, "hard_swish", 1, 1] - - [48, 5, 144, 48, True, "hard_swish", 1, 1] + in_channels: [24, 40, 40, 40, 48] + kernel: [5, 5, 5, 5, 5] + expanded_channels: [96, 240, 240, 120, 144] + out_channels: [40, 40, 40, 48, 48] + use_se: [True, True, True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1, 1, 1] + dilation: [1, 1, 1, 1, 1] - - - [48, 5, 288, 96, True, "hard_swish", 2, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] + in_channels: [48, 96, 96] + kernel: [5, 5, 5] + expanded_channels: [288, 576, 576] + out_channels: [96, 96, 96] + use_se: [True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1] + dilation: [1, 1, 1] head: name: all_mlp_decoder losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index d87b9695a..7e0cf03d9 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -2,7 +2,7 @@ Based on the Torchvision implementation of MobileNetV3. https://pytorch.org/vision/main/_modules/torchvision/models/mobilenetv3.html """ -from typing import List +from typing import List, Dict, Optional import torch import torch.nn as nn @@ -16,19 +16,13 @@ SUPPORTING_TASK = ['classification', 'segmentation'] -def list_depth(block_info): - if isinstance(block_info[0], list): - return 1 + list_depth(block_info[0]) - else: - return 1 - - class MobileNetV3(nn.Module): def __init__( self, task: str, - block_info, # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] + params: Optional[List[Dict]], + stage_params: Optional[List[Dict]], **kwargs ) -> None: super(MobileNetV3, self).__init__() @@ -39,7 +33,7 @@ def __init__( act_type = 'hard_swish' # building first layer - firstconv_output_channels = block_info[0][0][0] + firstconv_output_channels = stage_params[0]['in_channels'][0] self.conv_first = ConvLayer( in_channels=3, out_channels=firstconv_output_channels, @@ -52,20 +46,16 @@ def __init__( # building inverted residual blocks stages: List[nn.Module] = [] - lastconv_input_channels = block_info[-1][-1][3] + lastconv_input_channels = stage_params[-1]['out_channels'][-1] lastconv_output_channels = 6 * lastconv_input_channels - for stg_idx, stage_info in enumerate(block_info): + for stg_idx, stage_info in enumerate(stage_params): stage: List[nn.Module] = [] - for block in stage_info: - in_channels = block[0] - kernel_size = block[1] - hidden_channels = block[2] - out_channels = block[3] - use_se = block[4] - act_type_b = block[5].lower() - stride = block[6] - dilation = block[7] + for block in zip(stage_info['in_channels'], stage_info['kernel'], stage_info['expanded_channels'], + stage_info['out_channels'], stage_info['use_se'], stage_info['activation'], + stage_info['stride'], stage_info['dilation']): + in_channels, kernel_size, hidden_channels, out_channels, use_se, act_type_b, stride, dilation = block + act_type_b = act_type_b.lower() stage.append( InvertedResidual(in_channels=in_channels, hidden_channels=hidden_channels, @@ -79,7 +69,7 @@ def __init__( ) # add last conv - if stg_idx == len(block_info) - 1: + if stg_idx == len(stage_params) - 1: stage.append( ConvLayer(in_channels=lastconv_input_channels, out_channels=lastconv_output_channels, From 6a414fbc9b7def74ff9cdf710c0d59992f46bf50 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 10:53:12 +0900 Subject: [PATCH 094/167] Add Optional on resnet params --- .../models/backbones/experimental/resnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index fd4ffe863..9a4e920ae 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -26,8 +26,8 @@ class ResNet(nn.Module): def __init__( self, task: str, - params: List[Dict], - stage_params: List[Dict], + params: Optional[List[Dict]], + stage_params: Optional[List[Dict]], **kwargs ) -> None: super(ResNet, self).__init__() From 9b443ae3e5687a6f853a997d63f8f28c34a88dd2 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 11:06:11 +0900 Subject: [PATCH 095/167] Change Vit to have params --- config/model/vit/vit-classification.yaml | 16 +++++----- .../models/backbones/experimental/vit.py | 29 ++++++++++--------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index b4d07acf7..afcef1df4 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -8,13 +8,15 @@ model: full: ~ # auto backbone: name: vit - patch_size: 16 - hidden_size: 192 - num_blocks: 12 - num_attention_heads: 3 - attention_dropout_prob: 0.0 - intermediate_size: 768 # hidden_size * 4 - hidden_dropout_prob: 0.1 + params: + patch_size: 16 + hidden_size: 192 + num_blocks: 12 + num_attention_heads: 3 + attention_dropout_prob: 0.0 + intermediate_size: 768 # hidden_size * 4 + hidden_dropout_prob: 0.1 + stage_parmas: ~ head: name: fc losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index f5344f20f..146d90d05 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -3,7 +3,7 @@ https://github.com/apple/ml-cvnets/blob/84d992f413e52c0468f86d23196efd9dad885e6f/cvnets/models/classification/vit.py """ import argparse -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union, List import torch import torch.nn as nn @@ -93,19 +93,22 @@ def __init__(self, num_blocks, hidden_size, num_attention_heads, attention_dropo class VisionTransformer(MetaFormer): def __init__( self, - task, - patch_size, - hidden_size, - num_blocks, - num_attention_heads, - attention_dropout_prob, - intermediate_size, - hidden_dropout_prob, - layer_norm_eps=1e-6, - use_cls_token=True, - vocab_size=1000, - **kwargs + task: str, + params: Optional[List[Dict]], + stage_params: Optional[List[Dict]], ) -> None: + patch_size = params['patch_size'] + hidden_size = params['hidden_size'] + num_blocks = params['num_blocks'] + num_attention_heads = params['num_attention_heads'] + attention_dropout_prob = params['attention_dropout_prob'] + intermediate_size = params['intermediate_size'] + hidden_dropout_prob = params['hidden_dropout_prob'] + + layer_norm_eps = params['layer_norm_eps'] if 'layer_norm_eps' in params else 1e-6 + use_cls_token = params['use_cls_token'] if 'use_cls_token' in params else True + vocab_size = params['vocab_size'] if 'vocab_size' in params else 1000 + hidden_sizes = hidden_size if isinstance(hidden_size, list) else [hidden_size] * num_blocks super().__init__(hidden_sizes) self.task = task From 8baed416816ff444c60e3365a1838f26b1686597 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 11:08:23 +0900 Subject: [PATCH 096/167] Set params default as None, and remove kwargs --- .../models/backbones/experimental/mobilenetv3.py | 5 ++--- .../models/backbones/experimental/resnet.py | 5 ++--- src/netspresso_trainer/models/backbones/experimental/vit.py | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index 7e0cf03d9..0c41ffed2 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -21,9 +21,8 @@ class MobileNetV3(nn.Module): def __init__( self, task: str, - params: Optional[List[Dict]], - stage_params: Optional[List[Dict]], - **kwargs + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, ) -> None: super(MobileNetV3, self).__init__() diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 9a4e920ae..c0e9933d7 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -26,9 +26,8 @@ class ResNet(nn.Module): def __init__( self, task: str, - params: Optional[List[Dict]], - stage_params: Optional[List[Dict]], - **kwargs + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, ) -> None: super(ResNet, self).__init__() # Mandatory fields diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index 146d90d05..34ba69a70 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -94,8 +94,8 @@ class VisionTransformer(MetaFormer): def __init__( self, task: str, - params: Optional[List[Dict]], - stage_params: Optional[List[Dict]], + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, ) -> None: patch_size = params['patch_size'] hidden_size = params['hidden_size'] From 7d75ad604ff23c06497bfa6d2051c5c1bff3334f Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 12:48:35 +0900 Subject: [PATCH 097/167] Change SegFormer model config --- .../segformer/segformer-classification.yaml | 47 ++++++++++++----- .../segformer/segformer-segmentation.yaml | 47 ++++++++++++----- .../backbones/experimental/segformer.py | 52 +++++++++++++------ 3 files changed, 105 insertions(+), 41 deletions(-) diff --git a/config/model/segformer/segformer-classification.yaml b/config/model/segformer/segformer-classification.yaml index 134920cfa..75469a4cd 100644 --- a/config/model/segformer/segformer-classification.yaml +++ b/config/model/segformer/segformer-classification.yaml @@ -8,18 +8,41 @@ model: full: ~ # auto backbone: name: segformer - num_modules: 4 # `num_encoder_blocks` in original - num_blocks: [2, 2, 2, 2] # `depth` in original - sr_ratios: [8, 4, 2, 1] - hidden_sizes: [32, 64, 160, 256] - embedding_patch_sizes: [7, 3, 3, 3] - embedding_strides: [4, 2, 2, 2] - num_attention_heads: [1, 2, 5, 8] - intermediate_ratio: 4 - hidden_activation_type: "gelu" - hidden_dropout_prob: 0.0 - attention_dropout_prob: 0.0 - layer_norm_eps: 1e-5 + params: + intermediate_ratio: 4 + hidden_activation_type: "gelu" + hidden_dropout_prob: 0.0 + attention_dropout_prob: 0.0 + layer_norm_eps: 1e-5 + stage_params: + - + num_blocks: 2 + sr_ratios: 8 + hidden_sizes: 32 + embedding_patch_sizes: 7 + embedding_strides: 4 + num_attention_heads: 1 + - + num_blocks: 2 + sr_ratios: 4 + hidden_sizes: 64 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 2 + - + num_blocks: 2 + sr_ratios: 2 + hidden_sizes: 160 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 5 + - + num_blocks: 2 + sr_ratios: 1 + hidden_sizes: 256 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 8 head: name: fc losses: diff --git a/config/model/segformer/segformer-segmentation.yaml b/config/model/segformer/segformer-segmentation.yaml index bb990dfcd..c739d8f56 100644 --- a/config/model/segformer/segformer-segmentation.yaml +++ b/config/model/segformer/segformer-segmentation.yaml @@ -8,18 +8,41 @@ model: full: ~ # auto backbone: name: segformer - num_modules: 4 # `num_encoder_blocks` in original - num_blocks: [2, 2, 2, 2] # `depth` in original - sr_ratios: [8, 4, 2, 1] - hidden_sizes: [32, 64, 160, 256] - embedding_patch_sizes: [7, 3, 3, 3] - embedding_strides: [4, 2, 2, 2] - num_attention_heads: [1, 2, 5, 8] - intermediate_ratio: 4 - hidden_activation_type: "gelu" - hidden_dropout_prob: 0.0 - attention_dropout_prob: 0.0 - layer_norm_eps: 1e-5 + params: + intermediate_ratio: 4 + hidden_activation_type: "gelu" + hidden_dropout_prob: 0.0 + attention_dropout_prob: 0.0 + layer_norm_eps: 1e-5 + stage_params: + - + num_blocks: 2 + sr_ratios: 8 + hidden_sizes: 32 + embedding_patch_sizes: 7 + embedding_strides: 4 + num_attention_heads: 1 + - + num_blocks: 2 + sr_ratios: 4 + hidden_sizes: 64 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 2 + - + num_blocks: 2 + sr_ratios: 2 + hidden_sizes: 160 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 5 + - + num_blocks: 2 + sr_ratios: 1 + hidden_sizes: 256 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 8 head: name: all_mlp_decoder losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/segformer.py b/src/netspresso_trainer/models/backbones/experimental/segformer.py index 9fb55c87b..77b74df25 100644 --- a/src/netspresso_trainer/models/backbones/experimental/segformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/segformer.py @@ -1,5 +1,5 @@ import math -from typing import Optional +from typing import Optional, List, Dict import torch import torch.nn as nn @@ -135,42 +135,60 @@ def forward(self, x, height, width): class SegFormer(MetaFormer): - def __init__(self, task, num_modules, num_blocks, embedding_patch_sizes, embedding_strides, hidden_sizes, - num_attention_heads, attention_dropout_prob, sr_ratios, - intermediate_ratio, hidden_dropout_prob, hidden_activation_type, layer_norm_eps, - **kwargs): - super().__init__(hidden_sizes) + def __init__( + self, + task: str, + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, + **kwargs, + ) -> None: + super().__init__([stage['hidden_sizes'] for stage in stage_params]) self.task = task self.use_intermediate_features = self.task in ['segmentation', 'detection'] - image_channels = 3 + intermediate_ratio = params['intermediate_ratio'] + hidden_activation_type = params['hidden_activation_type'] + hidden_dropout_prob = params['hidden_dropout_prob'] + attention_dropout_prob = params['attention_dropout_prob'] + layer_norm_eps = params['layer_norm_eps'] + + in_channels = 3 self.encoder_modules = nn.ModuleList() - for i in range(num_modules): + for blocks in stage_params: + num_blocks = blocks['num_blocks'] + sr_ratios = blocks['sr_ratios'] + hidden_sizes = blocks['hidden_sizes'] + embedding_patch_sizes = blocks['embedding_patch_sizes'] + embedding_strides = blocks['embedding_strides'] + num_attention_heads = blocks['num_attention_heads'] + module = nn.ModuleDict( { 'patch_embed': SegformerOverlapPatchEmbeddings( - embedding_patch_sizes[i], - embedding_strides[i], - image_channels if i == 0 else hidden_sizes[i - 1], - hidden_sizes[i] + embedding_patch_sizes, + embedding_strides, + in_channels, + hidden_sizes ), 'encoder': SegformerEncoder( - num_blocks[i], - hidden_sizes[i], - num_attention_heads[i], + num_blocks, + hidden_sizes, + num_attention_heads, attention_dropout_prob, - sr_ratios[i], + sr_ratios, intermediate_ratio, hidden_dropout_prob, hidden_activation_type, layer_norm_eps ), - 'norm': nn.LayerNorm(hidden_sizes[i]) + 'norm': nn.LayerNorm(hidden_sizes) } ) self.encoder_modules.append(module) + in_channels = hidden_sizes + def forward(self, x): B = x.size(0) all_hidden_states = () if self.use_intermediate_features else None From 2cd37239d6f71b9fc27a933b8f1d59be3a3451d6 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 12:49:03 +0900 Subject: [PATCH 098/167] Revert kwargs --- .../models/backbones/experimental/mobilenetv3.py | 1 + src/netspresso_trainer/models/backbones/experimental/resnet.py | 1 + src/netspresso_trainer/models/backbones/experimental/vit.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index 0c41ffed2..3886a5dd9 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -23,6 +23,7 @@ def __init__( task: str, params: Optional[List[Dict]] = None, stage_params: Optional[List[Dict]] = None, + **kwargs, ) -> None: super(MobileNetV3, self).__init__() diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index c0e9933d7..f50c9a824 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -28,6 +28,7 @@ def __init__( task: str, params: Optional[List[Dict]] = None, stage_params: Optional[List[Dict]] = None, + **kwargs, ) -> None: super(ResNet, self).__init__() # Mandatory fields diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index 34ba69a70..da40d370e 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -96,6 +96,7 @@ def __init__( task: str, params: Optional[List[Dict]] = None, stage_params: Optional[List[Dict]] = None, + **kwargs, ) -> None: patch_size = params['patch_size'] hidden_size = params['hidden_size'] From 6104f9bd8f79274b966bc81bf4186ff18e0b0fb6 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 13:34:09 +0900 Subject: [PATCH 099/167] Change EfficientFormer model config --- .../efficientformer-l1-classification.yaml | 55 ++++++++++++------- .../efficientformer-l1-detection.yaml | 55 ++++++++++++------- .../efficientformer-l1-segmentation.yaml | 55 ++++++++++++------- .../backbones/experimental/efficientformer.py | 39 +++++++++---- 4 files changed, 134 insertions(+), 70 deletions(-) diff --git a/config/model/efficientformer/efficientformer-l1-classification.yaml b/config/model/efficientformer/efficientformer-l1-classification.yaml index 55aeb99e3..d1eca5c1a 100644 --- a/config/model/efficientformer/efficientformer-l1-classification.yaml +++ b/config/model/efficientformer/efficientformer-l1-classification.yaml @@ -8,26 +8,41 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. + hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True head: name: fc losses: diff --git a/config/model/efficientformer/efficientformer-l1-detection.yaml b/config/model/efficientformer/efficientformer-l1-detection.yaml index e81da42f9..d62a461b5 100644 --- a/config/model/efficientformer/efficientformer-l1-detection.yaml +++ b/config/model/efficientformer/efficientformer-l1-detection.yaml @@ -8,26 +8,41 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. + hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True head: name: faster_rcnn losses: diff --git a/config/model/efficientformer/efficientformer-l1-segmentation.yaml b/config/model/efficientformer/efficientformer-l1-segmentation.yaml index 029abf54f..cb5e8c729 100644 --- a/config/model/efficientformer/efficientformer-l1-segmentation.yaml +++ b/config/model/efficientformer/efficientformer-l1-segmentation.yaml @@ -8,26 +8,41 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. + hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True head: name: all_mlp_decoder losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py index 6ba4a5e3c..01a13a4ab 100644 --- a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py @@ -5,7 +5,7 @@ import itertools import math import os -from typing import Dict, Optional +from typing import Dict, Optional, List import torch import torch.nn as nn @@ -329,15 +329,34 @@ def forward(self, x): class EfficientFormer(MetaFormer): def __init__( - self, task, num_blocks, hidden_sizes, - num_attention_heads, attention_hidden_size, attention_dropout_prob, - attention_ratio, attention_bias_resolution, - pool_size, intermediate_ratio, hidden_dropout_prob, hidden_activation_type, - layer_norm_eps, - drop_path_rate=0., use_layer_scale=True, layer_scale_init_value=1e-5, - downsamples=None, down_patch_size=3, down_stride=2, down_pad=1, - vit_num=1, **kwargs - ): + self, + task: str, + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, + **kwargs, + ) -> None: + + num_blocks = [stage['num_blocks'] for stage in stage_params] + hidden_sizes = [stage['hidden_sizes'] for stage in stage_params] + downsamples = [stage['downsamples'] for stage in stage_params] + + num_attention_heads = params['num_attention_heads'] + attention_hidden_size = params['attention_hidden_size'] + attention_dropout_prob = params['attention_dropout_prob'] + attention_ratio = params['attention_ratio'] + attention_bias_resolution = params['attention_bias_resolution'] + pool_size = params['pool_size'] + intermediate_ratio = params['intermediate_ratio'] + hidden_dropout_prob = params['hidden_dropout_prob'] + hidden_activation_type = params['hidden_activation_type'] + layer_norm_eps = params['layer_norm_eps'] + drop_path_rate = params['drop_path_rate'] + use_layer_scale = params['use_layer_scale'] + layer_scale_init_value = params['layer_scale_init_value'] + down_patch_size = params['down_patch_size'] + down_stride = params['down_stride'] + down_pad = params['down_pad'] + vit_num = params['vit_num'] super().__init__(hidden_sizes) self.task = task.lower() From 89db112dd467217849dbd7f5edb88eaf287da166 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 14:10:30 +0900 Subject: [PATCH 100/167] Change MobileViT model config --- .../mobilevit/mobilevit-s-classification.yaml | 79 ++++++++++++++----- .../backbones/experimental/mobilevit.py | 65 ++++++++------- 2 files changed, 98 insertions(+), 46 deletions(-) diff --git a/config/model/mobilevit/mobilevit-s-classification.yaml b/config/model/mobilevit/mobilevit-s-classification.yaml index c3cb1cdfb..5dc91c29f 100644 --- a/config/model/mobilevit/mobilevit-s-classification.yaml +++ b/config/model/mobilevit/mobilevit-s-classification.yaml @@ -8,24 +8,67 @@ model: full: ~ # auto backbone: name: mobilevit - out_channels: [32, 64, 96, 128, 160] - block_type: ['mv2', 'mv2', 'mobilevit', 'mobilevit', 'mobilevit'] - num_blocks: [1, 3, None, None, None] - stride: [1, 2, 2, 2, 2] - hidden_size: [None, None, 144, 192, 240] - intermediate_size: [None, None, 288, 384, 480] - num_transformer_blocks: [None, None, 2, 4, 3] - dilate: [None, None, False, False, False] - expand_ratio: [4, 4, 4, 4, 4] # [mv2_exp_mult] * 4 - patch_embedding_out_channels: 16 - local_kernel_size: 3 - patch_size: 2 - num_attention_heads: 4 # num_heads - attention_dropout_prob: 0.1 - hidden_dropout_prob: 0.0 - exp_factor: 4 - layer_norm_eps: 1e-5 - use_fusion_layer: True + params: + patch_embedding_out_channels: 16 + local_kernel_size: 3 + patch_size: 2 + num_attention_heads: 4 # num_heads + attention_dropout_prob: 0.1 + hidden_dropout_prob: 0.0 + exp_factor: 4 + layer_norm_eps: 1e-5 + use_fusion_layer: True + stage_params: + - + out_channels: 32 + block_type: 'mv2' + num_blocks: 1 + stride: 1 + hidden_size: None + intermediate_size: None + num_transformer_blocks: None + dilate: None + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 64 + block_type: 'mv2' + num_blocks: 3 + stride: 2 + hidden_size: None + intermediate_size: None + num_transformer_blocks: None + dilate: None + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 96 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 144 + intermediate_size: 288 + num_transformer_blocks: 2 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 128 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 192 + intermediate_size: 384 + num_transformer_blocks: 4 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 160 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 240 + intermediate_size: 480 + num_transformer_blocks: 3 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 head: name: fc losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py index 1adbd28e0..719315859 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py @@ -5,7 +5,7 @@ import argparse import math -from typing import Any, Dict, Literal, Optional, Tuple, Union +from typing import Any, Dict, Literal, Optional, Tuple, Union, List import torch import torch.nn as nn @@ -252,27 +252,38 @@ def forward( return out class MobileViTEncoder(MetaFormerEncoder): - def __init__(self, out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, patch_size, - num_attention_heads, attention_dropout_prob, hidden_dropout_prob, layer_norm_eps, use_fusion_layer) -> None: + def __init__( + self, + params: Optional[List[Dict]], + stage_params: Optional[List[Dict]], + ) -> None: super().__init__() stages = [] self.dilation = 1 - self.local_kernel_size = local_kernel_size - self.patch_size = patch_size - self.num_attention_heads = num_attention_heads - self.attention_dropout_prob = attention_dropout_prob - self.hidden_dropout_prob = hidden_dropout_prob - self.layer_norm_eps = layer_norm_eps - self.use_fusion_layer = use_fusion_layer + self.local_kernel_size = params['local_kernel_size'] + self.patch_size = params['patch_size'] + self.num_attention_heads = params['num_attention_heads'] + self.attention_dropout_prob = params['attention_dropout_prob'] + self.hidden_dropout_prob = params['hidden_dropout_prob'] + self.layer_norm_eps = params['layer_norm_eps'] + self.use_fusion_layer = params['use_fusion_layer'] - in_channels = patch_embedding_out_channels - for idx in range(len(out_channels)): - stages.append(self._make_block(out_channels[idx], block_type[idx], num_blocks[idx], stride[idx], hidden_size[idx], - intermediate_size[idx], num_transformer_blocks[idx], dilate[idx], expand_ratio[idx], + in_channels = params['patch_embedding_out_channels'] + for stage in stage_params: + out_channels = stage['out_channels'] + block_type = stage['block_type'] + num_blocks = stage['num_blocks'] + stride = stage['stride'] + hidden_size = stage['hidden_size'] + intermediate_size = stage['intermediate_size'] + num_transformer_blocks = stage['num_transformer_blocks'] + dilate = stage['dilate'] + expand_ratio = stage['expand_ratio'] + stages.append(self._make_block(out_channels, block_type, num_blocks, stride, hidden_size, + intermediate_size, num_transformer_blocks, dilate, expand_ratio, in_channels)) - in_channels = out_channels[idx] + in_channels = out_channels self.blocks = nn.Sequential(*stages) def _make_block(self, out_channels, block_type: Literal['mv2', 'mobilevit'], num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, in_channels): @@ -346,26 +357,24 @@ def _make_mobilevit_blocks(self, num_transformer_blocks, in_channels, out_channe class MobileViT(MetaFormer): def __init__( - self, task, - out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, patch_size, - num_attention_heads, attention_dropout_prob, hidden_dropout_prob, - exp_factor, layer_norm_eps=1e-6, use_fusion_layer = True, - **kwargs + self, + task: str, + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, + **kwargs, ) -> None: - exp_channels = min(exp_factor * out_channels[-1], 960) - hidden_sizes = out_channels + [exp_channels] + exp_channels = min(params['exp_factor'] * stage_params[-1]['out_channels'], 960) + hidden_sizes = [stage['out_channels'] for stage in stage_params] + [exp_channels] super().__init__(hidden_sizes) self.task = task self.intermediate_features = self.task in ['segmentation', 'detection'] image_channels = 3 - self.patch_embed = MobileViTEmbeddings(image_channels, patch_embedding_out_channels) - self.encoder = MobileViTEncoder(out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, patch_size, num_attention_heads, attention_dropout_prob, hidden_dropout_prob, layer_norm_eps, use_fusion_layer) + self.patch_embed = MobileViTEmbeddings(image_channels, params['patch_embedding_out_channels']) + self.encoder = MobileViTEncoder(params=params, stage_params=stage_params) - self.conv_1x1_exp = ConvLayer(in_channels=out_channels[-1], out_channels=exp_channels, + self.conv_1x1_exp = ConvLayer(in_channels=stage_params[-1]['out_channels'], out_channels=exp_channels, kernel_size=1, stride=1, use_act=True, use_norm=True, act_type='silu') self.pool = GlobalPool(pool_type="mean", keep_dim=False) From 53d57bd3b49ac95c2dc21f2c7cf1e91e6c7b51b7 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 14:39:25 +0900 Subject: [PATCH 101/167] Change YOLOX model config --- config/model/yolox/yolox-detection.yaml | 7 +++++-- .../models/backbones/experimental/darknet.py | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/config/model/yolox/yolox-detection.yaml b/config/model/yolox/yolox-detection.yaml index 0f3c2e13b..49aef68a6 100644 --- a/config/model/yolox/yolox-detection.yaml +++ b/config/model/yolox/yolox-detection.yaml @@ -8,8 +8,11 @@ model: full: ~ # auto backbone: name: cspdarknet - dep_mul: 0.33 - wid_mul: 0.5 + params: + dep_mul: 0.33 + wid_mul: 0.5 + act_type: "silu" + stage_params: ~ head: name: yolox_head losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/darknet.py b/src/netspresso_trainer/models/backbones/experimental/darknet.py index 3759c0c94..af3fdd909 100644 --- a/src/netspresso_trainer/models/backbones/experimental/darknet.py +++ b/src/netspresso_trainer/models/backbones/experimental/darknet.py @@ -2,6 +2,7 @@ Based on the Darknet implementation of Megvii. https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/darknet.py """ +from typing import Dict, Optional, List import torch from torch import nn @@ -14,22 +15,26 @@ class CSPDarknet(nn.Module): + def __init__( self, - task, - dep_mul, - wid_mul, - out_features=("dark3", "dark4", "dark5"), + task: str, + params: Optional[List[Dict]] = None, + stage_params: Optional[List[Dict]] = None, #depthwise=False, - act_type="silu", - **kwargs - ): + **kwargs, + ) -> None: super().__init__() + out_features=("dark3", "dark4", "dark5") assert out_features, "please provide output features of Darknet" self.task = task.lower() self.use_intermediate_features = self.task in ['segmentation', 'detection'] + dep_mul = params['dep_mul'] + wid_mul = params['wid_mul'] + act_type = params['act_type'] + self.out_features = out_features Conv = ConvLayer From f387364b2b916d8f4a4710cd63c200ea27a93adf Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 14:46:49 +0900 Subject: [PATCH 102/167] Fix typo --- config/model/resnet/resnet50-classification.yaml | 12 ++++++++---- config/model/resnet/resnet50-segmentation.yaml | 12 ++++++++---- config/model/vit/vit-classification.yaml | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 9bf1946a4..f4ec36095 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -14,15 +14,19 @@ model: groups: 1 width_per_group: 64 stage_params: - - plane: 64 + - + plane: 64 layers: 3 - - plane: 128 + - + plane: 128 layers: 4 replace_stride_with_dilation: False - - plane: 256 + - + plane: 256 layers: 6 replace_stride_with_dilation: False - - plane: 512 + - + plane: 512 layers: 3 replace_stride_with_dilation: False head: diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 286a6cad0..448a2c19b 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -15,15 +15,19 @@ model: groups: 1 width_per_group: 64 stage_params: - - plane: 64 + - + plane: 64 layers: 3 - - plane: 128 + - + plane: 128 layers: 4 replace_stride_with_dilation: False - - plane: 256 + - + plane: 256 layers: 6 replace_stride_with_dilation: False - - plane: 512 + - + plane: 512 layers: 3 replace_stride_with_dilation: False head: diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index afcef1df4..e9d2c0050 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -16,7 +16,7 @@ model: attention_dropout_prob: 0.0 intermediate_size: 768 # hidden_size * 4 hidden_dropout_prob: 0.1 - stage_parmas: ~ + stage_params: ~ head: name: fc losses: From 674d86088b253b5454b8d1334effb457cfdbe312 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 14:54:49 +0900 Subject: [PATCH 103/167] Remove defaults --- config/model/resnet/resnet50-classification.yaml | 2 ++ config/model/vit/vit-classification.yaml | 3 +++ .../models/backbones/experimental/resnet.py | 13 ++++++------- .../models/backbones/experimental/vit.py | 7 +++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index f4ec36095..5a2a1e627 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -13,6 +13,8 @@ model: norm_layer: batch_norm groups: 1 width_per_group: 64 + zero_init_residual: False + expansion: ~ stage_params: - plane: 64 diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index e9d2c0050..29b2bf84e 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -16,6 +16,9 @@ model: attention_dropout_prob: 0.0 intermediate_size: 768 # hidden_size * 4 hidden_dropout_prob: 0.1 + layer_norm_eps: 1e-6 + use_cls_token: True + vocab_size: 1000 stage_params: ~ head: name: fc diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index f50c9a824..239ffc95e 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -31,14 +31,13 @@ def __init__( **kwargs, ) -> None: super(ResNet, self).__init__() - # Mandatory fields + block: Literal['basicblock', 'bottleneck'] = params['block'] - # Fields with defaults - zero_init_residual: bool = params['zero_init_residual'] if 'zero_init_residual' in params else False - groups: int = params['groups'] if 'groups' in params else 1 - width_per_group: int = params['width_per_group'] if 'width_per_group' in params else 64 - norm_layer: Optional[str] = params['norm_layer'] if 'norm_layer' in params else None - expansion: Optional[int] = params['expansion'] if 'expansion' in params else None + zero_init_residual: bool = params['zero_init_residual'] + groups: int = params['groups'] + width_per_group: int = params['width_per_group'] + norm_layer: Optional[str] = params['norm_layer'] + expansion: Optional[int] = params['expansion'] self.task = task.lower() block = BLOCK_FROM_LITERAL[block.lower()] diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index da40d370e..07a14ef19 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -105,10 +105,9 @@ def __init__( attention_dropout_prob = params['attention_dropout_prob'] intermediate_size = params['intermediate_size'] hidden_dropout_prob = params['hidden_dropout_prob'] - - layer_norm_eps = params['layer_norm_eps'] if 'layer_norm_eps' in params else 1e-6 - use_cls_token = params['use_cls_token'] if 'use_cls_token' in params else True - vocab_size = params['vocab_size'] if 'vocab_size' in params else 1000 + layer_norm_eps = params['layer_norm_eps'] + use_cls_token = params['use_cls_token'] + vocab_size = params['vocab_size'] hidden_sizes = hidden_size if isinstance(hidden_size, list) else [hidden_size] * num_blocks super().__init__(hidden_sizes) From c32269f6bfec68bcfed2ef8f4dd2d6ed605f7407 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 14:56:25 +0900 Subject: [PATCH 104/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 446e57776..7ffb9cc0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - Enable dataset augmentation customizing by `@illian01` in [PR 201](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/201) - Add postprocessor module by `@illian01` in [PR 223](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/223) +- Equalize the model backbone configuration format by `@illian01` in [PR 228](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/228) ## Other Changes: From 5a3ebb28b75fefc18966ef1d3f8eac208febbd3b Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Tue, 14 Nov 2023 18:41:20 +0900 Subject: [PATCH 105/167] Update cfg.model --- src/netspresso_trainer/cfg/model.py | 286 ++++++++++++++++++++-------- 1 file changed, 204 insertions(+), 82 deletions(-) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index ad4a3aa12..518b0f4b4 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -44,26 +44,31 @@ class ModelConfig: class EfficientFormerArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "efficientformer", - "num_blocks": [3, 2, 6, 4], - "hidden_sizes": [48, 96, 224, 448], - "num_attention_heads": 8, - "attention_hidden_size": 256, # attention_hidden_size_splitted * num_attention_heads - "attention_dropout_prob": 0., - "attention_ratio": 4, - "attention_bias_resolution": 16, - "pool_size": 3, - "intermediate_ratio": 4, - "hidden_dropout_prob": 0., - "hidden_activation_type": 'gelu', - "layer_norm_eps": 1e-5, - "drop_path_rate": 0., - "use_layer_scale": True, - "layer_scale_init_value": 1e-5, - "downsamples": [True, True, True, True], - "down_patch_size": 3, - "down_stride": 2, - "down_pad": 1, - "vit_num": 1, + "params": { + "num_attention_heads": 8, + "attention_hidden_size": 256, + "attention_dropout_prob": 0., + "attention_ratio": 4, + "attention_bias_resolution": 16, + "pool_size": 3, + "intermediate_ratio": 4, + "hidden_dropout_prob": 0., + "hidden_activation_type": 'gelu', + "layer_norm_eps": 1e-5, + "drop_path_rate": 0., + "use_layer_scale": True, + "layer_scale_init_value": 1e-5, + "down_patch_size": 3, + "down_stride": 2, + "down_pad": 1, + "vit_num": 1, + }, + "stage_params": [ + {"num_blocks": 3, "hidden_sizes": 48, "downsamples": True}, + {"num_blocks": 2, "hidden_sizes": 96, "downsamples": True}, + {"num_blocks": 6, "hidden_sizes": 224, "downsamples": True}, + {"num_blocks": 4, "hidden_sizes": 448, "downsamples": True}, + ], }) @@ -71,29 +76,49 @@ class EfficientFormerArchitectureConfig(ArchitectureConfig): class MobileNetV3ArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "mobilenetv3_small", - - # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] - "block_info": [ - [ - [16, 3, 16, 16, True, "relu", 2, 1] - ], - [ - [16, 3, 72, 24, False, "relu", 2, 1], - [24, 3, 88, 24, False, "relu", 1, 1] - ], - [ - [24, 5, 96, 40, True, "hard_swish", 2, 1], - [40, 5, 240, 40, True, "hard_swish", 1, 1], - [40, 5, 240, 40, True, "hard_swish", 1, 1], - [40, 5, 120, 48, True, "hard_swish", 1, 1], - [48, 5, 144, 48, True, "hard_swish", 1, 1] - ], - [ - [48, 5, 288, 96, True, "hard_swish", 2, 1], - [96, 5, 576, 96, True, "hard_swish", 1, 1], - [96, 5, 576, 96, True, "hard_swish", 1, 1] - ] - ] + "params": None, + "stage_params": [ + { + "in_channels": [16], + "kernel": [3], + "expanded_channels": [16], + "out_channels": [16], + "use_se": [True], + "activation": ["relu"], + "stride": [2], + "dilation": [1], + }, + { + "in_channels": [16, 24], + "kernel": [3, 3], + "expanded_channels": [72, 88], + "out_channels": [24, 24], + "use_se": [False, False], + "activation": ["relu", "relu"], + "stride": [2, 1], + "dilation": [1, 1], + }, + { + "in_channels": [24, 40, 40, 40, 48], + "kernel": [5, 5, 5, 5, 5], + "expanded_channels": [96, 240, 240, 120, 144], + "out_channels": [40, 40, 40, 48, 48], + "use_se": [True, True, True, True, True], + "activation": ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + "stride": [2, 1, 1, 1, 1], + "dilation": [1, 1, 1, 1, 1], + }, + { + "in_channels": [48, 96, 96], + "kernel": [5, 5, 5], + "expanded_channels": [288, 576, 576], + "out_channels": [96, 96, 96], + "use_se": [True, True, True], + "activation": ["hard_swish", "hard_swish", "hard_swish"], + "stride": [2, 1, 1], + "dilation": [1, 1, 1], + }, + ], }) @@ -101,24 +126,74 @@ class MobileNetV3ArchitectureConfig(ArchitectureConfig): class MobileViTArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "mobilevit", - "out_channels": [32, 64, 96, 128, 160], - "block_type": ['mv2', 'mv2', 'mobilevit', 'mobilevit', 'mobilevit'], - "num_blocks": [1, 3, None, None, None], - "stride": [1, 2, 2, 2, 2], - "hidden_size": [None, None, 144, 192, 240], - "intermediate_size": [None, None, 288, 384, 480], - "num_transformer_blocks": [None, None, 2, 4, 3], - "dilate": [None, None, False, False, False], - "expand_ratio": [4, 4, 4, 4, 4], # [mv2_exp_mult] * 4 - "patch_embedding_out_channels": 16, - "local_kernel_size": 3, - "patch_size": 2, - "num_attention_heads": 4, # num_heads - "attention_dropout_prob": 0.1, - "hidden_dropout_prob": 0.0, - "exp_factor": 4, - "layer_norm_eps": 1e-5, - "use_fusion_layer": True, + "params": { + "patch_embedding_out_channels": 16, + "local_kernel_size": 3, + "patch_size": 2, + "num_attention_heads": 4, + "attention_dropout_prob": 0.1, + "hidden_dropout_prob": 0.0, + "exp_factor": 4, + "layer_norm_eps": 1e-5, + "use_fusion_layer": True, + }, + "stage_params": [ + { + "out_channels": 32, + "block_type": "mv2", + "num_blocks": 1, + "stride": 1, + "hidden_size": None, + "intermediate_size": None, + "num_transformer_blocks": None, + "dilate": None, + "expand_ratio": 4, + }, + { + "out_channels": 64, + "block_type": "mv2", + "num_blocks": 3, + "stride": 2, + "hidden_size": None, + "intermediate_size": None, + "num_transformer_blocks": None, + "dilate": None, + "expand_ratio": 4, + }, + { + "out_channels": 96, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 144, + "intermediate_size": 288, + "num_transformer_blocks": 2, + "dilate": False, + "expand_ratio": 4, + }, + { + "out_channels": 128, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 192, + "intermediate_size": 384, + "num_transformer_blocks": 4, + "dilate": False, + "expand_ratio": 4, + }, + { + "out_channels": 160, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 240, + "intermediate_size": 480, + "num_transformer_blocks": 3, + "dilate": False, + "expand_ratio": 4, + }, + ] }) @@ -138,8 +213,20 @@ class PIDNetArchitectureConfig(ArchitectureConfig): class ResNetArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "resnet50", - "block": "bottleneck", - "layers": [3, 4, 6, 3], + "params": { + "block": "bottleneck", + "norm_layer": "batch_norm", + "groups": 1, + "width_per_group": 64, + "zero_init_residual": False, + "expansion": None, + }, + "stage_params": [ + {"plane": 64, "layers": 3}, + {"plane": 128, "layers": 4}, + {"plane": 256, "layers": 6}, + {"plane": 512, "layers": 3}, + ], }) @@ -147,18 +234,47 @@ class ResNetArchitectureConfig(ArchitectureConfig): class SegFormerArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "segformer", - "num_modules": 4, - "num_blocks": [2, 2, 2, 2], - "sr_ratios": [8, 4, 2, 1], - "hidden_sizes": [32, 64, 160, 256], - "embedding_patch_sizes": [7, 3, 3, 3], - "embedding_strides": [4, 2, 2, 2], - "num_attention_heads": [1, 2, 5, 8], - "intermediate_ratio": 4, - "hidden_activation_type": "gelu", - "hidden_dropout_prob": 0.0, - "attention_dropout_prob": 0.0, - "layer_norm_eps": 1e-5, + "params": { + "intermediate_ratio": 4, + "hidden_activation_type": "gelu", + "hidden_dropout_prob": 0.0, + "attention_dropout_prob": 0.0, + "layer_norm_eps": 1e-5, + }, + "stage_params": [ + { + "num_blocks": 2, + "sr_ratios": 8, + "hidden_sizes": 32, + "embedding_patch_sizes": 7, + "embedding_strides": 4, + "num_attention_heads": 1, + }, + { + "num_blocks": 2, + "sr_ratios": 4, + "hidden_sizes": 64, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 2, + }, + { + "num_blocks": 2, + "sr_ratios": 2, + "hidden_sizes": 160, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 5, + }, + { + "num_blocks": 2, + "sr_ratios": 1, + "hidden_sizes": 256, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 8, + }, + ], }) @@ -166,13 +282,19 @@ class SegFormerArchitectureConfig(ArchitectureConfig): class ViTArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "vit", - "patch_size": 16, - "hidden_size": 192, - "num_blocks": 12, - "num_attention_heads": 3, - "attention_dropout_prob": 0.0, - "intermediate_size": 192 * 4, - "hidden_dropout_prob": 0.1, + "params": { + "patch_size": 16, + "hidden_size": 192, + "num_blocks": 12, + "num_attention_heads": 3, + "attention_dropout_prob": 0.0, + "intermediate_size": 768, + "hidden_dropout_prob": 0.1, + "layer_norm_eps": 1e-6, + "use_cls_token": True, + "vocab_size": 1000, + }, + "stage_params": None, }) From 72a1dfb78c203c8f2d054274d50f717fb66fbf69 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 16:59:24 +0900 Subject: [PATCH 106/167] Update missing values --- config/model/resnet/resnet50-segmentation.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 448a2c19b..7b6d364f9 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -14,6 +14,8 @@ model: norm_layer: batch_norm groups: 1 width_per_group: 64 + zero_init_residual: False + expansion: ~ stage_params: - plane: 64 From 3475636730eec61fea3dde2fbbf0d66536efb33d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 17:07:37 +0900 Subject: [PATCH 107/167] Ruff fix --- src/netspresso_trainer/cfg/model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 518b0f4b4..169efef1a 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -68,7 +68,7 @@ class EfficientFormerArchitectureConfig(ArchitectureConfig): {"num_blocks": 2, "hidden_sizes": 96, "downsamples": True}, {"num_blocks": 6, "hidden_sizes": 224, "downsamples": True}, {"num_blocks": 4, "hidden_sizes": 448, "downsamples": True}, - ], + ], }) @@ -79,11 +79,11 @@ class MobileNetV3ArchitectureConfig(ArchitectureConfig): "params": None, "stage_params": [ { - "in_channels": [16], - "kernel": [3], - "expanded_channels": [16], - "out_channels": [16], - "use_se": [True], + "in_channels": [16], + "kernel": [3], + "expanded_channels": [16], + "out_channels": [16], + "use_se": [True], "activation": ["relu"], "stride": [2], "dilation": [1], From 4723d8e73d58b1db17d825c9515d503fb7f8d2cf Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:19:41 +0900 Subject: [PATCH 108/167] Remove to_object of model backbone config --- src/netspresso_trainer/models/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/netspresso_trainer/models/base.py b/src/netspresso_trainer/models/base.py index ec341eb23..2cb9880c0 100644 --- a/src/netspresso_trainer/models/base.py +++ b/src/netspresso_trainer/models/base.py @@ -22,8 +22,7 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode self.head_name = head_name backbone_fn: Callable[..., nn.Module] = MODEL_BACKBONE_DICT[backbone_name] - conf_model_backbone = OmegaConf.to_object(conf_model.architecture.backbone) - self.backbone: nn.Module = backbone_fn(task=self.task, conf_model_backbone=conf_model_backbone) + self.backbone: nn.Module = backbone_fn(task=self.task, conf_model_backbone=conf_model.architecture.backbone) self.backbone = load_from_checkpoint(self.backbone, model_checkpoint) From 70c98057a8b3dcf246a8ad38a55edb16ddd9ab44 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:20:56 +0900 Subject: [PATCH 109/167] Update MobileNetV3 to receive OmegaConf --- .../models/backbones/experimental/mobilenetv3.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index 3886a5dd9..fda8c1984 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -21,9 +21,8 @@ class MobileNetV3(nn.Module): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: super(MobileNetV3, self).__init__() @@ -33,7 +32,7 @@ def __init__( act_type = 'hard_swish' # building first layer - firstconv_output_channels = stage_params[0]['in_channels'][0] + firstconv_output_channels = stage_params[0].in_channels[0] self.conv_first = ConvLayer( in_channels=3, out_channels=firstconv_output_channels, @@ -46,14 +45,14 @@ def __init__( # building inverted residual blocks stages: List[nn.Module] = [] - lastconv_input_channels = stage_params[-1]['out_channels'][-1] + lastconv_input_channels = stage_params[-1].out_channels[-1] lastconv_output_channels = 6 * lastconv_input_channels for stg_idx, stage_info in enumerate(stage_params): stage: List[nn.Module] = [] - for block in zip(stage_info['in_channels'], stage_info['kernel'], stage_info['expanded_channels'], - stage_info['out_channels'], stage_info['use_se'], stage_info['activation'], - stage_info['stride'], stage_info['dilation']): + for block in zip(stage_info.in_channels, stage_info.kernel, stage_info.expanded_channels, + stage_info.out_channels, stage_info.use_se, stage_info.activation, + stage_info.stride, stage_info.dilation): in_channels, kernel_size, hidden_channels, out_channels, use_se, act_type_b, stride, dilation = block act_type_b = act_type_b.lower() stage.append( @@ -131,4 +130,4 @@ def task_support(self, task): def mobilenetv3_small(task, conf_model_backbone) -> MobileNetV3: - return MobileNetV3(task, **conf_model_backbone) + return MobileNetV3(task, conf_model_backbone.params, conf_model_backbone.stage_params) From 0a8088101a62d6705379ab99b639e4462930df08 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:23:07 +0900 Subject: [PATCH 110/167] Update CSPDarknet to receive OmegaConf --- .../models/backbones/experimental/darknet.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/darknet.py b/src/netspresso_trainer/models/backbones/experimental/darknet.py index af3fdd909..28ebf9c5c 100644 --- a/src/netspresso_trainer/models/backbones/experimental/darknet.py +++ b/src/netspresso_trainer/models/backbones/experimental/darknet.py @@ -19,10 +19,9 @@ class CSPDarknet(nn.Module): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, #depthwise=False, - **kwargs, ) -> None: super().__init__() out_features=("dark3", "dark4", "dark5") @@ -31,9 +30,9 @@ def __init__( self.task = task.lower() self.use_intermediate_features = self.task in ['segmentation', 'detection'] - dep_mul = params['dep_mul'] - wid_mul = params['wid_mul'] - act_type = params['act_type'] + dep_mul = params.dep_mul + wid_mul = params.wid_mul + act_type = params.act_type self.out_features = out_features Conv = ConvLayer @@ -152,4 +151,4 @@ def task_support(self, task): def cspdarknet(task, conf_model_backbone) -> CSPDarknet: - return CSPDarknet(task, **conf_model_backbone) + return CSPDarknet(task, conf_model_backbone.params, conf_model_backbone.stage_params) From 6af5ac8603449396308fd96b3629c84a75380771 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:26:03 +0900 Subject: [PATCH 111/167] Update EfficientFormer to receive OmegaConf --- .../backbones/experimental/efficientformer.py | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py index 01a13a4ab..754dc8f92 100644 --- a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py @@ -331,32 +331,31 @@ class EfficientFormer(MetaFormer): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: - num_blocks = [stage['num_blocks'] for stage in stage_params] - hidden_sizes = [stage['hidden_sizes'] for stage in stage_params] - downsamples = [stage['downsamples'] for stage in stage_params] - - num_attention_heads = params['num_attention_heads'] - attention_hidden_size = params['attention_hidden_size'] - attention_dropout_prob = params['attention_dropout_prob'] - attention_ratio = params['attention_ratio'] - attention_bias_resolution = params['attention_bias_resolution'] - pool_size = params['pool_size'] - intermediate_ratio = params['intermediate_ratio'] - hidden_dropout_prob = params['hidden_dropout_prob'] - hidden_activation_type = params['hidden_activation_type'] - layer_norm_eps = params['layer_norm_eps'] - drop_path_rate = params['drop_path_rate'] - use_layer_scale = params['use_layer_scale'] - layer_scale_init_value = params['layer_scale_init_value'] - down_patch_size = params['down_patch_size'] - down_stride = params['down_stride'] - down_pad = params['down_pad'] - vit_num = params['vit_num'] + num_blocks = [stage.num_blocks for stage in stage_params] + hidden_sizes = [stage.hidden_sizes for stage in stage_params] + downsamples = [stage.downsamples for stage in stage_params] + + num_attention_heads = params.num_attention_heads + attention_hidden_size = params.attention_hidden_size + attention_dropout_prob = params.attention_dropout_prob + attention_ratio = params.attention_ratio + attention_bias_resolution = params.attention_bias_resolution + pool_size = params.pool_size + intermediate_ratio = params.intermediate_ratio + hidden_dropout_prob = params.hidden_dropout_prob + hidden_activation_type = params.hidden_activation_type + layer_norm_eps = params.layer_norm_eps + drop_path_rate = params.drop_path_rate + use_layer_scale = params.use_layer_scale + layer_scale_init_value = params.layer_scale_init_value + down_patch_size = params.down_patch_size + down_stride = params.down_stride + down_pad = params.down_pad + vit_num = params.vit_num super().__init__(hidden_sizes) self.task = task.lower() @@ -393,4 +392,4 @@ def forward(self, x): def efficientformer(task, conf_model_backbone) -> EfficientFormer: - return EfficientFormer(task, **conf_model_backbone) + return EfficientFormer(task, conf_model_backbone.params, conf_model_backbone.stage_params) From 5085b4450c155c0d3178b3d70850f2def423bb15 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:29:31 +0900 Subject: [PATCH 112/167] Update MobileViT to receive OmegaConf --- .../backbones/experimental/mobilevit.py | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py index 719315859..828879273 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py @@ -254,32 +254,32 @@ def forward( class MobileViTEncoder(MetaFormerEncoder): def __init__( self, - params: Optional[List[Dict]], + params: Optional[Dict], stage_params: Optional[List[Dict]], ) -> None: super().__init__() stages = [] self.dilation = 1 - self.local_kernel_size = params['local_kernel_size'] - self.patch_size = params['patch_size'] - self.num_attention_heads = params['num_attention_heads'] - self.attention_dropout_prob = params['attention_dropout_prob'] - self.hidden_dropout_prob = params['hidden_dropout_prob'] - self.layer_norm_eps = params['layer_norm_eps'] - self.use_fusion_layer = params['use_fusion_layer'] + self.local_kernel_size = params.local_kernel_size + self.patch_size = params.patch_size + self.num_attention_heads = params.num_attention_heads + self.attention_dropout_prob = params.attention_dropout_prob + self.hidden_dropout_prob = params.hidden_dropout_prob + self.layer_norm_eps = params.layer_norm_eps + self.use_fusion_layer = params.use_fusion_layer - in_channels = params['patch_embedding_out_channels'] + in_channels = params.patch_embedding_out_channels for stage in stage_params: - out_channels = stage['out_channels'] - block_type = stage['block_type'] - num_blocks = stage['num_blocks'] - stride = stage['stride'] - hidden_size = stage['hidden_size'] - intermediate_size = stage['intermediate_size'] - num_transformer_blocks = stage['num_transformer_blocks'] - dilate = stage['dilate'] - expand_ratio = stage['expand_ratio'] + out_channels = stage.out_channels + block_type = stage.block_type + num_blocks = stage.num_blocks + stride = stage.stride + hidden_size = stage.hidden_size + intermediate_size = stage.intermediate_size + num_transformer_blocks = stage.num_transformer_blocks + dilate = stage.dilate + expand_ratio = stage.expand_ratio stages.append(self._make_block(out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, in_channels)) @@ -359,22 +359,21 @@ class MobileViT(MetaFormer): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: - exp_channels = min(params['exp_factor'] * stage_params[-1]['out_channels'], 960) - hidden_sizes = [stage['out_channels'] for stage in stage_params] + [exp_channels] + exp_channels = min(params.exp_factor * stage_params[-1].out_channels, 960) + hidden_sizes = [stage.out_channels for stage in stage_params] + [exp_channels] super().__init__(hidden_sizes) self.task = task self.intermediate_features = self.task in ['segmentation', 'detection'] image_channels = 3 - self.patch_embed = MobileViTEmbeddings(image_channels, params['patch_embedding_out_channels']) + self.patch_embed = MobileViTEmbeddings(image_channels, params.patch_embedding_out_channels) self.encoder = MobileViTEncoder(params=params, stage_params=stage_params) - self.conv_1x1_exp = ConvLayer(in_channels=stage_params[-1]['out_channels'], out_channels=exp_channels, + self.conv_1x1_exp = ConvLayer(in_channels=stage_params[-1].out_channels, out_channels=exp_channels, kernel_size=1, stride=1, use_act=True, use_norm=True, act_type='silu') self.pool = GlobalPool(pool_type="mean", keep_dim=False) @@ -389,4 +388,4 @@ def forward(self, x: FXTensorType): return BackboneOutput(last_feature=feat) def mobilevit(task, conf_model_backbone): - return MobileViT(task, **conf_model_backbone) \ No newline at end of file + return MobileViT(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file From aafc976a016176ada7d26d0613ca003036c41083 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:31:37 +0900 Subject: [PATCH 113/167] Update ResNet to receive OmegaConf --- .../models/backbones/experimental/resnet.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 239ffc95e..a92a6c3b8 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -26,18 +26,17 @@ class ResNet(nn.Module): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: super(ResNet, self).__init__() - block: Literal['basicblock', 'bottleneck'] = params['block'] - zero_init_residual: bool = params['zero_init_residual'] - groups: int = params['groups'] - width_per_group: int = params['width_per_group'] - norm_layer: Optional[str] = params['norm_layer'] - expansion: Optional[int] = params['expansion'] + block: Literal['basicblock', 'bottleneck'] = params.block + zero_init_residual: bool = params.zero_init_residual + groups: int = params.groups + width_per_group: int = params.width_per_group + norm_layer: Optional[str] = params.norm_layer + expansion: Optional[int] = params.expansion self.task = task.lower() block = BLOCK_FROM_LITERAL[block.lower()] @@ -162,4 +161,4 @@ def resnet50(task, conf_model_backbone) -> ResNet: """ ResNet-50 model from "Deep Residual Learning for Image Recognition" https://arxiv.org/pdf/1512.03385.pdf. """ - return ResNet(task, **conf_model_backbone) + return ResNet(task, conf_model_backbone.params, conf_model_backbone.stage_params) From 43083564e3b639cecee28cc905ef242d7582ae53 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:34:40 +0900 Subject: [PATCH 114/167] Update SegFormer to receive OmegaConf --- .../backbones/experimental/segformer.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/segformer.py b/src/netspresso_trainer/models/backbones/experimental/segformer.py index 77b74df25..d4a9903ea 100644 --- a/src/netspresso_trainer/models/backbones/experimental/segformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/segformer.py @@ -138,30 +138,29 @@ class SegFormer(MetaFormer): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: - super().__init__([stage['hidden_sizes'] for stage in stage_params]) + super().__init__([stage.hidden_sizes for stage in stage_params]) self.task = task self.use_intermediate_features = self.task in ['segmentation', 'detection'] - intermediate_ratio = params['intermediate_ratio'] - hidden_activation_type = params['hidden_activation_type'] - hidden_dropout_prob = params['hidden_dropout_prob'] - attention_dropout_prob = params['attention_dropout_prob'] - layer_norm_eps = params['layer_norm_eps'] + intermediate_ratio = params.intermediate_ratio + hidden_activation_type = params.hidden_activation_type + hidden_dropout_prob = params.hidden_dropout_prob + attention_dropout_prob = params.attention_dropout_prob + layer_norm_eps = params.layer_norm_eps in_channels = 3 self.encoder_modules = nn.ModuleList() for blocks in stage_params: - num_blocks = blocks['num_blocks'] - sr_ratios = blocks['sr_ratios'] - hidden_sizes = blocks['hidden_sizes'] - embedding_patch_sizes = blocks['embedding_patch_sizes'] - embedding_strides = blocks['embedding_strides'] - num_attention_heads = blocks['num_attention_heads'] + num_blocks = blocks.num_blocks + sr_ratios = blocks.sr_ratios + hidden_sizes = blocks.hidden_sizes + embedding_patch_sizes = blocks.embedding_patch_sizes + embedding_strides = blocks.embedding_strides + num_attention_heads = blocks.num_attention_heads module = nn.ModuleDict( { @@ -212,4 +211,4 @@ def forward(self, x): def segformer(task, conf_model_backbone) -> SegformerEncoder: - return SegFormer(task, **conf_model_backbone) + return SegFormer(task, conf_model_backbone.params, conf_model_backbone.stage_params) From 3bad5f3fbd0b4886b49ebf8d666b2152a2d8a221 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 18:37:04 +0900 Subject: [PATCH 115/167] Update ViT to receive OmegaConf --- .../models/backbones/experimental/vit.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index 07a14ef19..bce563688 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -94,20 +94,19 @@ class VisionTransformer(MetaFormer): def __init__( self, task: str, - params: Optional[List[Dict]] = None, + params: Optional[Dict] = None, stage_params: Optional[List[Dict]] = None, - **kwargs, ) -> None: - patch_size = params['patch_size'] - hidden_size = params['hidden_size'] - num_blocks = params['num_blocks'] - num_attention_heads = params['num_attention_heads'] - attention_dropout_prob = params['attention_dropout_prob'] - intermediate_size = params['intermediate_size'] - hidden_dropout_prob = params['hidden_dropout_prob'] - layer_norm_eps = params['layer_norm_eps'] - use_cls_token = params['use_cls_token'] - vocab_size = params['vocab_size'] + patch_size = params.patch_size + hidden_size = params.hidden_size + num_blocks = params.num_blocks + num_attention_heads = params.num_attention_heads + attention_dropout_prob = params.attention_dropout_prob + intermediate_size = params.intermediate_size + hidden_dropout_prob = params.hidden_dropout_prob + layer_norm_eps = params.layer_norm_eps + use_cls_token = params.use_cls_token + vocab_size = params.vocab_size hidden_sizes = hidden_size if isinstance(hidden_size, list) else [hidden_size] * num_blocks super().__init__(hidden_sizes) @@ -122,4 +121,4 @@ def __init__( def vit(task, conf_model_backbone): # ViT tiny - return VisionTransformer(task, **conf_model_backbone) \ No newline at end of file + return VisionTransformer(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file From 6ee84f77220871e89fd4587bbdfe6d07f464892e Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 19:00:19 +0900 Subject: [PATCH 116/167] Handle postprocessor for full model --- src/netspresso_trainer/postprocessors/builder.py | 5 ++++- src/netspresso_trainer/postprocessors/register.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index 9b4e45b67..fb5ecbff2 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -2,7 +2,10 @@ def build_postprocessor(task: str, conf_model): - head_name = conf_model.architecture.head.name + if conf_model.single_task_model: + head_name = conf_model.architecture.full.name + else: + head_name = conf_model.architecture.head.name if head_name not in POSTPROCESSOR_DICT: return None return POSTPROCESSOR_DICT[head_name](conf_model) diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py index 0c705efe6..fc137c790 100644 --- a/src/netspresso_trainer/postprocessors/register.py +++ b/src/netspresso_trainer/postprocessors/register.py @@ -8,4 +8,5 @@ 'fc': ClassificationPostprocessor, 'all_mlp_decoder': SegmentationPostprocessor, 'yolox_head': DetectionPostprocessor, + 'pidnet': SegmentationPostprocessor, } From c78aa4d1444f5d0e73be18ddcc5ad4f36d2f2761 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 19:01:05 +0900 Subject: [PATCH 117/167] Update PIDNet to receive OmegaConf --- src/netspresso_trainer/models/builder.py | 3 +-- .../models/full/experimental/pidnet.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/netspresso_trainer/models/builder.py b/src/netspresso_trainer/models/builder.py index 78e9d13f4..acc642a43 100644 --- a/src/netspresso_trainer/models/builder.py +++ b/src/netspresso_trainer/models/builder.py @@ -16,8 +16,7 @@ def load_full_model(conf_model, model_name, num_classes, model_checkpoint): model_fn: Callable[..., nn.Module] = MODEL_FULL_DICT[model_name] - conf_model_full = OmegaConf.to_object(conf_model.architecture.full) - model: nn.Module = model_fn(num_classes=num_classes, conf_model_full=conf_model_full) + model: nn.Module = model_fn(num_classes=num_classes, conf_model_full=conf_model.architecture.full) model = load_from_checkpoint(model, model_checkpoint) return model diff --git a/src/netspresso_trainer/models/full/experimental/pidnet.py b/src/netspresso_trainer/models/full/experimental/pidnet.py index b3b630c96..10d3ac047 100644 --- a/src/netspresso_trainer/models/full/experimental/pidnet.py +++ b/src/netspresso_trainer/models/full/experimental/pidnet.py @@ -3,6 +3,7 @@ # ------------------------------------------------------------------------------ import logging import time +from typing import Optional, List, Dict import torch import torch.nn as nn @@ -17,8 +18,19 @@ class PIDNet(nn.Module): - def __init__(self, num_classes=19, m=2, n=3, planes=64, ppm_planes=96, head_planes=128, is_training=True, **kwargs): + def __init__( + self, + params: Optional[Dict] = None + ) -> None: super(PIDNet, self).__init__() + num_classes = params.num_classes + m = params.m + n = params.n + planes = params.planes + ppm_planes = params.ppm_planes + head_planes = params.head_planes + is_training = params.is_training + self.is_training = is_training # I Branch @@ -195,5 +207,6 @@ def forward(self, x: FXTensorType, label_size=None) -> PIDNetModelOutput: def pidnet(num_classes: int, conf_model_full) -> PIDNet: # PIDNet-S - return PIDNet(num_classes=num_classes, is_training=True, **conf_model_full) - + conf_model_full.num_classes = num_classes + conf_model_full.is_training = True + return PIDNet(params=conf_model_full) \ No newline at end of file From c2ab7f9cb2d3a3a6a48b1c39f65750a423eede9e Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 15 Nov 2023 19:01:44 +0900 Subject: [PATCH 118/167] Ruff fix --- src/netspresso_trainer/postprocessors/builder.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py index fb5ecbff2..abb93ffe5 100644 --- a/src/netspresso_trainer/postprocessors/builder.py +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -2,10 +2,7 @@ def build_postprocessor(task: str, conf_model): - if conf_model.single_task_model: - head_name = conf_model.architecture.full.name - else: - head_name = conf_model.architecture.head.name + head_name = conf_model.architecture.full.name if conf_model.single_task_model else conf_model.architecture.head.name if head_name not in POSTPROCESSOR_DICT: return None return POSTPROCESSOR_DICT[head_name](conf_model) From 4211caee2d929c67cdda76fce1b56e96d8bbdf7d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 16 Nov 2023 09:58:04 +0900 Subject: [PATCH 119/167] Change Dict -> DictCinfig, List[Dict] -> List --- .../models/backbones/experimental/darknet.py | 5 +++-- .../models/backbones/experimental/efficientformer.py | 5 +++-- .../models/backbones/experimental/mobilenetv3.py | 5 +++-- .../models/backbones/experimental/mobilevit.py | 11 ++++++----- .../models/backbones/experimental/resnet.py | 5 +++-- .../models/backbones/experimental/segformer.py | 5 +++-- .../models/backbones/experimental/vit.py | 5 +++-- 7 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/darknet.py b/src/netspresso_trainer/models/backbones/experimental/darknet.py index 28ebf9c5c..49b134e8c 100644 --- a/src/netspresso_trainer/models/backbones/experimental/darknet.py +++ b/src/netspresso_trainer/models/backbones/experimental/darknet.py @@ -4,6 +4,7 @@ """ from typing import Dict, Optional, List +from omegaconf import DictConfig import torch from torch import nn @@ -19,8 +20,8 @@ class CSPDarknet(nn.Module): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, #depthwise=False, ) -> None: super().__init__() diff --git a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py index 754dc8f92..23ee2ae7a 100644 --- a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py @@ -7,6 +7,7 @@ import os from typing import Dict, Optional, List +from omegaconf import DictConfig import torch import torch.nn as nn @@ -331,8 +332,8 @@ class EfficientFormer(MetaFormer): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: num_blocks = [stage.num_blocks for stage in stage_params] diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index fda8c1984..0a12213fa 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -4,6 +4,7 @@ """ from typing import List, Dict, Optional +from omegaconf import DictConfig import torch import torch.nn as nn from torch import Tensor @@ -21,8 +22,8 @@ class MobileNetV3(nn.Module): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super(MobileNetV3, self).__init__() diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py index 828879273..5a8ed0e90 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py @@ -7,6 +7,7 @@ import math from typing import Any, Dict, Literal, Optional, Tuple, Union, List +from omegaconf import DictConfig import torch import torch.nn as nn import torch.nn.functional as F @@ -253,9 +254,9 @@ def forward( class MobileViTEncoder(MetaFormerEncoder): def __init__( - self, - params: Optional[Dict], - stage_params: Optional[List[Dict]], + self, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super().__init__() stages = [] @@ -359,8 +360,8 @@ class MobileViT(MetaFormer): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: exp_channels = min(params.exp_factor * stage_params[-1].out_channels, 960) hidden_sizes = [stage.out_channels for stage in stage_params] + [exp_channels] diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index a92a6c3b8..34e87a6c4 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -4,6 +4,7 @@ """ from typing import Dict, List, Literal, Optional, Type, Union +from omegaconf import DictConfig import torch import torch.nn as nn from torch import Tensor @@ -26,8 +27,8 @@ class ResNet(nn.Module): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super(ResNet, self).__init__() diff --git a/src/netspresso_trainer/models/backbones/experimental/segformer.py b/src/netspresso_trainer/models/backbones/experimental/segformer.py index d4a9903ea..8dee0d825 100644 --- a/src/netspresso_trainer/models/backbones/experimental/segformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/segformer.py @@ -1,6 +1,7 @@ import math from typing import Optional, List, Dict +from omegaconf import DictConfig import torch import torch.nn as nn @@ -138,8 +139,8 @@ class SegFormer(MetaFormer): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super().__init__([stage.hidden_sizes for stage in stage_params]) self.task = task diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index bce563688..31cedd8a7 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -5,6 +5,7 @@ import argparse from typing import Any, Dict, Optional, Tuple, Union, List +from omegaconf import DictConfig import torch import torch.nn as nn @@ -94,8 +95,8 @@ class VisionTransformer(MetaFormer): def __init__( self, task: str, - params: Optional[Dict] = None, - stage_params: Optional[List[Dict]] = None, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: patch_size = params.patch_size hidden_size = params.hidden_size From 8de983e9286bcbd84f1261fc6bfd16ca57f74d86 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 16 Nov 2023 18:19:18 +0900 Subject: [PATCH 120/167] Init MixNet for classification --- .../model/mixnet/mixnet-s-classification.yaml | 36 +++ .../models/backbones/__init__.py | 1 + .../models/backbones/experimental/mixnet.py | 284 ++++++++++++++++++ src/netspresso_trainer/models/registry.py | 5 +- 4 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 config/model/mixnet/mixnet-s-classification.yaml create mode 100644 src/netspresso_trainer/models/backbones/experimental/mixnet.py diff --git a/config/model/mixnet/mixnet-s-classification.yaml b/config/model/mixnet/mixnet-s-classification.yaml new file mode 100644 index 000000000..6d15eb133 --- /dev/null +++ b/config/model/mixnet/mixnet-s-classification.yaml @@ -0,0 +1,36 @@ +model: + task: classification + checkpoint: ./weights/mixnet/mixnet_s.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 16 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + [ + # t, c, n, k, ek, pk, s, d, a, se + [1, 16, 1, [3], [1], [1], 1, 1, "relu", ~], + [6, 24, 1, [3], [1, 1], [1, 1], 2, 1, "relu", ~], + [3, 24, 1, [3], [1, 1], [1, 1], 1, 1, "relu", ~], + [6, 40, 1, [3, 5, 7], [1], [1], 2, 1, "swish", 2], + [6, 40, 3, [3, 5], [1, 1], [1, 1], 1, 1, "swish", 2], + [6, 80, 1, [3, 5, 7], [1], [1, 1], 2, 1, "swish", 4], + [6, 80, 2, [3, 5], [1], [1, 1], 1, 1, "swish", 4], + [6, 120, 1, [3, 5, 7], [1, 1], [1, 1], 1, 1, "swish", 2], + [3, 120, 2, [3, 5, 7, 9], [1, 1], [1, 1], 1, 1, "swish", 2], + [6, 200, 1, [3, 5, 7, 9, 11], [1], [1], 2, 1, "swish", 2], + [6, 200, 2, [3, 5, 7, 9], [1], [1, 1], 1, 1, "swish", 2] + ] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/src/netspresso_trainer/models/backbones/__init__.py b/src/netspresso_trainer/models/backbones/__init__.py index 591a70d1d..8d921cb91 100644 --- a/src/netspresso_trainer/models/backbones/__init__.py +++ b/src/netspresso_trainer/models/backbones/__init__.py @@ -6,3 +6,4 @@ from .experimental.resnet import resnet50 from .experimental.segformer import segformer from .experimental.vit import vit +from .experimental.mixnet import mixnet diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py new file mode 100644 index 000000000..f962a9cea --- /dev/null +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -0,0 +1,284 @@ +""" +Based on the publicly available MixNet-PyTorch repository. +https://github.com/romulus0914/MixNet-PyTorch/blob/master/mixnet.py +""" +from typing import List, Dict, Optional + +from omegaconf import DictConfig +from torch.nn import functional as F +from collections import OrderedDict +from ...op.registry import ACTIVATION_REGISTRY +from ...op.custom import ConvLayer +Swish = ACTIVATION_REGISTRY['swish'] +from ...utils import BackboneOutput + +from torch import nn +import torch +import math + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# SEBlock: Squeeze & Excitation (SCSE) +# namely, Channel-wise Attention +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class SEBlock(nn.Module): + def __init__(self, in_planes, reduced_dim, act_type="swish"): + super(SEBlock, self).__init__() + self.channel_se = nn.Sequential(OrderedDict([ + ("linear1", nn.Conv2d(in_planes, reduced_dim, kernel_size=1, stride=1, padding=0, bias=True)), + ("act", Swish() if act_type == "swish" else nn.ReLU()), + ("linear2", nn.Conv2d(reduced_dim, in_planes, kernel_size=1, stride=1, padding=0, bias=True)) + ])) + + def forward(self, x): + x_se = torch.sigmoid(self.channel_se(F.adaptive_avg_pool2d(x, output_size=(1, 1)))) + return torch.mul(x, x_se) + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# GPConv: Grouped Point-wise Convolution for MixDepthBlock +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class GPConv(nn.Module): + def __init__(self, in_planes, out_planes, kernel_sizes): + super(GPConv, self).__init__() + self.num_groups = len(kernel_sizes) + assert in_planes % self.num_groups == 0 + sub_in_dim = in_planes // self.num_groups + sub_out_dim = out_planes // self.num_groups + + self.group_point_wise = nn.ModuleList() + for _ in kernel_sizes: + self.group_point_wise.append(nn.Conv2d(sub_in_dim, sub_out_dim, + kernel_size=1, stride=1, padding=0, + groups=1, dilation=1, bias=False)) + + def forward(self, x): + if self.num_groups == 1: + return self.group_point_wise[0](x) + + chunks = torch.chunk(x, chunks=self.num_groups, dim=1) + mix = [self.group_point_wise[stream](chunks[stream]) for stream in range(self.num_groups)] + return torch.cat(mix, dim=1) + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# MDConv: Mixed Depth-wise Convolution for MixDepthBlock +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class MDConv(nn.Module): + def __init__(self, in_planes, kernel_sizes, stride=1, dilate=1): + super(MDConv, self).__init__() + self.num_groups = len(kernel_sizes) + assert in_planes % self.num_groups == 0 + sub_hidden_dim = in_planes // self.num_groups + + assert stride in [1, 2] + dilate = 1 if stride > 1 else dilate + + self.mixed_depth_wise = nn.ModuleList() + for kernel_size in kernel_sizes: + padding = ((kernel_size - 1) // 2) * dilate + self.mixed_depth_wise.append(nn.Conv2d(sub_hidden_dim, sub_hidden_dim, + kernel_size=kernel_size, stride=stride, padding=padding, + groups=sub_hidden_dim, dilation=dilate, bias=False)) + + def forward(self, x): + if self.num_groups == 1: + return self.mixed_depth_wise[0](x) + + chunks = torch.chunk(x, chunks=self.num_groups, dim=1) + mix = [self.mixed_depth_wise[stream](chunks[stream]) for stream in range(self.num_groups)] + return torch.cat(mix, dim=1) + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# MixDepthBlock: MixDepthBlock for MixNet +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class MixDepthBlock(nn.Module): + def __init__(self, in_planes, out_planes, + expand_ratio, exp_kernel_sizes, kernel_sizes, poi_kernel_sizes, stride, dilate, + reduction_ratio=4, dropout_rate=0.2, act_type="swish"): + super(MixDepthBlock, self).__init__() + self.dropout_rate = dropout_rate + self.expand_ratio = expand_ratio + + self.groups = len(kernel_sizes) + self.use_se = (reduction_ratio is not None) and (reduction_ratio > 1) + self.use_residual = in_planes == out_planes and stride == 1 + + assert stride in [1, 2] + dilate = 1 if stride > 1 else dilate + hidden_dim = in_planes * expand_ratio + + # step 1. Expansion phase/Point-wise convolution + if expand_ratio != 1: + self.expansion = nn.Sequential(OrderedDict([ + ("conv", GPConv(in_planes, hidden_dim, kernel_sizes=exp_kernel_sizes)), + ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), + ("act", Swish() if act_type == "swish" else nn.ReLU()) + ])) + + # step 2. Depth-wise convolution phase + self.depth_wise = nn.Sequential(OrderedDict([ + ("conv", MDConv(hidden_dim, kernel_sizes=kernel_sizes, stride=stride, dilate=dilate)), + ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), + ("act", Swish() if act_type == "swish" else nn.ReLU()) + ])) + + # step 3. Squeeze and Excitation + if self.use_se: + reduced_dim = max(1, int(in_planes / reduction_ratio)) + self.se_block = SEBlock(hidden_dim, reduced_dim, act_type=act_type) + + # step 4. Point-wise convolution phase + self.point_wise = nn.Sequential(OrderedDict([ + ("conv", GPConv(hidden_dim, out_planes, kernel_sizes=poi_kernel_sizes)), + ("norm", nn.BatchNorm2d(out_planes, eps=1e-3, momentum=0.01)) + ])) + + def forward(self, x): + res = x + + # step 1. Expansion phase/Point-wise convolution + if self.expand_ratio != 1: + x = self.expansion(x) + + # step 2. Depth-wise convolution phase + x = self.depth_wise(x) + + # step 3. Squeeze and Excitation + if self.use_se: + x = self.se_block(x) + + # step 4. Point-wise convolution phase + x = self.point_wise(x) + + # step 5. Skip connection and drop connect + if self.use_residual: + if self.training and (self.dropout_rate is not None): + x = F.dropout2d(input=x, p=self.dropout_rate, + training=self.training, ) + x = x + res + + return x + + +class MixNet(nn.Module): + def __init__( + self, + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, + ): + super(MixNet, self).__init__() + + stem_planes = params.stem_planes + width_multi = params.width_multi + depth_multi = params.depth_multi + self.dropout_rate = params.dropout_rate + + settings = stage_params + + out_channels = self._round_filters(stem_planes, width_multi) + self.mod1 = ConvLayer(in_channels=3, out_channels=out_channels, kernel_size=3, + stride=2, groups=1, dilation=1, act_type="relu") + + in_channels = out_channels + drop_rate = self.dropout_rate + mod_id = 0 + for t, c, n, k, ek, pk, s, d, a, se in settings: + out_channels = self._round_filters(c, width_multi) + repeats = self._round_repeats(n, depth_multi) + + if self.dropout_rate: + drop_rate = self.dropout_rate * float(mod_id + 1) / len(settings) + + # Create blocks for module + blocks = [] + for block_id in range(repeats): + stride = s if block_id == 0 else 1 + dilate = d if stride == 1 else 1 + + blocks.append(("block%d" % (block_id + 1), MixDepthBlock(in_channels, out_channels, + expand_ratio=t, exp_kernel_sizes=ek, + kernel_sizes=k, poi_kernel_sizes=pk, + stride=stride, dilate=dilate, + reduction_ratio=se, + dropout_rate=drop_rate, + act_type=a))) + + in_channels = out_channels + self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks))) + mod_id += 1 + + self.last_channels = 1536 + self.last_feat = ConvLayer(in_channels=in_channels, out_channels=self.last_channels, + kernel_size=1, stride=1, groups=1, dilation=1, act_type="relu") + + self.avgpool = nn.AdaptiveAvgPool2d(1) + + self._feature_dim = self.last_channels + + self._initialize_weights() + + def _initialize_weights(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + fan_out = m.weight.size(0) + init_range = 1.0 / math.sqrt(fan_out) + nn.init.uniform_(m.weight, -init_range, init_range) + if m.bias is not None: + nn.init.zeros_(m.bias) + + @staticmethod + def _make_divisible(value, divisor=8): + new_value = max(divisor, int(value + divisor / 2) // divisor * divisor) + if new_value < 0.9 * value: + new_value += divisor + return new_value + + def _round_filters(self, filters, width_multi): + if width_multi == 1.0: + return filters + return int(self._make_divisible(filters * width_multi)) + + @staticmethod + def _round_repeats(repeats, depth_multi): + if depth_multi == 1.0: + return repeats + return int(math.ceil(depth_multi * repeats)) + + @property + def feature_dim(self): + return self._feature_dim + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim + + def forward(self, x): + x = self.mod2(self.mod1(x)) # (N, C, H/2, W/2) + x = self.mod4(self.mod3(x)) # (N, C, H/4, W/4) + x = self.mod6(self.mod5(x)) # (N, C, H/8, W/8) + x = self.mod10(self.mod9(self.mod8(self.mod7(x)))) # (N, C, H/16, W/16) + x = self.mod12(self.mod11(x)) # (N, C, H/32, W/32) + x = self.last_feat(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + if self.training and (self.dropout_rate is not None): + x = F.dropout(input=x, p=self.dropout_rate, + training=self.training, ) + + return BackboneOutput(last_feature=x) + + +def mixnet(task, conf_model_backbone) -> MixNet: + return MixNet(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index c43ed0f47..f047af25f 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -3,7 +3,7 @@ import torch.nn as nn -from .backbones import cspdarknet, efficientformer, mobilenetv3_small, mobilevit, resnet50, segformer, vit +from .backbones import cspdarknet, efficientformer, mobilenetv3_small, mobilevit, resnet50, segformer, vit, mixnet from .full import pidnet from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head @@ -16,7 +16,8 @@ 'mobilevit': mobilevit, 'vit': vit, 'efficientformer': efficientformer, - 'cspdarknet': cspdarknet + 'cspdarknet': cspdarknet, + 'mixnet': mixnet, } MODEL_HEAD_DICT: Dict[str, Callable[..., nn.Module]] = { From d8f1bd3aaa44b48a87a29278bb3d3548c8b8af38 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 10:56:17 +0900 Subject: [PATCH 121/167] Replace SEBlock with torchvision --- .../models/backbones/experimental/mixnet.py | 20 ++----------------- 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index f962a9cea..00c08b565 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -6,6 +6,7 @@ from omegaconf import DictConfig from torch.nn import functional as F +from torchvision.ops.misc import SqueezeExcitation as SEBlock from collections import OrderedDict from ...op.registry import ACTIVATION_REGISTRY from ...op.custom import ConvLayer @@ -17,23 +18,6 @@ import math -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # -# SEBlock: Squeeze & Excitation (SCSE) -# namely, Channel-wise Attention -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # -class SEBlock(nn.Module): - def __init__(self, in_planes, reduced_dim, act_type="swish"): - super(SEBlock, self).__init__() - self.channel_se = nn.Sequential(OrderedDict([ - ("linear1", nn.Conv2d(in_planes, reduced_dim, kernel_size=1, stride=1, padding=0, bias=True)), - ("act", Swish() if act_type == "swish" else nn.ReLU()), - ("linear2", nn.Conv2d(reduced_dim, in_planes, kernel_size=1, stride=1, padding=0, bias=True)) - ])) - - def forward(self, x): - x_se = torch.sigmoid(self.channel_se(F.adaptive_avg_pool2d(x, output_size=(1, 1)))) - return torch.mul(x, x_se) - # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # # GPConv: Grouped Point-wise Convolution for MixDepthBlock # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # @@ -126,7 +110,7 @@ def __init__(self, in_planes, out_planes, # step 3. Squeeze and Excitation if self.use_se: reduced_dim = max(1, int(in_planes / reduction_ratio)) - self.se_block = SEBlock(hidden_dim, reduced_dim, act_type=act_type) + self.se_block = SEBlock(input_channels=hidden_dim, squeeze_channels=reduced_dim, activation=ACTIVATION_REGISTRY[act_type]) # step 4. Point-wise convolution phase self.point_wise = nn.Sequential(OrderedDict([ From 64fd38df50a747eb323cf5d4ae8035c6b240d82d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 10:58:11 +0900 Subject: [PATCH 122/167] Fix import order --- .../models/backbones/experimental/mixnet.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index 00c08b565..64f105005 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -2,21 +2,21 @@ Based on the publicly available MixNet-PyTorch repository. https://github.com/romulus0914/MixNet-PyTorch/blob/master/mixnet.py """ -from typing import List, Dict, Optional +from collections import OrderedDict +import math +from typing import Dict, List, Optional from omegaconf import DictConfig +import torch +from torch import nn from torch.nn import functional as F from torchvision.ops.misc import SqueezeExcitation as SEBlock -from collections import OrderedDict + from ...op.registry import ACTIVATION_REGISTRY from ...op.custom import ConvLayer -Swish = ACTIVATION_REGISTRY['swish'] from ...utils import BackboneOutput -from torch import nn -import torch -import math - +Swish = ACTIVATION_REGISTRY['swish'] # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # # GPConv: Grouped Point-wise Convolution for MixDepthBlock From ca9ec6eb437b753692e6c266b76edf19fd862de6 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 12:59:52 +0900 Subject: [PATCH 123/167] Fix MixNet to stage format --- .../model/mixnet/mixnet-s-classification.yaml | 28 +++--- .../models/backbones/experimental/mixnet.py | 88 +++++++++++-------- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/config/model/mixnet/mixnet-s-classification.yaml b/config/model/mixnet/mixnet-s-classification.yaml index 6d15eb133..48b9f9829 100644 --- a/config/model/mixnet/mixnet-s-classification.yaml +++ b/config/model/mixnet/mixnet-s-classification.yaml @@ -14,20 +14,22 @@ model: depth_multi: 1.0 dropout_rate: 0. stage_params: - [ + - # t, c, n, k, ek, pk, s, d, a, se - [1, 16, 1, [3], [1], [1], 1, 1, "relu", ~], - [6, 24, 1, [3], [1, 1], [1, 1], 2, 1, "relu", ~], - [3, 24, 1, [3], [1, 1], [1, 1], 1, 1, "relu", ~], - [6, 40, 1, [3, 5, 7], [1], [1], 2, 1, "swish", 2], - [6, 40, 3, [3, 5], [1, 1], [1, 1], 1, 1, "swish", 2], - [6, 80, 1, [3, 5, 7], [1], [1, 1], 2, 1, "swish", 4], - [6, 80, 2, [3, 5], [1], [1, 1], 1, 1, "swish", 4], - [6, 120, 1, [3, 5, 7], [1, 1], [1, 1], 1, 1, "swish", 2], - [3, 120, 2, [3, 5, 7, 9], [1, 1], [1, 1], 1, 1, "swish", 2], - [6, 200, 1, [3, 5, 7, 9, 11], [1], [1], 2, 1, "swish", 2], - [6, 200, 2, [3, 5, 7, 9], [1], [1, 1], 1, 1, "swish", 2] - ] + - [1, 16, 1, [3], [1], [1], 1, 1, "relu", ~] + - [6, 24, 1, [3], [1, 1], [1, 1], 2, 1, "relu", ~] + - [3, 24, 1, [3], [1, 1], [1, 1], 1, 1, "relu", ~] + - + - [6, 40, 1, [3, 5, 7], [1], [1], 2, 1, "swish", 2] + - [6, 40, 3, [3, 5], [1, 1], [1, 1], 1, 1, "swish", 2] + - + - [6, 80, 1, [3, 5, 7], [1], [1, 1], 2, 1, "swish", 4] + - [6, 80, 2, [3, 5], [1], [1, 1], 1, 1, "swish", 4] + - [6, 120, 1, [3, 5, 7], [1, 1], [1, 1], 1, 1, "swish", 2] + - [3, 120, 2, [3, 5, 7, 9], [1, 1], [1, 1], 1, 1, "swish", 2] + - + - [6, 200, 1, [3, 5, 7, 9, 11], [1], [1], 2, 1, "swish", 2] + - [6, 200, 2, [3, 5, 7, 9], [1], [1, 1], 1, 1, "swish", 2] head: name: fc losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index 64f105005..3d2cfb685 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -167,39 +167,51 @@ def __init__( in_channels = out_channels drop_rate = self.dropout_rate - mod_id = 0 - for t, c, n, k, ek, pk, s, d, a, se in settings: - out_channels = self._round_filters(c, width_multi) - repeats = self._round_repeats(n, depth_multi) - - if self.dropout_rate: - drop_rate = self.dropout_rate * float(mod_id + 1) / len(settings) - - # Create blocks for module - blocks = [] - for block_id in range(repeats): - stride = s if block_id == 0 else 1 - dilate = d if stride == 1 else 1 - - blocks.append(("block%d" % (block_id + 1), MixDepthBlock(in_channels, out_channels, - expand_ratio=t, exp_kernel_sizes=ek, - kernel_sizes=k, poi_kernel_sizes=pk, - stride=stride, dilate=dilate, - reduction_ratio=se, - dropout_rate=drop_rate, - act_type=a))) - - in_channels = out_channels - self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks))) - mod_id += 1 - - self.last_channels = 1536 - self.last_feat = ConvLayer(in_channels=in_channels, out_channels=self.last_channels, - kernel_size=1, stride=1, groups=1, dilation=1, act_type="relu") - + stages: List[nn.Module] = [] + for stg_idx, blocks in enumerate(stage_params): + + stage: List[nn.Module] = [] + for block in blocks: + t, c, n, k, ek, pk, s, d, a, se = block + out_channels = self._round_filters(c, width_multi) + repeats = self._round_repeats(n, depth_multi) + # Create blocks for module + blocks = [] + for block_id in range(repeats): + stride = s if block_id == 0 else 1 + dilate = d if stride == 1 else 1 + + stage.append(MixDepthBlock(in_channels, out_channels, + expand_ratio=t, exp_kernel_sizes=ek, + kernel_sizes=k, poi_kernel_sizes=pk, + stride=stride, dilate=dilate, + reduction_ratio=se, + dropout_rate=drop_rate, + act_type=a)) + + in_channels = out_channels + + # add last conv + if stg_idx == len(stage_params) - 1: + self.last_channels = 1536 + stage.append( + ConvLayer(in_channels=in_channels, + out_channels=self.last_channels, + kernel_size=1, + stride=1, + groups=1, + dilation=1, + act_type="relu") + ) + + stage = nn.Sequential(*stage) + stages.append(stage) + + self.stages = nn.ModuleList(stages) self.avgpool = nn.AdaptiveAvgPool2d(1) self._feature_dim = self.last_channels + self.use_intermediate_features = False self._initialize_weights() @@ -247,12 +259,16 @@ def intermediate_features_dim(self): return self._intermediate_features_dim def forward(self, x): - x = self.mod2(self.mod1(x)) # (N, C, H/2, W/2) - x = self.mod4(self.mod3(x)) # (N, C, H/4, W/4) - x = self.mod6(self.mod5(x)) # (N, C, H/8, W/8) - x = self.mod10(self.mod9(self.mod8(self.mod7(x)))) # (N, C, H/16, W/16) - x = self.mod12(self.mod11(x)) # (N, C, H/32, W/32) - x = self.last_feat(x) + x = self.mod1(x) + + all_hidden_states = () if self.use_intermediate_features else None + for stage in self.stages: + x = stage(x) + if self.use_intermediate_features: + all_hidden_states = all_hidden_states + (x, ) + + if self.use_intermediate_features: + return BackboneOutput(intermediate_features=all_hidden_states) x = self.avgpool(x) x = torch.flatten(x, 1) From 760fadb62cb27e820c4398d652b6fb1e787b3394 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 13:31:55 +0900 Subject: [PATCH 124/167] Fix stage_params to keyword --- .../model/mixnet/mixnet-s-classification.yaml | 52 ++++++++++++++----- .../models/backbones/experimental/mixnet.py | 11 ++-- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/config/model/mixnet/mixnet-s-classification.yaml b/config/model/mixnet/mixnet-s-classification.yaml index 48b9f9829..99204f91d 100644 --- a/config/model/mixnet/mixnet-s-classification.yaml +++ b/config/model/mixnet/mixnet-s-classification.yaml @@ -15,21 +15,49 @@ model: dropout_rate: 0. stage_params: - - # t, c, n, k, ek, pk, s, d, a, se - - [1, 16, 1, [3], [1], [1], 1, 1, "relu", ~] - - [6, 24, 1, [3], [1, 1], [1, 1], 2, 1, "relu", ~] - - [3, 24, 1, [3], [1, 1], [1, 1], 1, 1, "relu", ~] + expand_ratio: [1, 6, 3] + out_channels: [16, 24, 24] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] - - - [6, 40, 1, [3, 5, 7], [1], [1], 2, 1, "swish", 2] - - [6, 40, 3, [3, 5], [1, 1], [1, 1], 1, 1, "swish", 2] + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] - - - [6, 80, 1, [3, 5, 7], [1], [1, 1], 2, 1, "swish", 4] - - [6, 80, 2, [3, 5], [1], [1, 1], 1, 1, "swish", 4] - - [6, 120, 1, [3, 5, 7], [1, 1], [1, 1], 1, 1, "swish", 2] - - [3, 120, 2, [3, 5, 7, 9], [1, 1], [1, 1], 1, 1, "swish", 2] + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 2, 1, 2] + kernel_sizes: [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1, 1], [1, 1], [1, 1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] - - - [6, 200, 1, [3, 5, 7, 9, 11], [1], [1], 2, 1, "swish", 2] - - [6, 200, 2, [3, 5, 7, 9], [1], [1, 1], 1, 1, "swish", 2] + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 2] + kernel_sizes: [[3, 5, 7, 9, 11], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] head: name: fc losses: diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index 3d2cfb685..9f57ed44f 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -159,8 +159,6 @@ def __init__( depth_multi = params.depth_multi self.dropout_rate = params.dropout_rate - settings = stage_params - out_channels = self._round_filters(stem_planes, width_multi) self.mod1 = ConvLayer(in_channels=3, out_channels=out_channels, kernel_size=3, stride=2, groups=1, dilation=1, act_type="relu") @@ -168,15 +166,16 @@ def __init__( in_channels = out_channels drop_rate = self.dropout_rate stages: List[nn.Module] = [] - for stg_idx, blocks in enumerate(stage_params): + for stg_idx, stage_info in enumerate(stage_params): stage: List[nn.Module] = [] - for block in blocks: + for block in zip(stage_info.expand_ratio, stage_info.out_channels, stage_info.num_blocks, + stage_info.kernel_sizes, stage_info.exp_kernel_sizes, stage_info.poi_kernel_sizes, + stage_info.stride, stage_info.dilation, stage_info.act_type, stage_info.se_reduction_ratio): t, c, n, k, ek, pk, s, d, a, se = block out_channels = self._round_filters(c, width_multi) repeats = self._round_repeats(n, depth_multi) - # Create blocks for module - blocks = [] + for block_id in range(repeats): stride = s if block_id == 0 else 1 dilate = d if stride == 1 else 1 From 9e79899a2553dcfc2e62ad83d6eca2dac6be0dcd Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 13:54:20 +0900 Subject: [PATCH 125/167] Fix activation to get from registry --- .../models/backbones/experimental/mixnet.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index 9f57ed44f..3600e5412 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -16,7 +16,6 @@ from ...op.custom import ConvLayer from ...utils import BackboneOutput -Swish = ACTIVATION_REGISTRY['swish'] # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # # GPConv: Grouped Point-wise Convolution for MixDepthBlock @@ -97,14 +96,14 @@ def __init__(self, in_planes, out_planes, self.expansion = nn.Sequential(OrderedDict([ ("conv", GPConv(in_planes, hidden_dim, kernel_sizes=exp_kernel_sizes)), ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), - ("act", Swish() if act_type == "swish" else nn.ReLU()) + ("act", ACTIVATION_REGISTRY[act_type]()) ])) # step 2. Depth-wise convolution phase self.depth_wise = nn.Sequential(OrderedDict([ ("conv", MDConv(hidden_dim, kernel_sizes=kernel_sizes, stride=stride, dilate=dilate)), ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), - ("act", Swish() if act_type == "swish" else nn.ReLU()) + ("act", ACTIVATION_REGISTRY[act_type]()) ])) # step 3. Squeeze and Excitation From 11d3898f9a5a5734ec4463555e571c4a2656ee29 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 16:17:07 +0900 Subject: [PATCH 126/167] Enable to produce intermediate features --- .../models/backbones/experimental/mixnet.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py index 3600e5412..ba4c57b6a 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mixnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -82,6 +82,7 @@ def __init__(self, in_planes, out_planes, super(MixDepthBlock, self).__init__() self.dropout_rate = dropout_rate self.expand_ratio = expand_ratio + self.out_channels = out_planes self.groups = len(kernel_sizes) self.use_se = (reduction_ratio is not None) and (reduction_ratio > 1) @@ -152,6 +153,8 @@ def __init__( stage_params: Optional[List] = None, ): super(MixNet, self).__init__() + self.task = task.lower() + self.use_intermediate_features = self.task in ['segmentation', 'detection'] stem_planes = params.stem_planes width_multi = params.width_multi @@ -209,7 +212,8 @@ def __init__( self.avgpool = nn.AdaptiveAvgPool2d(1) self._feature_dim = self.last_channels - self.use_intermediate_features = False + self._intermediate_features_dim = [s[-1].out_channels for s in self.stages[:-1]] + self._intermediate_features_dim += [self.last_channels] self._initialize_weights() From 1cec4ce8b4d440ac84b498f1284a0fcd0c79d818 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 16:37:16 +0900 Subject: [PATCH 127/167] Add mixnet model configs --- .../model/mixnet/mixnet-l-classification.yaml | 66 +++++++++++++++++++ .../model/mixnet/mixnet-l-segmentation.yaml | 66 +++++++++++++++++++ .../model/mixnet/mixnet-m-classification.yaml | 66 +++++++++++++++++++ .../model/mixnet/mixnet-m-segmentation.yaml | 66 +++++++++++++++++++ .../model/mixnet/mixnet-s-segmentation.yaml | 66 +++++++++++++++++++ 5 files changed, 330 insertions(+) create mode 100644 config/model/mixnet/mixnet-l-classification.yaml create mode 100644 config/model/mixnet/mixnet-l-segmentation.yaml create mode 100644 config/model/mixnet/mixnet-m-classification.yaml create mode 100644 config/model/mixnet/mixnet-m-segmentation.yaml create mode 100644 config/model/mixnet/mixnet-s-segmentation.yaml diff --git a/config/model/mixnet/mixnet-l-classification.yaml b/config/model/mixnet/mixnet-l-classification.yaml new file mode 100644 index 000000000..2ebc0e500 --- /dev/null +++ b/config/model/mixnet/mixnet-l-classification.yaml @@ -0,0 +1,66 @@ +model: + task: classification + checkpoint: ./weights/mixnet/mixnet_l.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.3 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/config/model/mixnet/mixnet-l-segmentation.yaml b/config/model/mixnet/mixnet-l-segmentation.yaml new file mode 100644 index 000000000..8d01b59f8 --- /dev/null +++ b/config/model/mixnet/mixnet-l-segmentation.yaml @@ -0,0 +1,66 @@ +model: + task: segmentation + checkpoint: ./weights/mixnet/mixnet_l.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.3 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file diff --git a/config/model/mixnet/mixnet-m-classification.yaml b/config/model/mixnet/mixnet-m-classification.yaml new file mode 100644 index 000000000..9f9e495d2 --- /dev/null +++ b/config/model/mixnet/mixnet-m-classification.yaml @@ -0,0 +1,66 @@ +model: + task: classification + checkpoint: ./weights/mixnet/mixnet_m.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/config/model/mixnet/mixnet-m-segmentation.yaml b/config/model/mixnet/mixnet-m-segmentation.yaml new file mode 100644 index 000000000..adad16bd6 --- /dev/null +++ b/config/model/mixnet/mixnet-m-segmentation.yaml @@ -0,0 +1,66 @@ +model: + task: segmentation + checkpoint: ./weights/mixnet/mixnet_m.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file diff --git a/config/model/mixnet/mixnet-s-segmentation.yaml b/config/model/mixnet/mixnet-s-segmentation.yaml new file mode 100644 index 000000000..506a19ae4 --- /dev/null +++ b/config/model/mixnet/mixnet-s-segmentation.yaml @@ -0,0 +1,66 @@ +model: + task: segmentation + checkpoint: ./weights/mixnet/mixnet_s.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 16 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. + stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [16, 24, 24] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 2, 1, 2] + kernel_sizes: [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1, 1], [1, 1], [1, 1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 2] + kernel_sizes: [[3, 5, 7, 9, 11], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file From f1f98a3b575274ea8fcbe295defcc1803138b593 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 16:42:39 +0900 Subject: [PATCH 128/167] Ruff fix --- src/netspresso_trainer/models/backbones/__init__.py | 2 +- src/netspresso_trainer/models/registry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/models/backbones/__init__.py b/src/netspresso_trainer/models/backbones/__init__.py index 8d921cb91..363ff9e1d 100644 --- a/src/netspresso_trainer/models/backbones/__init__.py +++ b/src/netspresso_trainer/models/backbones/__init__.py @@ -1,9 +1,9 @@ # from .core import * from .experimental.darknet import cspdarknet from .experimental.efficientformer import efficientformer +from .experimental.mixnet import mixnet from .experimental.mobilenetv3 import mobilenetv3_small from .experimental.mobilevit import mobilevit from .experimental.resnet import resnet50 from .experimental.segformer import segformer from .experimental.vit import vit -from .experimental.mixnet import mixnet diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index f047af25f..9ae83fa22 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -3,7 +3,7 @@ import torch.nn as nn -from .backbones import cspdarknet, efficientformer, mobilenetv3_small, mobilevit, resnet50, segformer, vit, mixnet +from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3_small, mobilevit, resnet50, segformer, vit from .full import pidnet from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head From 171a94bbd0c8355b4cd5fe52e9d5e3e5940b5608 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 17 Nov 2023 16:43:18 +0900 Subject: [PATCH 129/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ffb9cc0d..b8ba940e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) - Support augmentation for classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) +- Add model: MixNet by `@illian01` in [PR 229](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/229) ## Bug Fixes: From 0c5af46f1431fdc244ab16475f5a7dbe09b945bb Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 10:49:49 +0900 Subject: [PATCH 130/167] Init neck module --- src/netspresso_trainer/models/necks/__init__.py | 0 src/netspresso_trainer/models/necks/core/.gitkeep | 0 src/netspresso_trainer/models/necks/core/__init__.py | 0 src/netspresso_trainer/models/necks/experimental/__init__.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/netspresso_trainer/models/necks/__init__.py create mode 100644 src/netspresso_trainer/models/necks/core/.gitkeep create mode 100644 src/netspresso_trainer/models/necks/core/__init__.py create mode 100644 src/netspresso_trainer/models/necks/experimental/__init__.py diff --git a/src/netspresso_trainer/models/necks/__init__.py b/src/netspresso_trainer/models/necks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/netspresso_trainer/models/necks/core/.gitkeep b/src/netspresso_trainer/models/necks/core/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/src/netspresso_trainer/models/necks/core/__init__.py b/src/netspresso_trainer/models/necks/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/netspresso_trainer/models/necks/experimental/__init__.py b/src/netspresso_trainer/models/necks/experimental/__init__.py new file mode 100644 index 000000000..e69de29bb From 98c1bf965024ee8ef7b71f84ec2499df214a1d40 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 11:29:54 +0900 Subject: [PATCH 131/167] Move fpn to fpn module --- .../heads/detection/experimental/fpn.py | 142 --------------- .../models/necks/__init__.py | 1 + .../models/necks/experimental/fpn.py | 166 ++++++++++++++++++ src/netspresso_trainer/models/registry.py | 5 + 4 files changed, 172 insertions(+), 142 deletions(-) create mode 100644 src/netspresso_trainer/models/necks/experimental/fpn.py diff --git a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py b/src/netspresso_trainer/models/heads/detection/experimental/fpn.py index 27de479c3..4567457e2 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/fpn.py @@ -5,148 +5,6 @@ from ....op.custom import ConvLayer, CSPLayer -class FPN(nn.Module): - - def __init__(self, - in_channels, - out_channels, - num_outs, - start_level=0, - end_level=-1, - add_extra_convs=False, - relu_before_extra_convs=False, - no_norm_on_lateral=False, - conv_cfg=None, - norm_cfg=None, - act_cfg=None, - upsample_cfg=None, - init_cfg=None): - if init_cfg is None: - init_cfg = {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'} - if upsample_cfg is None: - upsample_cfg = {'mode': 'nearest'} - super(FPN, self).__init__() - assert isinstance(in_channels, list) - self.in_channels = in_channels - self.out_channels = out_channels - self.num_ins = len(in_channels) - self.num_outs = num_outs - self.relu_before_extra_convs = relu_before_extra_convs - self.no_norm_on_lateral = no_norm_on_lateral - self.fp16_enabled = False - self.upsample_cfg = upsample_cfg.copy() - - if end_level == -1 or end_level == self.num_ins - 1: - self.backbone_end_level = self.num_ins - assert num_outs >= self.num_ins - start_level - else: - # if end_level is not the last level, no extra level is allowed - self.backbone_end_level = end_level + 1 - assert end_level < self.num_ins - assert num_outs == end_level - start_level + 1 - self.start_level = start_level - self.end_level = end_level - self.add_extra_convs = add_extra_convs - assert isinstance(add_extra_convs, (str, bool)) - if isinstance(add_extra_convs, str): - # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' - assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') - elif add_extra_convs: # True - self.add_extra_convs = 'on_input' - - self.lateral_convs = nn.ModuleList() - self.fpn_convs = nn.ModuleList() - - for i in range(self.start_level, self.backbone_end_level): - l_conv = nn.Conv2d(in_channels[i], out_channels, kernel_size=1, stride=1, padding=0) - # ConvModule( - # in_channels[i], - # out_channels, - # 1, - # conv_cfg=conv_cfg, - # norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, - # act_cfg=act_cfg, - # inplace=False) - fpn_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) - # ConvModule( - # out_channels, - # out_channels, - # 3, - # padding=1, - # conv_cfg=conv_cfg, - # norm_cfg=norm_cfg, - # act_cfg=act_cfg, - # inplace=False) - - self.lateral_convs.append(l_conv) - self.fpn_convs.append(fpn_conv) - - # add extra conv layers (e.g., RetinaNet) - extra_levels = num_outs - self.backbone_end_level + self.start_level - if self.add_extra_convs and extra_levels >= 1: - for i in range(extra_levels): - if i == 0 and self.add_extra_convs == 'on_input': - in_channels = self.in_channels[self.backbone_end_level - 1] - else: - in_channels = out_channels - extra_fpn_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1) - self.fpn_convs.append(extra_fpn_conv) - - def forward(self, inputs): - """Forward function.""" - assert len(inputs) == len(self.in_channels) - - # build laterals - laterals = [ - lateral_conv(inputs[i + self.start_level]) - for i, lateral_conv in enumerate(self.lateral_convs) - ] - - # build top-down path - used_backbone_levels = len(laterals) - for i in range(used_backbone_levels - 1, 0, -1): - # In some cases, fixing `scale factor` (e.g. 2) is preferred, but - # it cannot co-exist with `size` in `F.interpolate`. - if 'scale_factor' in self.upsample_cfg: - # fix runtime error of "+=" inplace operation in PyTorch 1.10 - laterals[i - 1] = laterals[i - 1] + F.interpolate( - laterals[i], **self.upsample_cfg) - else: - prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] = laterals[i - 1] + F.interpolate( - laterals[i], size=prev_shape, **self.upsample_cfg) - - # build outputs - # part 1: from original levels - outs = [ - self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) - ] - # part 2: add extra levels - if self.num_outs > len(outs): - # use max pool to get more levels on top of outputs - # (e.g., Faster R-CNN, Mask R-CNN) - if not self.add_extra_convs: - for i in range(self.num_outs - used_backbone_levels): - outs.append(F.max_pool2d(outs[-1], 1, stride=2)) - # add conv layers on top of original feature maps (RetinaNet) - else: - if self.add_extra_convs == 'on_input': - extra_source = inputs[self.backbone_end_level - 1] - elif self.add_extra_convs == 'on_lateral': - extra_source = laterals[-1] - elif self.add_extra_convs == 'on_output': - extra_source = outs[-1] - else: - raise NotImplementedError - outs.append(self.fpn_convs[used_backbone_levels](extra_source)) - for i in range(used_backbone_levels + 1, self.num_outs): - if self.relu_before_extra_convs: - outs.append(self.fpn_convs[i](F.relu(outs[-1]))) - else: - outs.append(self.fpn_convs[i](outs[-1])) - return outs - - class PAFPN(nn.Module): """ YOLOv3 model. Darknet 53 is the default backbone of this model. diff --git a/src/netspresso_trainer/models/necks/__init__.py b/src/netspresso_trainer/models/necks/__init__.py index e69de29bb..81ae6c34b 100644 --- a/src/netspresso_trainer/models/necks/__init__.py +++ b/src/netspresso_trainer/models/necks/__init__.py @@ -0,0 +1 @@ +from .experimental.fpn import fpn \ No newline at end of file diff --git a/src/netspresso_trainer/models/necks/experimental/fpn.py b/src/netspresso_trainer/models/necks/experimental/fpn.py new file mode 100644 index 000000000..e0886c813 --- /dev/null +++ b/src/netspresso_trainer/models/necks/experimental/fpn.py @@ -0,0 +1,166 @@ +import torch.nn as nn +import torch.nn.functional as F + +from ...utils import BackboneOutput + + +class FPN(nn.Module): + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=None, + init_cfg=None): + if init_cfg is None: + init_cfg = {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'} + if upsample_cfg is None: + upsample_cfg = {'mode': 'nearest'} + super(FPN, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1 or end_level == self.num_ins - 1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level is not the last level, no extra level is allowed + self.backbone_end_level = end_level + 1 + assert end_level < self.num_ins + assert num_outs == end_level - start_level + 1 + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + self.add_extra_convs = 'on_input' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = nn.Conv2d(in_channels[i], out_channels, kernel_size=1, stride=1, padding=0) + # ConvModule( + # in_channels[i], + # out_channels, + # 1, + # conv_cfg=conv_cfg, + # norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, + # act_cfg=act_cfg, + # inplace=False) + fpn_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + # ConvModule( + # out_channels, + # out_channels, + # 3, + # padding=1, + # conv_cfg=conv_cfg, + # norm_cfg=norm_cfg, + # act_cfg=act_cfg, + # inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1) + self.fpn_convs.append(extra_fpn_conv) + + def forward(self, inputs): + """Forward function.""" + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. + if 'scale_factor' in self.upsample_cfg: + # fix runtime error of "+=" inplace operation in PyTorch 1.10 + laterals[i - 1] = laterals[i - 1] + F.interpolate( + laterals[i], **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] = laterals[i - 1] + F.interpolate( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return BackboneOutput(intermediate_features=outs) + + +def fpn(intermediate_features_dim, **kwargs): + configuration = { + 'num_outs': 4, + 'start_level': 0, + 'end_level': -1, + 'add_extra_convs': False, + 'relu_before_extra_convs': False, + 'no_norm_on_lateral': False, + 'conv_cfg': None, + 'norm_cfg': None, + 'act_cfg': None, + 'upsample_cfg': None, + 'init_cfg': None + } + + return FPN(in_channels=intermediate_features_dim, + out_channels=intermediate_features_dim[-1], + **configuration) diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index 9ae83fa22..3fa4e8770 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -8,6 +8,7 @@ from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head from .heads.segmentation import all_mlp_decoder +from .necks import fpn MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { 'resnet50': resnet50, @@ -20,6 +21,10 @@ 'mixnet': mixnet, } +MODEL_NECK_DICT: Dict[str, Callable[..., nn.Module]] = { + 'fpn': fpn, +} + MODEL_HEAD_DICT: Dict[str, Callable[..., nn.Module]] = { 'classification': { 'fc': fc, From 1d34398a949081529530e70e34ffaa237dfea4fd Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 11:35:09 +0900 Subject: [PATCH 132/167] Add intermediate_features_dim --- src/netspresso_trainer/models/necks/experimental/fpn.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/netspresso_trainer/models/necks/experimental/fpn.py b/src/netspresso_trainer/models/necks/experimental/fpn.py index e0886c813..16ca0717c 100644 --- a/src/netspresso_trainer/models/necks/experimental/fpn.py +++ b/src/netspresso_trainer/models/necks/experimental/fpn.py @@ -91,6 +91,8 @@ def __init__(self, extra_fpn_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1) self.fpn_convs.append(extra_fpn_conv) + self._intermediate_features_dim = [out_channels for _ in range(num_outs)] + def forward(self, inputs): """Forward function.""" assert len(inputs) == len(self.in_channels) @@ -144,6 +146,10 @@ def forward(self, inputs): else: outs.append(self.fpn_convs[i](outs[-1])) return BackboneOutput(intermediate_features=outs) + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim def fpn(intermediate_features_dim, **kwargs): From 22a3f5654684df657e653ccd2c187a27addc2888 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 11:41:26 +0900 Subject: [PATCH 133/167] Remove FPN on RCNN --- .../detection/experimental/detection/generalized_rcnn.py | 6 +----- .../models/heads/detection/experimental/faster_rcnn.py | 7 ++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py b/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py index 54095e8a8..70436fcde 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py @@ -19,19 +19,15 @@ class GeneralizedRCNN(nn.Module): detections / masks from it. """ - def __init__(self, neck:nn.Module, rpn: nn.Module, roi_heads: nn.Module, image_size: Tuple[int, int]) -> None: + def __init__(self, rpn: nn.Module, roi_heads: nn.Module, image_size: Tuple[int, int]) -> None: super().__init__() # _log_api_usage_once(self) - self.neck = neck self.rpn = rpn self.roi_heads = roi_heads self.image_size = image_size def forward(self, features: FXTensorListType) -> DetectionModelOutput: - if self.neck: - features = self.neck(features) - features = {str(k): v for k, v in enumerate(features)} rpn_features = self.rpn(features, self.image_size) roi_features = self.roi_heads(features, rpn_features['boxes'], self.image_size) diff --git a/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py b/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py index 5127d4f20..6b9d46a0f 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py @@ -2,7 +2,6 @@ import torch.nn.functional as F from .detection import AnchorGenerator, RPNHead, RegionProposalNetwork, RoIHeads, GeneralizedRCNN, MultiScaleRoIAlign -from .fpn import FPN IMAGE_SIZE = (512, 512) # TODO: Get from configuration @@ -43,8 +42,6 @@ def __init__( ): assert fpn_num_outs == len(anchor_sizes) - neck = FPN(in_channels=intermediate_features_dim, out_channels=intermediate_features_dim[-1], num_outs=fpn_num_outs) - out_channels = intermediate_features_dim[-1] aspect_ratios = (aspect_ratios,) * len(anchor_sizes) @@ -65,7 +62,7 @@ def __init__( score_thresh=rpn_score_thresh, ) - featmap_names = [str(i) for i in range(neck.num_outs)] + featmap_names = [str(i) for i in range(len(intermediate_features_dim))] box_roi_pool = MultiScaleRoIAlign(featmap_names=featmap_names, output_size=roi_output_size, sampling_ratio=roi_sampling_ratio) box_head = TwoMLPHead(out_channels * roi_output_size**2, roi_representation_size) @@ -87,7 +84,7 @@ def __init__( box_detections_per_img, ) - super().__init__(neck, rpn, roi_heads, IMAGE_SIZE) + super().__init__(rpn, roi_heads, IMAGE_SIZE) class TwoMLPHead(nn.Module): From 531ceca37af54716f3c4a3b94ed218f842508dd8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 11:42:23 +0900 Subject: [PATCH 134/167] Add neck module on TaskModel --- src/netspresso_trainer/models/base.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/models/base.py b/src/netspresso_trainer/models/base.py index 2cb9880c0..a063dae5d 100644 --- a/src/netspresso_trainer/models/base.py +++ b/src/netspresso_trainer/models/base.py @@ -7,7 +7,7 @@ import torch.nn as nn from omegaconf import OmegaConf -from .registry import MODEL_BACKBONE_DICT, MODEL_HEAD_DICT +from .registry import MODEL_BACKBONE_DICT, MODEL_HEAD_DICT, MODEL_NECK_DICT from .utils import BackboneOutput, DetectionModelOutput, ModelOutput, load_from_checkpoint logger = logging.getLogger("netspresso_trainer") @@ -26,13 +26,20 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode self.backbone = load_from_checkpoint(self.backbone, model_checkpoint) + intermediate_features_dim = self.backbone.intermediate_features_dim + if getattr(conf_model.architecture, 'neck', None): + neck_name = conf_model.architecture.neck.name + neck_fn: Callable[..., nn.Module] = MODEL_NECK_DICT[neck_name] + self.neck = neck_fn(intermediate_features_dim=self.backbone.intermediate_features_dim) + intermediate_features_dim = self.neck.intermediate_features_dim + head_module = MODEL_HEAD_DICT[self.task][head_name] if task == 'classification': self.head = head_module(num_classes=num_classes, feature_dim=self.backbone.feature_dim) elif task in ['segmentation', 'detection']: img_size = img_size if isinstance(img_size, (int, None)) else tuple(img_size) self.head = head_module(num_classes=num_classes, - intermediate_features_dim=self.backbone.intermediate_features_dim, + intermediate_features_dim=intermediate_features_dim, label_size=img_size) if freeze_backbone: @@ -73,6 +80,8 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode def forward(self, x, label_size=None, targets=None): features: BackboneOutput = self.backbone(x) + if self.neck: + features: BackboneOutput = self.neck(features['intermediate_features']) out: ModelOutput = self.head(features['intermediate_features']) return out @@ -84,5 +93,7 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode def forward(self, x, label_size=None, targets=None): features: BackboneOutput = self.backbone(x) + if self.neck: + features: BackboneOutput = self.neck(features['intermediate_features']) out: DetectionModelOutput = self.head(features['intermediate_features']) return out From c2b6dcf20aca2a61c94645f2f9c7d1aabbb397ad Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 11:42:52 +0900 Subject: [PATCH 135/167] Modify train_step of two-stage detector --- src/netspresso_trainer/pipelines/detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 52f90e2d1..22ceb4a47 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -44,11 +44,11 @@ def train_step(self, batch): # forward to rpn backbone = self.model.backbone + neck = self.model.neck head = self.model.head features = backbone(images)['intermediate_features'] - if head.neck: - features = head.neck(features) + features = neck(features)['intermediate_features'] features = {str(k): v for k, v in enumerate(features)} rpn_features = head.rpn(features, head.image_size) From 9d085b519269a9c2fe80a890d685d1a55c129bc3 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 13:25:33 +0900 Subject: [PATCH 136/167] Move pafpn --- src/netspresso_trainer/models/necks/__init__.py | 3 ++- .../fpn.py => necks/experimental/pafpn.py} | 10 ++++++++-- src/netspresso_trainer/models/registry.py | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) rename src/netspresso_trainer/models/{heads/detection/experimental/fpn.py => necks/experimental/pafpn.py} (93%) diff --git a/src/netspresso_trainer/models/necks/__init__.py b/src/netspresso_trainer/models/necks/__init__.py index 81ae6c34b..03954db2c 100644 --- a/src/netspresso_trainer/models/necks/__init__.py +++ b/src/netspresso_trainer/models/necks/__init__.py @@ -1 +1,2 @@ -from .experimental.fpn import fpn \ No newline at end of file +from .experimental.fpn import fpn +from .experimental.pafpn import pafpn \ No newline at end of file diff --git a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py b/src/netspresso_trainer/models/necks/experimental/pafpn.py similarity index 93% rename from src/netspresso_trainer/models/heads/detection/experimental/fpn.py rename to src/netspresso_trainer/models/necks/experimental/pafpn.py index 4567457e2..b309845f5 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py +++ b/src/netspresso_trainer/models/necks/experimental/pafpn.py @@ -1,8 +1,7 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from ....op.custom import ConvLayer, CSPLayer +from ...op.custom import ConvLayer, CSPLayer class PAFPN(nn.Module): @@ -117,3 +116,10 @@ def forward(self, inputs): outputs = (pan_out2, pan_out1, pan_out0) return outputs + +def pafpn(intermediate_features_dim, **kwargs): + configuration = { + 'act_type': 'silu', + } + + return PAFPN(in_channels=intermediate_features_dim, **configuration) diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index 3fa4e8770..f39698c08 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -8,7 +8,7 @@ from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head from .heads.segmentation import all_mlp_decoder -from .necks import fpn +from .necks import fpn, pafpn MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { 'resnet50': resnet50, @@ -23,6 +23,7 @@ MODEL_NECK_DICT: Dict[str, Callable[..., nn.Module]] = { 'fpn': fpn, + 'pafpn': pafpn, } MODEL_HEAD_DICT: Dict[str, Callable[..., nn.Module]] = { From 27ad114906600fe4388a3011824e64d35475d810 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 13:27:10 +0900 Subject: [PATCH 137/167] Set pafpn property and output format --- .../models/necks/experimental/pafpn.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/models/necks/experimental/pafpn.py b/src/netspresso_trainer/models/necks/experimental/pafpn.py index b309845f5..50de72280 100644 --- a/src/netspresso_trainer/models/necks/experimental/pafpn.py +++ b/src/netspresso_trainer/models/necks/experimental/pafpn.py @@ -2,6 +2,7 @@ import torch.nn as nn from ...op.custom import ConvLayer, CSPLayer +from ...utils import BackboneOutput class PAFPN(nn.Module): @@ -85,6 +86,8 @@ def __init__( act_type=act_type, ) + self._intermediate_features_dim = in_channels + def forward(self, inputs): """ Args: @@ -115,7 +118,11 @@ def forward(self, inputs): pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 outputs = (pan_out2, pan_out1, pan_out0) - return outputs + return BackboneOutput(intermediate_features=outputs) + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim def pafpn(intermediate_features_dim, **kwargs): configuration = { From 551cb5087df725ec4c390e363681d8c5cfa8d0b8 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 13:35:19 +0900 Subject: [PATCH 138/167] Remove PAFPN on YOLOXHead --- .../models/heads/detection/experimental/yolox_head.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py index dfcf6fdcb..7135775b7 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py @@ -7,7 +7,6 @@ from ....op.custom import ConvLayer from ....utils import ModelOutput -from .fpn import PAFPN class YOLOXHead(nn.Module): @@ -26,8 +25,6 @@ def __init__( self.num_classes = num_classes - self.neck = PAFPN(in_channels=intermediate_features_dim, act_type=act_type) - self.cls_convs = nn.ModuleList() self.reg_convs = nn.ModuleList() self.cls_preds = nn.ModuleList() @@ -117,7 +114,6 @@ def __init__( def forward(self, xin): outputs = [] - xin = self.neck(xin) for k, (cls_conv, reg_conv, x) in enumerate(zip(self.cls_convs, self.reg_convs, xin)): x = self.stems[k](x) From 83b999c104ef9803fdaeab79eb1f233802068428 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 13:55:06 +0900 Subject: [PATCH 139/167] Update yaml config --- config/model/efficientformer/efficientformer-l1-detection.yaml | 2 ++ config/model/yolox/yolox-detection.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/config/model/efficientformer/efficientformer-l1-detection.yaml b/config/model/efficientformer/efficientformer-l1-detection.yaml index d62a461b5..0cb919058 100644 --- a/config/model/efficientformer/efficientformer-l1-detection.yaml +++ b/config/model/efficientformer/efficientformer-l1-detection.yaml @@ -43,6 +43,8 @@ model: num_blocks: 4 hidden_sizes: 448 downsamples: True + neck: + name: fpn head: name: faster_rcnn losses: diff --git a/config/model/yolox/yolox-detection.yaml b/config/model/yolox/yolox-detection.yaml index 49aef68a6..d0e16d492 100644 --- a/config/model/yolox/yolox-detection.yaml +++ b/config/model/yolox/yolox-detection.yaml @@ -13,6 +13,8 @@ model: wid_mul: 0.5 act_type: "silu" stage_params: ~ + neck: + name: pafpn head: name: yolox_head losses: From ed60b92fb132d3712e0844c4bfba31aa396f4f7a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 14:10:40 +0900 Subject: [PATCH 140/167] Update cfg module --- src/netspresso_trainer/cfg/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 169efef1a..f56c085e2 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -24,6 +24,7 @@ class ArchitectureConfig: full: Optional[Dict[str, Any]] = None backbone: Optional[Dict[str, Any]] = None + neck: Optional[Dict[str, Any]] = None head: Optional[Dict[str, Any]] = None def __post_init__(self): @@ -327,6 +328,7 @@ class DetectionEfficientFormerModelConfig(ModelConfig): task: str = "detection" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( + neck={"name": "fpn"}, head={"name": "faster_rcnn"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ From d2a9b49903b71ba220749ce7174e4a8e36751999 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 14:35:12 +0900 Subject: [PATCH 141/167] Add neck module to ruff exclude --- pyproject.toml | 2 ++ src/netspresso_trainer/models/necks/__init__.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ab66ec722..ca0b3de6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ extend-exclude = [ "docs/*.py", "src/netspresso_trainer/models/backbones/core", "src/netspresso_trainer/models/backbones/experimental", + "src/netspresso_trainer/models/necks/core", + "src/netspresso_trainer/models/necks/experimental", "src/netspresso_trainer/models/heads/classification", "src/netspresso_trainer/models/heads/detection", "src/netspresso_trainer/models/heads/segmentation", diff --git a/src/netspresso_trainer/models/necks/__init__.py b/src/netspresso_trainer/models/necks/__init__.py index 03954db2c..dfbeec689 100644 --- a/src/netspresso_trainer/models/necks/__init__.py +++ b/src/netspresso_trainer/models/necks/__init__.py @@ -1,2 +1,2 @@ from .experimental.fpn import fpn -from .experimental.pafpn import pafpn \ No newline at end of file +from .experimental.pafpn import pafpn From fb124c5e4ca07c782131f45d6d864d749355353a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Mon, 20 Nov 2023 14:35:59 +0900 Subject: [PATCH 142/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8ba940e7..548bdb87a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Enable dataset augmentation customizing by `@illian01` in [PR 201](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/201) - Add postprocessor module by `@illian01` in [PR 223](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/223) - Equalize the model backbone configuration format by `@illian01` in [PR 228](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/228) +- Separate FPN and PAFPN as neck module by `@illian01` in [PR 234](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/234) ## Other Changes: From bdb8ebf2ebfe67ffb99aa62e3f4bb8cf8ba6e914 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 22 Nov 2023 13:36:54 +0900 Subject: [PATCH 143/167] Fix default criterion value of classification --- src/netspresso_trainer/cfg/model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 169efef1a..bc534c98f 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -306,7 +306,7 @@ class ClassificationEfficientFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) @@ -343,7 +343,7 @@ class ClassificationMobileNetV3ModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) @@ -367,7 +367,7 @@ class ClassificationMobileViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) @@ -391,7 +391,7 @@ class ClassificationResNetModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) @@ -415,7 +415,7 @@ class ClassificationSegFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) @@ -439,6 +439,6 @@ class ClassificationViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} ]) From c02e75beb0559bf93fbf053a63b1c9274489db98 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 22 Nov 2023 13:53:09 +0900 Subject: [PATCH 144/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8ba940e7..9d86149c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ ## Bug Fixes: - Fix PIDNet model dataclass task field by `@illian01` in [PR 220](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/220) +- Fix default criterion value of classification `@illian01` in [PR 238](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/238) ## Breaking Changes: From 4a5dd31ab1876ccfc617c2554479e4edce89584d Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 22 Nov 2023 14:27:17 +0900 Subject: [PATCH 145/167] Fix model access to compat with distributed env --- src/netspresso_trainer/pipelines/detection.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 52f90e2d1..32bbfc5af 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -33,6 +33,13 @@ def __init__(self, conf, task, model_name, model, devices, train_dataloader, eva model = model.to(device=devices) self.model = model + if conf.distributed: + self.backbone_to_train = model.module.backbone + self.head_to_train = model.module.head + else: + self.backbone_to_train = model.backbone + self.head_to_train = model.head + def train_step(self, batch): self.model.train() images, labels, bboxes = batch['pixel_values'], batch['label'], batch['bbox'] @@ -43,8 +50,8 @@ def train_step(self, batch): self.optimizer.zero_grad() # forward to rpn - backbone = self.model.backbone - head = self.model.head + backbone = self.backbone_to_train + head = self.head_to_train features = backbone(images)['intermediate_features'] if head.neck: @@ -86,7 +93,7 @@ def valid_step(self, batch): out = self.model(images) # Compute loss - head = self.model.head + head = self.head_to_train matched_idxs, roi_head_labels = head.roi_heads.assign_targets_to_proposals(out['boxes'], bboxes, labels) matched_gt_boxes = [bbox[idx] for idx, bbox in zip(matched_idxs, bboxes)] regression_targets = head.roi_heads.box_coder.encode(matched_gt_boxes, out['boxes']) From d850c3dd5c31266876f78a875c6a617b7046daac Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 22 Nov 2023 14:31:22 +0900 Subject: [PATCH 146/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8ba940e7..e4a9db6c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ ## Bug Fixes: - Fix PIDNet model dataclass task field by `@illian01` in [PR 220](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/220) +- Fix model access of 2-stage detection pipeline to compat with distributed environment by `@illian` in [PR 239](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/239) ## Breaking Changes: From 19c1926f9d7f050312e1b25cbe1f560660e039a4 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Wed, 22 Nov 2023 15:16:05 +0900 Subject: [PATCH 147/167] Add removed neck on merge conflict point --- src/netspresso_trainer/pipelines/detection.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 4e4c12884..b3897fc81 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -35,9 +35,11 @@ def __init__(self, conf, task, model_name, model, devices, train_dataloader, eva if conf.distributed: self.backbone_to_train = model.module.backbone + self.neck_to_train = model.module.neck self.head_to_train = model.module.head else: self.backbone_to_train = model.backbone + self.neck = model.neck self.head_to_train = model.head def train_step(self, batch): @@ -51,6 +53,7 @@ def train_step(self, batch): # forward to rpn backbone = self.backbone_to_train + neck = self.neck_to_train head = self.head_to_train features = backbone(images)['intermediate_features'] From 5049e6fe19b33feae814803121cecd72a70c3b45 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Thu, 23 Nov 2023 12:50:58 +0900 Subject: [PATCH 148/167] Add RandomErasing --- .../dataloaders/augmentation/custom.py | 55 ++++++++++++++++++- .../dataloaders/augmentation/registry.py | 2 + 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 4078569cd..ac7e5c054 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -1,11 +1,13 @@ import math import random from collections.abc import Sequence -from typing import Dict, Optional +from typing import Dict, Optional, List, Tuple import numpy as np +from omegaconf import ListConfig import PIL.Image as Image import torch +from torch import Tensor import torchvision.transforms as T import torchvision.transforms.functional as F from torch.nn import functional as F_torch @@ -351,6 +353,57 @@ def __repr__(self): return format_string +class RandomErasing(T.RandomErasing): + + def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): + if isinstance(scale, ListConfig): + scale = tuple(scale) + if isinstance(ratio, ListConfig): + ratio = tuple(ratio) + if isinstance(value, ListConfig): + value = tuple(value) + super().__init__(p, scale, ratio, value, inplace) + + @staticmethod + def get_params( + img, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[int] = None + ): + img_w, img_h = img.size + + area = img_h * img_w + + log_ratio = torch.log(torch.tensor(ratio)) + for _ in range(10): + erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item() + + h = int(round(math.sqrt(erase_area * aspect_ratio))) + w = int(round(math.sqrt(erase_area / aspect_ratio))) + if not (h < img_h and w < img_w): + continue + + if value is None: + v = np.random.randint(255, size=(h, w)).astype('uint8') + v = Image.fromarray(v).convert(img.mode) + else: + v = Image.new(img.mode, (w, h), value) + + i = torch.randint(0, img_h - h + 1, size=(1,)).item() + j = torch.randint(0, img_w - w + 1, size=(1,)).item() + return i, j, v + + # Return original image + return 0, 0, img + + def forward(self, image, mask=None, bbox=None): + if torch.rand(1) < self.p: + x, y, v = self.get_params(image, scale=self.scale, ratio=self.ratio, value=self.value) + image.paste(v, (y, x)) + # TODO: Object-aware + return image, mask, bbox + return image, mask, bbox + + class RandomMixup: """ Based on the RandomMixup implementation of ml_cvnets. diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index 0251b4de8..63387c232 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -9,6 +9,7 @@ RandomMixup, RandomResizedCrop, RandomVerticalFlip, + RandomErasing, Resize, ) @@ -19,6 +20,7 @@ 'randomresizedcrop': RandomResizedCrop, 'randomhorizontalflip': RandomHorizontalFlip, 'randomverticalflip': RandomVerticalFlip, + 'randomerasing': RandomErasing, 'resize': Resize, 'mixup': RandomMixup, 'cutmix': RandomCutmix From 14868b32968e8624a8886eff58e1bcafce495cb5 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Thu, 23 Nov 2023 16:09:53 +0900 Subject: [PATCH 149/167] #206 add auto-download with aws s3 url --- src/netspresso_trainer/models/utils.py | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/models/utils.py b/src/netspresso_trainer/models/utils.py index 0307eb6aa..7d7f48367 100644 --- a/src/netspresso_trainer/models/utils.py +++ b/src/netspresso_trainer/models/utils.py @@ -13,6 +13,18 @@ FXTensorType = Union[Tensor, Proxy] FXTensorListType = Union[List[Tensor], List[Proxy]] +MODEL_CHECKPOINT_URL_DICT = { + 'resnet50': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/resnet/resnet50.pth", + 'mobilenetv3_small': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilenetv3/mobilenet_v3_small.pth", + 'segformer': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/segformer/segformer.pth", + 'mobilevit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilevit/mobilevit_s.pth", + 'vit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/vit/vit-tiny.pth", + 'efficientformer': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/efficientformer/efficientformer_l1_1000d.pth", + 'mixnet_s': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_s.pth", + 'mixnet_m': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_m.pth", + 'mixnet_l': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_l.pth", +} + class BackboneOutput(TypedDict): intermediate_features: Optional[FXTensorListType] @@ -43,8 +55,29 @@ class PIDNetModelOutput(ModelOutput): extra_d: Optional[FXTensorType] -def load_from_checkpoint(model: nn.Module, model_checkpoint: Optional[Union[str, Path]]) -> nn.Module: +def download_model_checkpoint(model_checkpoint: Union[str, Path], model_name: str) -> Path: + checkpoint_url = MODEL_CHECKPOINT_URL_DICT[model_name] + model_checkpoint = Path(model_checkpoint) + model_checkpoint.parent.mkdir(parents=True, exist_ok=True) + # Safer switch: only extension, user can use the custom name for checkpoint file + model_checkpoint = model_checkpoint.with_suffix(Path(checkpoint_url).suffix) + if not model_checkpoint.exists(): + torch.hub.download_url_to_file(checkpoint_url, model_checkpoint) + + return model_checkpoint + + +def load_from_checkpoint( + model: nn.Module, + model_checkpoint: Optional[Union[str, Path]] +) -> nn.Module: if model_checkpoint is not None: + if not Path(model_checkpoint).exists(): + model_name = Path(model_checkpoint).stem + assert model_name in MODEL_CHECKPOINT_URL_DICT, \ + f"model_name {model_name} in path {model_checkpoint} is not valid name!" + model_checkpoint = download_model_checkpoint(model_checkpoint, model_name) + model_state_dict = torch.load(model_checkpoint, map_location='cpu') missing_keys, unexpected_keys = model.load_state_dict(model_state_dict, strict=False) From f5457a965e3f813abe7d1ff6b140a075c01322fa Mon Sep 17 00:00:00 2001 From: deepkyu Date: Thu, 23 Nov 2023 16:22:43 +0900 Subject: [PATCH 150/167] #206 hotfix: update url dict key --- src/netspresso_trainer/models/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/netspresso_trainer/models/utils.py b/src/netspresso_trainer/models/utils.py index 7d7f48367..aa34c6a5c 100644 --- a/src/netspresso_trainer/models/utils.py +++ b/src/netspresso_trainer/models/utils.py @@ -15,7 +15,7 @@ MODEL_CHECKPOINT_URL_DICT = { 'resnet50': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/resnet/resnet50.pth", - 'mobilenetv3_small': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilenetv3/mobilenet_v3_small.pth", + 'mobilenet_v3_small': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilenetv3/mobilenet_v3_small.pth", 'segformer': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/segformer/segformer.pth", 'mobilevit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilevit/mobilevit_s.pth", 'vit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/vit/vit-tiny.pth", From 91d48e4d00c2c6322a9290c8d0ae114219b38292 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Thu, 23 Nov 2023 18:18:25 +0900 Subject: [PATCH 151/167] #233 hotfix: config field name --- src/netspresso_trainer/cfg/model.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index ed300746b..e49eca864 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -20,6 +20,7 @@ "ClassificationViTModelConfig", ] + @dataclass class ArchitectureConfig: full: Optional[Dict[str, Any]] = None @@ -30,6 +31,7 @@ class ArchitectureConfig: def __post_init__(self): assert bool(self.full) != bool(self.backbone), "Only one of full or backbone should be given." + @dataclass class ModelConfig: task: str = MISSING @@ -307,7 +309,7 @@ class ClassificationEfficientFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -345,7 +347,7 @@ class ClassificationMobileNetV3ModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -369,7 +371,7 @@ class ClassificationMobileViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -393,7 +395,7 @@ class ClassificationResNetModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -417,7 +419,7 @@ class ClassificationSegFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -441,6 +443,5 @@ class ClassificationViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) - From 38199cfe2953c7411af711cfbbc0bed5a60b9e52 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Thu, 23 Nov 2023 18:25:32 +0900 Subject: [PATCH 152/167] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74c20025c..f5bed333d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -129,6 +129,7 @@ This change is applied at [PR 151](https://github.com/Nota-NetsPresso/netspresso - Initialize loss and metric at same time with optimizer and lr schedulers by `@deepkyu` in [PR 138](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/138) - Hotfix the error which shows 0 for validation loss and metrics by fixing the variable name by `@deepkyu` in [PR 140](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/140) - Add missing field, `save_optimizer_state`, in `logging.yaml` by `@illian01` in [PR 149](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/149) +- Hotfix for pythonic config name (classification loss) by `@deepkyu` in [PR 242](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/242) ## Breaking Changes: From e5679ff03a02725bce8b943032e4f4e360dc8f45 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 13:56:26 +0900 Subject: [PATCH 153/167] Merge remote-tracking branch 'public/dev' --- CHANGELOG.md | 1 + src/netspresso_trainer/cfg/model.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74c20025c..f5bed333d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -129,6 +129,7 @@ This change is applied at [PR 151](https://github.com/Nota-NetsPresso/netspresso - Initialize loss and metric at same time with optimizer and lr schedulers by `@deepkyu` in [PR 138](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/138) - Hotfix the error which shows 0 for validation loss and metrics by fixing the variable name by `@deepkyu` in [PR 140](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/140) - Add missing field, `save_optimizer_state`, in `logging.yaml` by `@illian01` in [PR 149](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/149) +- Hotfix for pythonic config name (classification loss) by `@deepkyu` in [PR 242](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/242) ## Breaking Changes: diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index ed300746b..e49eca864 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -20,6 +20,7 @@ "ClassificationViTModelConfig", ] + @dataclass class ArchitectureConfig: full: Optional[Dict[str, Any]] = None @@ -30,6 +31,7 @@ class ArchitectureConfig: def __post_init__(self): assert bool(self.full) != bool(self.backbone), "Only one of full or backbone should be given." + @dataclass class ModelConfig: task: str = MISSING @@ -307,7 +309,7 @@ class ClassificationEfficientFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -345,7 +347,7 @@ class ClassificationMobileNetV3ModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -369,7 +371,7 @@ class ClassificationMobileViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -393,7 +395,7 @@ class ClassificationResNetModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -417,7 +419,7 @@ class ClassificationSegFormerModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @@ -441,6 +443,5 @@ class ClassificationViTModelConfig(ModelConfig): head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) - From 68e8cc1a85cc326b5fa4e972c30fc1c77cbaf503 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 13:57:10 +0900 Subject: [PATCH 154/167] #241 add name in model configuration --- .../model/efficientformer/efficientformer-l1-classification.yaml | 1 + config/model/efficientformer/efficientformer-l1-detection.yaml | 1 + .../model/efficientformer/efficientformer-l1-segmentation.yaml | 1 + config/model/mixnet/mixnet-l-classification.yaml | 1 + config/model/mixnet/mixnet-l-segmentation.yaml | 1 + config/model/mixnet/mixnet-m-classification.yaml | 1 + config/model/mixnet/mixnet-m-segmentation.yaml | 1 + config/model/mixnet/mixnet-s-classification.yaml | 1 + config/model/mixnet/mixnet-s-segmentation.yaml | 1 + config/model/mobilenetv3/mobilenetv3-small-classification.yaml | 1 + config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml | 1 + config/model/mobilevit/mobilevit-s-classification.yaml | 1 + config/model/pidnet/pidnet-s-segmentation.yaml | 1 + config/model/resnet/resnet50-classification.yaml | 1 + config/model/resnet/resnet50-segmentation.yaml | 1 + config/model/segformer/segformer-classification.yaml | 1 + config/model/segformer/segformer-segmentation.yaml | 1 + config/model/vit/vit-classification.yaml | 1 + 18 files changed, 18 insertions(+) diff --git a/config/model/efficientformer/efficientformer-l1-classification.yaml b/config/model/efficientformer/efficientformer-l1-classification.yaml index d1eca5c1a..b7f51e530 100644 --- a/config/model/efficientformer/efficientformer-l1-classification.yaml +++ b/config/model/efficientformer/efficientformer-l1-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/efficientformer/efficientformer-l1-detection.yaml b/config/model/efficientformer/efficientformer-l1-detection.yaml index 0cb919058..9a3b339f8 100644 --- a/config/model/efficientformer/efficientformer-l1-detection.yaml +++ b/config/model/efficientformer/efficientformer-l1-detection.yaml @@ -1,5 +1,6 @@ model: task: detection + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/efficientformer/efficientformer-l1-segmentation.yaml b/config/model/efficientformer/efficientformer-l1-segmentation.yaml index cb5e8c729..b28718f94 100644 --- a/config/model/efficientformer/efficientformer-l1-segmentation.yaml +++ b/config/model/efficientformer/efficientformer-l1-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-l-classification.yaml b/config/model/mixnet/mixnet-l-classification.yaml index 2ebc0e500..80aed0f48 100644 --- a/config/model/mixnet/mixnet-l-classification.yaml +++ b/config/model/mixnet/mixnet-l-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mixnet_l checkpoint: ./weights/mixnet/mixnet_l.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-l-segmentation.yaml b/config/model/mixnet/mixnet-l-segmentation.yaml index 8d01b59f8..623e4675c 100644 --- a/config/model/mixnet/mixnet-l-segmentation.yaml +++ b/config/model/mixnet/mixnet-l-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: mixnet_l checkpoint: ./weights/mixnet/mixnet_l.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-m-classification.yaml b/config/model/mixnet/mixnet-m-classification.yaml index 9f9e495d2..c41088ef1 100644 --- a/config/model/mixnet/mixnet-m-classification.yaml +++ b/config/model/mixnet/mixnet-m-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mixnet_m checkpoint: ./weights/mixnet/mixnet_m.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-m-segmentation.yaml b/config/model/mixnet/mixnet-m-segmentation.yaml index adad16bd6..affd2b9a9 100644 --- a/config/model/mixnet/mixnet-m-segmentation.yaml +++ b/config/model/mixnet/mixnet-m-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: mixnet_m checkpoint: ./weights/mixnet/mixnet_m.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-s-classification.yaml b/config/model/mixnet/mixnet-s-classification.yaml index 99204f91d..e4c54cc00 100644 --- a/config/model/mixnet/mixnet-s-classification.yaml +++ b/config/model/mixnet/mixnet-s-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mixnet_s checkpoint: ./weights/mixnet/mixnet_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mixnet/mixnet-s-segmentation.yaml b/config/model/mixnet/mixnet-s-segmentation.yaml index 506a19ae4..dd8cdeb9f 100644 --- a/config/model/mixnet/mixnet-s-segmentation.yaml +++ b/config/model/mixnet/mixnet-s-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: mixnet_s checkpoint: ./weights/mixnet/mixnet_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml index a58793c54..aa623aceb 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mobilenet_v3_small checkpoint: ./weights/mobilenetv3/mobilenet_v3_small.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml index 6a9f1cdc5..be42afce3 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: mobilenet_v3_small checkpoint: ./weights/mobilenetv3/mobilenet_v3_small.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/mobilevit/mobilevit-s-classification.yaml b/config/model/mobilevit/mobilevit-s-classification.yaml index 5dc91c29f..6e21b48c2 100644 --- a/config/model/mobilevit/mobilevit-s-classification.yaml +++ b/config/model/mobilevit/mobilevit-s-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mobilevit_s checkpoint: ./weights/mobilevit/mobilevit_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/pidnet/pidnet-s-segmentation.yaml b/config/model/pidnet/pidnet-s-segmentation.yaml index 52223fcfb..2cbaf1791 100644 --- a/config/model/pidnet/pidnet-s-segmentation.yaml +++ b/config/model/pidnet/pidnet-s-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: pidnet_s checkpoint: ./weights/pidnet/pidnet_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 5a2a1e627..acc715022 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: resnet50 checkpoint: ./weights/resnet/resnet50.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 7b6d364f9..5baf10f28 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: resnet50 checkpoint: ./weights/resnet/resnet50.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/segformer/segformer-classification.yaml b/config/model/segformer/segformer-classification.yaml index 75469a4cd..72eea4840 100644 --- a/config/model/segformer/segformer-classification.yaml +++ b/config/model/segformer/segformer-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: segformer checkpoint: ./weights/segformer/segformer.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/segformer/segformer-segmentation.yaml b/config/model/segformer/segformer-segmentation.yaml index c739d8f56..589d31acc 100644 --- a/config/model/segformer/segformer-segmentation.yaml +++ b/config/model/segformer/segformer-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: segformer checkpoint: ./weights/segformer/segformer.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index 29b2bf84e..5b0e063fc 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: vit_tiny checkpoint: ./weights/vit/vit-tiny.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ From 6da6b211ca566b3ca28b518ce7fef85e3a22f062 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 13:57:58 +0900 Subject: [PATCH 155/167] #241 add name field --- config/model/yolox/yolox-detection.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/model/yolox/yolox-detection.yaml b/config/model/yolox/yolox-detection.yaml index d0e16d492..67137cfac 100644 --- a/config/model/yolox/yolox-detection.yaml +++ b/config/model/yolox/yolox-detection.yaml @@ -1,5 +1,6 @@ model: task: detection + name: yolox_s checkpoint: ./weights/yolox/yolox_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ From baab9c0e7d60c51e338165e7ce4ffc14c0b35ff6 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 14:06:06 +0900 Subject: [PATCH 156/167] #241 update model name in cfg --- src/netspresso_trainer/cfg/model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index e49eca864..391f6bf46 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -35,6 +35,7 @@ def __post_init__(self): @dataclass class ModelConfig: task: str = MISSING + name: str = MISSING checkpoint: Optional[Union[Path, str]] = None fx_model_checkpoint: Optional[Union[Path, str]] = None resume_optimizer_checkpoint: Optional[Union[Path, str]] = None @@ -304,6 +305,7 @@ class ViTArchitectureConfig(ArchitectureConfig): @dataclass class ClassificationEfficientFormerModelConfig(ModelConfig): task: str = "classification" + name: str = "efficientformer_l1" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( head={"name": "fc"} @@ -316,6 +318,7 @@ class ClassificationEfficientFormerModelConfig(ModelConfig): @dataclass class SegmentationEfficientFormerModelConfig(ModelConfig): task: str = "segmentation" + name: str = "efficientformer_l1" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -328,6 +331,7 @@ class SegmentationEfficientFormerModelConfig(ModelConfig): @dataclass class DetectionEfficientFormerModelConfig(ModelConfig): task: str = "detection" + name: str = "efficientformer_l1" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( neck={"name": "fpn"}, @@ -342,6 +346,7 @@ class DetectionEfficientFormerModelConfig(ModelConfig): @dataclass class ClassificationMobileNetV3ModelConfig(ModelConfig): task: str = "classification" + name: str = "mobilenet_v3_small" checkpoint: Optional[Union[Path, str]] = "./weights/mobilenetv3/mobilenet_v3_small.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileNetV3ArchitectureConfig( head={"name": "fc"} @@ -354,6 +359,7 @@ class ClassificationMobileNetV3ModelConfig(ModelConfig): @dataclass class SegmentationMobileNetV3ModelConfig(ModelConfig): task: str = "segmentation" + name: str = "mobilenet_v3_small" checkpoint: Optional[Union[Path, str]] = "./weights/mobilenetv3/mobilenet_v3_small.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileNetV3ArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -366,6 +372,7 @@ class SegmentationMobileNetV3ModelConfig(ModelConfig): @dataclass class ClassificationMobileViTModelConfig(ModelConfig): task: str = "classification" + name: str = "mobilevit_s" checkpoint: Optional[Union[Path, str]] = "./weights/mobilevit/mobilevit_s.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileViTArchitectureConfig( head={"name": "fc"} @@ -378,6 +385,7 @@ class ClassificationMobileViTModelConfig(ModelConfig): @dataclass class PIDNetModelConfig(ModelConfig): task: str = "segmentation" + name: str = "pidnet_s" checkpoint: Optional[Union[Path, str]] = "./weights/pidnet/pidnet_s.pth" architecture: ArchitectureConfig = field(default_factory=lambda: PIDNetArchitectureConfig()) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ @@ -390,6 +398,7 @@ class PIDNetModelConfig(ModelConfig): @dataclass class ClassificationResNetModelConfig(ModelConfig): task: str = "classification" + name: str = "resnet50" checkpoint: Optional[Union[Path, str]] = "./weights/resnet/resnet50.pth" architecture: ArchitectureConfig = field(default_factory=lambda: ResNetArchitectureConfig( head={"name": "fc"} @@ -402,6 +411,7 @@ class ClassificationResNetModelConfig(ModelConfig): @dataclass class SegmentationResNetModelConfig(ModelConfig): task: str = "segmentation" + name: str = "resnet50" checkpoint: Optional[Union[Path, str]] = "./weights/resnet/resnet50.pth" architecture: ArchitectureConfig = field(default_factory=lambda: ResNetArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -414,6 +424,7 @@ class SegmentationResNetModelConfig(ModelConfig): @dataclass class ClassificationSegFormerModelConfig(ModelConfig): task: str = "classification" + name: str = "segformer" checkpoint: Optional[Union[Path, str]] = "./weights/segformer/segformer.pth" architecture: ArchitectureConfig = field(default_factory=lambda: SegFormerArchitectureConfig( head={"name": "fc"} @@ -426,6 +437,7 @@ class ClassificationSegFormerModelConfig(ModelConfig): @dataclass class SegmentationSegFormerModelConfig(ModelConfig): task: str = "segmentation" + name: str = "segformer" checkpoint: Optional[Union[Path, str]] = "./weights/segformer/segformer.pth" architecture: ArchitectureConfig = field(default_factory=lambda: SegFormerArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -438,6 +450,7 @@ class SegmentationSegFormerModelConfig(ModelConfig): @dataclass class ClassificationViTModelConfig(ModelConfig): task: str = "classification" + name: str = "vit_tiny" checkpoint: Optional[Union[Path, str]] = "./weights/vit/vit-tiny.pth" architecture: ArchitectureConfig = field(default_factory=lambda: ViTArchitectureConfig( head={"name": "fc"} From 9c77603b54063b58bceb450fd7f8779702bf5d0b Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 14:28:08 +0900 Subject: [PATCH 157/167] #241 update pythonic cfg --- src/netspresso_trainer/cfg/__init__.py | 8 + src/netspresso_trainer/cfg/model.py | 301 +++++++++++++++++++++++++ 2 files changed, 309 insertions(+) diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 3c13d8ef9..4315dab43 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -38,15 +38,22 @@ from .logging import LoggingConfig from .model import ( ClassificationEfficientFormerModelConfig, + ClassificationMixNetLargeModelConfig, + ClassificationMixNetMediumModelConfig, + ClassificationMixNetSmallModelConfig, ClassificationMobileNetV3ModelConfig, ClassificationMobileViTModelConfig, ClassificationResNetModelConfig, ClassificationSegFormerModelConfig, ClassificationViTModelConfig, DetectionEfficientFormerModelConfig, + DetectionYoloXModelConfig, ModelConfig, PIDNetModelConfig, SegmentationEfficientFormerModelConfig, + SegmentationMixNetLargeModelConfig, + SegmentationMixNetMediumModelConfig, + SegmentationMixNetSmallModelConfig, SegmentationMobileNetV3ModelConfig, SegmentationResNetModelConfig, SegmentationSegFormerModelConfig, @@ -65,6 +72,7 @@ 'detection': DetectionScheduleConfig } + @dataclass class TrainerConfig: task: str = field(default=MISSING, metadata={"omegaconf_ignore": True}) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 391f6bf46..98a220ae3 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -18,6 +18,13 @@ "ClassificationSegFormerModelConfig", "SegmentationSegFormerModelConfig", "ClassificationViTModelConfig", + "DetectionYoloXModelConfig", + "ClassificationMixNetSmallModelConfig", + "ClassificationMixNetMediumModelConfig", + "ClassificationMixNetLargeModelConfig", + "SegmentationMixNetSmallModelConfig", + "SegmentationMixNetMediumModelConfig", + "SegmentationMixNetLargeModelConfig", ] @@ -302,6 +309,208 @@ class ViTArchitectureConfig(ArchitectureConfig): }) +@dataclass +class MixNetSmallArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 16, + "width_multi": 1.0, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + "stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [16, 24, 24], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 2, 1, 2], + "kernel_sizes": [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1, 1], [1, 1], [1, 1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 2], + "kernel_sizes": [[3, 5, 7, 9, 11], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class MixNetMediumArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 24, + "width_multi": 1.0, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + "stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [24, 32, 32], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3, 5, 7], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 3, 1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class MixNetLargeArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 24, + "width_multi": 1.3, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + "stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [24, 32, 32], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3, 5, 7], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 3, 1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class CSPDarkNetSmallArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "cspdarknet", + "params": { + "dep_mul": 0.33, + "wid_mul": 0.5, + "act_type": "silu", + }, + "stage_params": None, + }) + + @dataclass class ClassificationEfficientFormerModelConfig(ModelConfig): task: str = "classification" @@ -458,3 +667,95 @@ class ClassificationViTModelConfig(ModelConfig): losses: List[Dict[str, Any]] = field(default_factory=lambda: [ {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) + + +@dataclass +class DetectionYoloXModelConfig(ModelConfig): + task: str = "detection" + name: str = "yolox_s" + checkpoint: Optional[Union[Path, str]] = "./weights/yolox/yolox_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: CSPDarkNetSmallArchitectureConfig( + neck={"name": "pafpn"}, + head={"name": "yolox_head"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "yolox_loss", "weight": None} + ]) + + +@dataclass +class ClassificationMixNetSmallModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_s" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetSmallArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class SegmentationMixNetSmallModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_s" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetSmallArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, "weight": None} + ]) + + +@dataclass +class ClassificationMixNetMediumModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_m" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_m.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetMediumArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class SegmentationMixNetMediumModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_m" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_m.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetMediumArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, "weight": None} + ]) + + +@dataclass +class ClassificationMixNetLargeModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_l" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_l.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetLargeArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class SegmentationMixNetLargeModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_l" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_l.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetLargeArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, "weight": None} + ]) From 0244bc3f5e5b6d5ef79cf58bf815718292a6dbc4 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 14:43:52 +0900 Subject: [PATCH 158/167] #241 substitute name field with model nickname --- .../dataloaders/augmentation/transforms.py | 2 +- .../dataloaders/segmentation/local.py | 2 +- src/netspresso_trainer/models/builder.py | 12 ++++++++---- src/netspresso_trainer/trainer_common.py | 3 +-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py index 35336ee66..35adbc0ba 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/transforms.py +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -84,6 +84,6 @@ def val_transforms_pidnet(conf_augmentation): def create_transform(model_name: str, is_training=False): - if model_name == 'pidnet': + if 'pidnet' in model_name: return train_transforms_pidnet if is_training else val_transforms_pidnet return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/segmentation/local.py b/src/netspresso_trainer/dataloaders/segmentation/local.py index aea1f350e..b39bd8b18 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/local.py +++ b/src/netspresso_trainer/dataloaders/segmentation/local.py @@ -51,7 +51,7 @@ def __getitem__(self, index): mask = Image.fromarray(mask, mode='L') # single mode array (PIL.Image) compatbile with torchvision transform API - if self.model_name == 'pidnet': + if 'pidnet' in self.model_name: edge = generate_edge(np.array(mask)) out = self.transform(self.conf_augmentation)(image=img, mask=mask, edge=edge) outputs.update({'pixel_values': out['image'], 'labels': out['mask'], 'edges': out['edge'].float(), 'name': img_path.name}) diff --git a/src/netspresso_trainer/models/builder.py b/src/netspresso_trainer/models/builder.py index acc642a43..6c9202a20 100644 --- a/src/netspresso_trainer/models/builder.py +++ b/src/netspresso_trainer/models/builder.py @@ -22,7 +22,8 @@ def load_full_model(conf_model, model_name, num_classes, model_checkpoint): return model -def load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone): +def load_backbone_and_head_model( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone): TASK_MODEL_DICT: Dict[str, Type[TaskModel]] = { 'classification': ClassificationModel, 'segmentation': SegmentationModel, @@ -30,9 +31,11 @@ def load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num } if task not in TASK_MODEL_DICT: - raise ValueError(f"No such task(s) named: {task}. This should be included in SUPPORTING_TASK_LIST ({SUPPORTING_TASK_LIST})") + raise ValueError( + f"No such task(s) named: {task}. This should be included in SUPPORTING_TASK_LIST ({SUPPORTING_TASK_LIST})") - return TASK_MODEL_DICT[task](conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) + return TASK_MODEL_DICT[task]( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) def build_model(conf_model, task, num_classes, model_checkpoint, img_size) -> nn.Module: @@ -44,4 +47,5 @@ def build_model(conf_model, task, num_classes, model_checkpoint, img_size) -> nn backbone_name = str(conf_model.architecture.backbone.name).lower() head_name = str(conf_model.architecture.head.name).lower() freeze_backbone = conf_model.freeze_backbone - return load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) + return load_backbone_and_head_model( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) diff --git a/src/netspresso_trainer/trainer_common.py b/src/netspresso_trainer/trainer_common.py index daa453007..6d76b36a9 100644 --- a/src/netspresso_trainer/trainer_common.py +++ b/src/netspresso_trainer/trainer_common.py @@ -29,10 +29,9 @@ def train_common(conf: DictConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING' # TODO: Get model name from checkpoint single_task_model = is_single_task_model(conf.model) - conf_model_sub = conf.model.architecture.full if single_task_model else conf.model.architecture.backbone conf.model.single_task_model = single_task_model - model_name = str(conf_model_sub.name).lower() + model_name = str(conf.model.name).lower() if is_graphmodule_training: model_name += "_graphmodule" From 35c748776b3b2fcd8fa088e45d403f9a2a4d68f7 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 24 Nov 2023 14:49:03 +0900 Subject: [PATCH 159/167] Add TrivialAugmentWide --- .../dataloaders/augmentation/custom.py | 74 +++++++++++++++++++ .../dataloaders/augmentation/registry.py | 4 +- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index ac7e5c054..7b49e7efb 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -12,6 +12,7 @@ import torchvision.transforms.functional as F from torch.nn import functional as F_torch from torchvision.transforms.functional import InterpolationMode +from torchvision.transforms.autoaugment import _apply_op BBOX_CROP_KEEP_THRESHOLD = 0.2 MAX_RETRY = 5 @@ -404,6 +405,79 @@ def forward(self, image, mask=None, bbox=None): return image, mask, bbox +class TrivialAugmentWide(torch.nn.Module): + """ + Based on the torchvision implementation. + https://pytorch.org/vision/main/_modules/torchvision/transforms/autoaugment.html#TrivialAugmentWide + """ + + def __init__( + self, + num_magnitude_bins: int = 31, + interpolation: InterpolationMode = 'bilinear', + fill: Optional[List[float]] = None, + ) -> None: + super().__init__() + interpolation = INVERSE_MODES_MAPPING[interpolation] + + self.num_magnitude_bins = num_magnitude_bins + self.interpolation = interpolation + self.fill = fill + + def _augmentation_space(self, num_bins: int) -> Dict[str, Tuple[Tensor, bool]]: + return { + # op_name: (magnitudes, signed) + "Identity": (torch.tensor(0.0), False), + "ShearX": (torch.linspace(0.0, 0.99, num_bins), True), + "ShearY": (torch.linspace(0.0, 0.99, num_bins), True), + "TranslateX": (torch.linspace(0.0, 32.0, num_bins), True), + "TranslateY": (torch.linspace(0.0, 32.0, num_bins), True), + "Rotate": (torch.linspace(0.0, 135.0, num_bins), True), + "Brightness": (torch.linspace(0.0, 0.99, num_bins), True), + "Color": (torch.linspace(0.0, 0.99, num_bins), True), + "Contrast": (torch.linspace(0.0, 0.99, num_bins), True), + "Sharpness": (torch.linspace(0.0, 0.99, num_bins), True), + "Posterize": (8 - (torch.arange(num_bins) / ((num_bins - 1) / 6)).round().int(), False), + "Solarize": (torch.linspace(255.0, 0.0, num_bins), False), + "AutoContrast": (torch.tensor(0.0), False), + "Equalize": (torch.tensor(0.0), False), + } + + def forward(self, image, mask=None, bbox=None): + fill = self.fill + channels, height, width = F.get_dimensions(image) + if isinstance(image, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + elif fill is not None: + fill = [float(f) for f in fill] + + op_meta = self._augmentation_space(self.num_magnitude_bins) + op_index = int(torch.randint(len(op_meta), (1,)).item()) + op_name = list(op_meta.keys())[op_index] + magnitudes, signed = op_meta[op_name] + magnitude = ( + float(magnitudes[torch.randint(len(magnitudes), (1,), dtype=torch.long)].item()) + if magnitudes.ndim > 0 + else 0.0 + ) + if signed and torch.randint(2, (1,)): + magnitude *= -1.0 + + # TODO: Compute mask, bbox + return _apply_op(image, op_name, magnitude, interpolation=self.interpolation, fill=fill), mask, bbox + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s + + class RandomMixup: """ Based on the RandomMixup implementation of ml_cvnets. diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index 63387c232..164ff2e3c 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -11,6 +11,7 @@ RandomVerticalFlip, RandomErasing, Resize, + TrivialAugmentWide, ) TRANSFORM_DICT: Dict[str, Callable] = { @@ -23,5 +24,6 @@ 'randomerasing': RandomErasing, 'resize': Resize, 'mixup': RandomMixup, - 'cutmix': RandomCutmix + 'cutmix': RandomCutmix, + 'trivialaugmentwide': TrivialAugmentWide, } From 948dc8709cf0683d85d22ac90dd70b59756a6f3c Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 14:49:55 +0900 Subject: [PATCH 160/167] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5bed333d..854b89895 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) - Support augmentation for classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) - Add model: MixNet by `@illian01` in [PR 229](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/229) +- Add `model.name` to get the exact nickname of the model by `@deepkyu` in [PR 243](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/243/) ## Bug Fixes: From cb15f77094b424988e214fdd823a567aed71f6fd Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 14:59:05 +0900 Subject: [PATCH 161/167] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74c20025c..9c781be12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Add postprocessor module by `@illian01` in [PR 223](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/223) - Equalize the model backbone configuration format by `@illian01` in [PR 228](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/228) - Separate FPN and PAFPN as neck module by `@illian01` in [PR 234](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/234) +- Auto-download pretrained checkpoint from AWS S3 by `@deepkyu` in [PR 244](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/244) ## Other Changes: From ecf571d540e5673fc3196839794a275e14668819 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 24 Nov 2023 16:28:51 +0900 Subject: [PATCH 162/167] Ruff fix --- .../dataloaders/augmentation/custom.py | 10 +++++----- .../dataloaders/augmentation/registry.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 7b49e7efb..18ef561b5 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -1,18 +1,18 @@ import math import random from collections.abc import Sequence -from typing import Dict, Optional, List, Tuple +from typing import Dict, List, Optional, Tuple import numpy as np -from omegaconf import ListConfig import PIL.Image as Image import torch -from torch import Tensor import torchvision.transforms as T import torchvision.transforms.functional as F +from omegaconf import ListConfig +from torch import Tensor from torch.nn import functional as F_torch -from torchvision.transforms.functional import InterpolationMode from torchvision.transforms.autoaugment import _apply_op +from torchvision.transforms.functional import InterpolationMode BBOX_CROP_KEEP_THRESHOLD = 0.2 MAX_RETRY = 5 @@ -370,7 +370,7 @@ def get_params( img, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[int] = None ): img_w, img_h = img.size - + area = img_h * img_w log_ratio = torch.log(torch.tensor(ratio)) diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py index 164ff2e3c..139ac1a7c 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/registry.py +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -5,11 +5,11 @@ Pad, RandomCrop, RandomCutmix, + RandomErasing, RandomHorizontalFlip, RandomMixup, RandomResizedCrop, RandomVerticalFlip, - RandomErasing, Resize, TrivialAugmentWide, ) From 5688053d3c48644b05b7747778b5f1d917291e63 Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 24 Nov 2023 16:34:38 +0900 Subject: [PATCH 163/167] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e1b9d4cf..734316171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) - Support augmentation for classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) - Add model: MixNet by `@illian01` in [PR 229](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/229) +- Add transforms: RandomErasing and TrivialAugmentationWide by `@illian01` in [PR 246](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/246) ## Bug Fixes: From edbb0198ffee90e7a840c53ce9ee779dfc00cb2a Mon Sep 17 00:00:00 2001 From: Junho Shin Date: Fri, 24 Nov 2023 16:51:34 +0900 Subject: [PATCH 164/167] Add visualize value --- src/netspresso_trainer/dataloaders/augmentation/custom.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 18ef561b5..0895bd3bf 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -355,6 +355,7 @@ def __repr__(self): class RandomErasing(T.RandomErasing): + visualize = True def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): if isinstance(scale, ListConfig): @@ -410,6 +411,7 @@ class TrivialAugmentWide(torch.nn.Module): Based on the torchvision implementation. https://pytorch.org/vision/main/_modules/torchvision/transforms/autoaugment.html#TrivialAugmentWide """ + visualize = True def __init__( self, From dbce81b69ba9eb77b9fa81e475040a1e6e643c4b Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 16:57:44 +0900 Subject: [PATCH 165/167] #241 mobilenetv3_small -> mobilenetv3 --- .../model/mobilenetv3/mobilenetv3-small-classification.yaml | 2 +- config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml | 2 +- src/netspresso_trainer/cfg/model.py | 2 +- src/netspresso_trainer/models/backbones/__init__.py | 2 +- .../models/backbones/experimental/mobilenetv3.py | 4 ++-- src/netspresso_trainer/models/registry.py | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml index aa623aceb..6ddd1ff2a 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml @@ -8,7 +8,7 @@ model: architecture: full: ~ # auto backbone: - name: mobilenetv3_small + name: mobilenetv3 params: ~ stage_params: - diff --git a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml index be42afce3..6c8438fcd 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml @@ -8,7 +8,7 @@ model: architecture: full: ~ # auto backbone: - name: mobilenetv3_small + name: mobilenetv3 params: ~ stage_params: - diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 98a220ae3..832ccf86c 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -86,7 +86,7 @@ class EfficientFormerArchitectureConfig(ArchitectureConfig): @dataclass class MobileNetV3ArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { - "name": "mobilenetv3_small", + "name": "mobilenetv3", "params": None, "stage_params": [ { diff --git a/src/netspresso_trainer/models/backbones/__init__.py b/src/netspresso_trainer/models/backbones/__init__.py index 363ff9e1d..1f4594f86 100644 --- a/src/netspresso_trainer/models/backbones/__init__.py +++ b/src/netspresso_trainer/models/backbones/__init__.py @@ -2,7 +2,7 @@ from .experimental.darknet import cspdarknet from .experimental.efficientformer import efficientformer from .experimental.mixnet import mixnet -from .experimental.mobilenetv3 import mobilenetv3_small +from .experimental.mobilenetv3 import mobilenetv3 from .experimental.mobilevit import mobilevit from .experimental.resnet import resnet50 from .experimental.segformer import segformer diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index 0a12213fa..f1c1e486d 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -12,7 +12,7 @@ from ...op.custom import ConvLayer, InvertedResidual from ...utils import BackboneOutput -__all__ = ['mobilenetv3_small'] +__all__ = ['mobilenetv3'] SUPPORTING_TASK = ['classification', 'segmentation'] @@ -130,5 +130,5 @@ def task_support(self, task): return task.lower() in SUPPORTING_TASK -def mobilenetv3_small(task, conf_model_backbone) -> MobileNetV3: +def mobilenetv3(task, conf_model_backbone) -> MobileNetV3: return MobileNetV3(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index f39698c08..ef67443a6 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -3,7 +3,7 @@ import torch.nn as nn -from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3_small, mobilevit, resnet50, segformer, vit +from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3, mobilevit, resnet50, segformer, vit from .full import pidnet from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head @@ -12,7 +12,7 @@ MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { 'resnet50': resnet50, - 'mobilenetv3_small': mobilenetv3_small, + 'mobilenetv3': mobilenetv3, 'segformer': segformer, 'mobilevit': mobilevit, 'vit': vit, From 40cb38f75006ad9510b57d229c69a5fb8e7d9fc8 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 16:59:54 +0900 Subject: [PATCH 166/167] #241 resnet50 -> resnet --- config/model/resnet/resnet50-classification.yaml | 2 +- config/model/resnet/resnet50-segmentation.yaml | 2 +- demo/gradio_augmentation.py | 2 +- src/netspresso_trainer/cfg/model.py | 2 +- src/netspresso_trainer/models/backbones/__init__.py | 2 +- .../models/backbones/experimental/resnet.py | 6 +++--- src/netspresso_trainer/models/registry.py | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index acc715022..a781931f4 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -8,7 +8,7 @@ model: architecture: full: ~ # auto backbone: - name: resnet50 + name: resnet params: block: bottleneck norm_layer: batch_norm diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 5baf10f28..002129846 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -9,7 +9,7 @@ model: full: name: ~ # auto backbone: - name: resnet50 + name: resnet params: block: bottleneck norm_layer: batch_norm diff --git a/demo/gradio_augmentation.py b/demo/gradio_augmentation.py index 5fa0c901f..5c7cc71bf 100644 --- a/demo/gradio_augmentation.py +++ b/demo/gradio_augmentation.py @@ -88,7 +88,7 @@ def launch_gradio(args): task_choices = gr.Radio(label="Task: ", value='classification', choices=SUPPORTING_TASK_LIST) with gr.Column(scale=1): phase_choices = gr.Radio(label="Phase: ", value='train', choices=['train', 'valid']) - model_choices = gr.Radio(label="Model: ", value='resnet50', choices=SUPPORTING_MODEL_LIST) + model_choices = gr.Radio(label="Model: ", value='resnet', choices=SUPPORTING_MODEL_LIST) with gr.Row(equal_height=True): with gr.Column(scale=1): config_input = gr.Code(label="Augmentation configuration", value=args.config.read_text(), language='yaml', lines=30) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index 832ccf86c..71184e1c7 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -223,7 +223,7 @@ class PIDNetArchitectureConfig(ArchitectureConfig): @dataclass class ResNetArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { - "name": "resnet50", + "name": "resnet", "params": { "block": "bottleneck", "norm_layer": "batch_norm", diff --git a/src/netspresso_trainer/models/backbones/__init__.py b/src/netspresso_trainer/models/backbones/__init__.py index 1f4594f86..03737edd9 100644 --- a/src/netspresso_trainer/models/backbones/__init__.py +++ b/src/netspresso_trainer/models/backbones/__init__.py @@ -4,6 +4,6 @@ from .experimental.mixnet import mixnet from .experimental.mobilenetv3 import mobilenetv3 from .experimental.mobilevit import mobilevit -from .experimental.resnet import resnet50 +from .experimental.resnet import resnet from .experimental.segformer import segformer from .experimental.vit import vit diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 34e87a6c4..1aefbb0bf 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -12,7 +12,7 @@ from ...op.custom import BasicBlock, Bottleneck, ConvLayer from ...utils import BackboneOutput -__all__ = ['resnet50'] +__all__ = ['resnet'] SUPPORTING_TASK = ['classification', 'segmentation'] @@ -158,8 +158,8 @@ def task_support(self, task): return task.lower() in SUPPORTING_TASK -def resnet50(task, conf_model_backbone) -> ResNet: +def resnet(task, conf_model_backbone) -> ResNet: """ - ResNet-50 model from "Deep Residual Learning for Image Recognition" https://arxiv.org/pdf/1512.03385.pdf. + ResNet model from "Deep Residual Learning for Image Recognition" https://arxiv.org/pdf/1512.03385.pdf. """ return ResNet(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index ef67443a6..999ed5175 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -3,7 +3,7 @@ import torch.nn as nn -from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3, mobilevit, resnet50, segformer, vit +from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3, mobilevit, resnet, segformer, vit from .full import pidnet from .heads.classification import fc from .heads.detection import faster_rcnn, yolox_head @@ -11,7 +11,7 @@ from .necks import fpn, pafpn MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { - 'resnet50': resnet50, + 'resnet': resnet, 'mobilenetv3': mobilenetv3, 'segformer': segformer, 'mobilevit': mobilevit, From bf5f9c6891ff4fc452c3a5292a304b3669ec0be4 Mon Sep 17 00:00:00 2001 From: deepkyu Date: Fri, 24 Nov 2023 17:22:21 +0900 Subject: [PATCH 167/167] v0.0.10 release commit --- CHANGELOG.md | 18 ++++++++++++++++++ src/netspresso_trainer/VERSION | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 263b762f7..ab49db996 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,24 @@ ## New Features: +- + +## Bug Fixes: + +- + +## Breaking Changes: + +- + +## Other Changes: + +- + +# v0.0.10 + +## New Features: + - Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) - Support augmentation for classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) - Add model: MixNet by `@illian01` in [PR 229](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/229) diff --git a/src/netspresso_trainer/VERSION b/src/netspresso_trainer/VERSION index 429d94ae0..b0a122753 100644 --- a/src/netspresso_trainer/VERSION +++ b/src/netspresso_trainer/VERSION @@ -1 +1 @@ -0.0.9 \ No newline at end of file +0.0.10 \ No newline at end of file