diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a1bada..ab49db99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,19 +2,48 @@ ## New Features: -No changes to highlight. +- ## Bug Fixes: -No changes to highlight. +- ## Breaking Changes: -No changes to highlight. +- ## Other Changes: -No changes to highlight. +- + +# v0.0.10 + +## New Features: + +- Add a gpu option in `train_with_config` (only single-GPU supported) by `@deepkyu` in [PR 219](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/219) +- Support augmentation for the classification task: cutmix, mixup by `@illian01` in [PR 221](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/221) +- Add model: MixNet by `@illian01` in [PR 229](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/229) +- Add `model.name` to get the exact nickname of the model by `@deepkyu` in [PR 243](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/243/) +- Add transforms: RandomErasing and TrivialAugmentationWide by `@illian01` in [PR 246](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/246) + +## Bug Fixes: + +- Fix PIDNet model dataclass task field by `@illian01` in [PR 220](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/220) +- Fix default criterion value of classification by `@illian01` in [PR 238](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/238) +- Fix model access of 2-stage detection pipeline to be compatible with distributed environments by `@illian` in [PR 239](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/239) + +## Breaking Changes: + +- Enable dataset augmentation customization by `@illian01` in [PR 201](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/201) +- Add postprocessor module by `@illian01` in [PR 223](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/223) +- Equalize the model backbone configuration format by `@illian01` in [PR 228](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/228) +- Separate FPN and PAFPN as neck modules by `@illian01` in [PR 234](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/234) +- Auto-download pretrained checkpoint from AWS S3 by `@deepkyu` in [PR 244](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/244) + +## Other Changes: + +- Update ruff rule (`W`) by `@deepkyu` in [PR 218](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/218) +- Integrate classification loss modules by `@illian01` in [PR 226](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/226) # v0.0.9 @@ -121,6 +150,7 @@ This change is applied at [PR 151](https://github.com/Nota-NetsPresso/netspresso - Initialize loss and metric at same time with optimizer and lr schedulers by `@deepkyu` in [PR 138](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/138) - Hotfix the error which shows 0 for validation loss and metrics by fixing the variable name by `@deepkyu` in [PR 140](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/140) - Add missing field, `save_optimizer_state`, in `logging.yaml` by `@illian01` in [PR 149](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/149) +- Hotfix for pythonic config name (classification loss) by `@deepkyu` in [PR 242](https://github.com/Nota-NetsPresso/netspresso-trainer/pull/242) ## Breaking Changes: diff --git a/config/augmentation/classification.yaml b/config/augmentation/classification.yaml index d8ac19f9..0648b74b 100644 --- a/config/augmentation/classification.yaml +++ b/config/augmentation/classification.yaml @@ -1,31 +1,14 @@ augmentation: img_size: 
&img_size 256 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: ~ - min_scale: ~ - crop_size_h: ~ - crop_size_w: ~ - resize_ratio0: ~ - resize_ratiof: ~ - resize_add: ~ - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: 1.0 - copy_paste: ~ - mixup_alpha: 0.0 - cutmix_alpha: 0.0 - mixup_switch_prob: 0.5 - color_jitter: - brightness: ~ - contrast: ~ - saturation: ~ - hue: ~ - colorjitter_p: ~ \ No newline at end of file + transforms: + - + name: randomresizedcrop + size: *img_size + interpolation: bilinear + - + name: randomhorizontalflip + p: 0.5 + mix_transforms: + - + name: cutmix + alpha: 1.0 diff --git a/config/augmentation/detection.yaml b/config/augmentation/detection.yaml index 4dafab46..11022618 100644 --- a/config/augmentation/detection.yaml +++ b/config/augmentation/detection.yaml @@ -1,31 +1,7 @@ augmentation: img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 2048 - min_scale: 768 - crop_size_h: 512 - crop_size_w: 512 - resize_ratio0: 0.5 - resize_ratiof: 2.0 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + transforms: + - + name: resize + size: *img_size + interpolation: bilinear diff --git a/config/augmentation/segmentation.yaml b/config/augmentation/segmentation.yaml index 48dae02f..d878f5af 100644 --- a/config/augmentation/segmentation.yaml +++ b/config/augmentation/segmentation.yaml @@ -1,31 +1,17 @@ augmentation: img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 1024 - min_scale: *img_size - crop_size_h: *img_size - crop_size_w: *img_size - resize_ratio0: 1.0 - resize_ratiof: 1.5 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + transforms: + - + name: randomresizedcrop + size: *img_size + interpolation: bilinear + - + name: randomhorizontalflip + p: 0.5 + - + name: colorjitter + brightness: 0.25 + contrast: 0.25 + saturation: 0.25 + hue: 0.1 + p: 0.5 diff --git a/config/augmentation/template/common.yaml b/config/augmentation/template/common.yaml index 48dae02f..881cb816 100644 --- a/config/augmentation/template/common.yaml +++ b/config/augmentation/template/common.yaml @@ -1,31 +1,27 @@ augmentation: - img_size: &img_size 512 - hsv_h: ~ - hsv_s: ~ - hsv_v: ~ - degrees: ~ - translate: ~ - scale: ~ - max_scale: 1024 - min_scale: *img_size - crop_size_h: *img_size - crop_size_w: *img_size - resize_ratio0: 1.0 - resize_ratiof: 1.5 - resize_add: 1 - shear: ~ - perspective: ~ - flipud: ~ - fliplr: 0.5 - mosaic: ~ - mixup: ~ - copy_paste: ~ - mixup_alpha: ~ - cutmix_alpha: ~ - mixup_switch_prob: ~ - color_jitter: - brightness: 0.25 - contrast: 0.25 - saturation: 0.25 - hue: 0.1 - colorjitter_p: 0.5 \ No newline at end of file + img_size: &img_size ~ + transforms: + - + name: randomresizedcrop + size: ~ + interpolation: bilinear + - + name: randomhorizontalflip + p: ~ + - + name: randomverticalflip + p: ~ + - + name: colorjitter + brightness: ~ + contrast: ~ + saturation: ~ + hue: ~ + p: ~ + - + name: resize + size: 
~ + - + name: pad + padding: ~ + mix_transforms: ~ \ No newline at end of file diff --git a/config/model/efficientformer/efficientformer-l1-classification.yaml b/config/model/efficientformer/efficientformer-l1-classification.yaml index 681794cc..b7f51e53 100644 --- a/config/model/efficientformer/efficientformer-l1-classification.yaml +++ b/config/model/efficientformer/efficientformer-l1-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,29 +9,44 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. + hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/efficientformer/efficientformer-l1-detection.yaml b/config/model/efficientformer/efficientformer-l1-detection.yaml index e81da42f..9a3b339f 100644 --- a/config/model/efficientformer/efficientformer-l1-detection.yaml +++ b/config/model/efficientformer/efficientformer-l1-detection.yaml @@ -1,5 +1,6 @@ model: task: detection + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,26 +9,43 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. 
+ hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True + neck: + name: fpn head: name: faster_rcnn losses: diff --git a/config/model/efficientformer/efficientformer-l1-segmentation.yaml b/config/model/efficientformer/efficientformer-l1-segmentation.yaml index 029abf54..b28718f9 100644 --- a/config/model/efficientformer/efficientformer-l1-segmentation.yaml +++ b/config/model/efficientformer/efficientformer-l1-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: efficientformer_l1 checkpoint: ./weights/efficientformer/efficientformer_l1_1000d.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,26 +9,41 @@ model: full: ~ # auto backbone: name: efficientformer - num_blocks: [3, 2, 6, 4] - hidden_sizes: [48, 96, 224, 448] - num_attention_heads: 8 - attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads - attention_dropout_prob: 0. - attention_ratio: 4 - attention_bias_resolution: 16 - pool_size: 3 - intermediate_ratio: 4 - hidden_dropout_prob: 0. - hidden_activation_type: 'gelu' - layer_norm_eps: 1e-5 - drop_path_rate: 0. - use_layer_scale: True - layer_scale_init_value: 1e-5 - downsamples: [True, True, True, True] - down_patch_size: 3 - down_stride: 2 - down_pad: 1 - vit_num: 1 + params: + num_attention_heads: 8 + attention_hidden_size: 256 # attention_hidden_size_splitted * num_attention_heads + attention_dropout_prob: 0. + attention_ratio: 4 + attention_bias_resolution: 16 + pool_size: 3 + intermediate_ratio: 4 + hidden_dropout_prob: 0. + hidden_activation_type: 'gelu' + layer_norm_eps: 1e-5 + drop_path_rate: 0. + use_layer_scale: True + layer_scale_init_value: 1e-5 + down_patch_size: 3 + down_stride: 2 + down_pad: 1 + vit_num: 1 + stage_params: + - + num_blocks: 3 + hidden_sizes: 48 + downsamples: True + - + num_blocks: 2 + hidden_sizes: 96 + downsamples: True + - + num_blocks: 6 + hidden_sizes: 224 + downsamples: True + - + num_blocks: 4 + hidden_sizes: 448 + downsamples: True head: name: all_mlp_decoder losses: diff --git a/config/model/mixnet/mixnet-l-classification.yaml b/config/model/mixnet/mixnet-l-classification.yaml new file mode 100644 index 00000000..80aed0f4 --- /dev/null +++ b/config/model/mixnet/mixnet-l-classification.yaml @@ -0,0 +1,67 @@ +model: + task: classification + name: mixnet_l + checkpoint: ./weights/mixnet/mixnet_l.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.3 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/config/model/mixnet/mixnet-l-segmentation.yaml b/config/model/mixnet/mixnet-l-segmentation.yaml new file mode 100644 index 00000000..623e4675 --- /dev/null +++ b/config/model/mixnet/mixnet-l-segmentation.yaml @@ -0,0 +1,67 @@ +model: + task: segmentation + name: mixnet_l + checkpoint: ./weights/mixnet/mixnet_l.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.3 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file diff --git a/config/model/mixnet/mixnet-m-classification.yaml b/config/model/mixnet/mixnet-m-classification.yaml new file mode 100644 index 00000000..c41088ef --- /dev/null +++ b/config/model/mixnet/mixnet-m-classification.yaml @@ -0,0 +1,67 @@ +model: + task: classification + name: mixnet_m + checkpoint: ./weights/mixnet/mixnet_m.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/config/model/mixnet/mixnet-m-segmentation.yaml b/config/model/mixnet/mixnet-m-segmentation.yaml new file mode 100644 index 00000000..affd2b9a --- /dev/null +++ b/config/model/mixnet/mixnet-m-segmentation.yaml @@ -0,0 +1,67 @@ +model: + task: segmentation + name: mixnet_m + checkpoint: ./weights/mixnet/mixnet_m.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 24 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [24, 32, 32] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3, 5, 7], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 3, 1, 3] + kernel_sizes: [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7, 9], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file diff --git a/config/model/mixnet/mixnet-s-classification.yaml b/config/model/mixnet/mixnet-s-classification.yaml new file mode 100644 index 00000000..e4c54cc0 --- /dev/null +++ b/config/model/mixnet/mixnet-s-classification.yaml @@ -0,0 +1,67 @@ +model: + task: classification + name: mixnet_s + checkpoint: ./weights/mixnet/mixnet_s.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 16 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [16, 24, 24] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 2, 1, 2] + kernel_sizes: [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1, 1], [1, 1], [1, 1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 2] + kernel_sizes: [[3, 5, 7, 9, 11], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: fc + losses: + - criterion: cross_entropy + label_smoothing: 0.1 + weight: ~ \ No newline at end of file diff --git a/config/model/mixnet/mixnet-s-segmentation.yaml b/config/model/mixnet/mixnet-s-segmentation.yaml new file mode 100644 index 00000000..dd8cdeb9 --- /dev/null +++ b/config/model/mixnet/mixnet-s-segmentation.yaml @@ -0,0 +1,67 @@ +model: + task: segmentation + name: mixnet_s + checkpoint: ./weights/mixnet/mixnet_s.pth + fx_model_checkpoint: ~ + resume_optimizer_checkpoint: ~ + freeze_backbone: False + architecture: + full: ~ # auto + backbone: + name: mixnet + params: + stem_planes: 16 + width_multi: 1.0 + depth_multi: 1.0 + dropout_rate: 0. 
+ stage_params: + - + expand_ratio: [1, 6, 3] + out_channels: [16, 24, 24] + num_blocks: [1, 1, 1] + kernel_sizes: [[3], [3], [3]] + exp_kernel_sizes: [[1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1], [1, 1]] + stride: [1, 2, 1] + dilation: [1, 1, 1] + act_type: ["relu", "relu", "relu"] + se_reduction_ratio: [~, ~, ~] + - + expand_ratio: [6, 6] + out_channels: [40, 40] + num_blocks: [1, 3] + kernel_sizes: [[3, 5, 7], [3, 5]] + exp_kernel_sizes: [[1], [1, 1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + - + expand_ratio: [6, 6, 6, 3] + out_channels: [80, 80, 120, 120] + num_blocks: [1, 2, 1, 2] + kernel_sizes: [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1], [1, 1], [1, 1]] + poi_kernel_sizes: [[1, 1], [1, 1], [1, 1], [1, 1]] + stride: [2, 1, 1, 1] + dilation: [1, 1, 1, 1] + act_type: ["swish", "swish", "swish", "swish"] + se_reduction_ratio: [4, 4, 2, 2] + - + expand_ratio: [6, 6] + out_channels: [200, 200] + num_blocks: [1, 2] + kernel_sizes: [[3, 5, 7, 9, 11], [3, 5, 7, 9]] + exp_kernel_sizes: [[1], [1]] + poi_kernel_sizes: [[1], [1, 1]] + stride: [2, 1] + dilation: [1, 1] + act_type: ["swish", "swish"] + se_reduction_ratio: [2, 2] + head: + name: all_mlp_decoder + losses: + - criterion: cross_entropy + weight: ~ + ignore_index: 255 \ No newline at end of file diff --git a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml index 27398828..6ddd1ff2 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-classification.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mobilenet_v3_small checkpoint: ./weights/mobilenetv3/mobilenet_v3_small.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -7,26 +8,48 @@ model: architecture: full: ~ # auto backbone: - name: mobilenetv3_small - block_info: # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] + name: mobilenetv3 + params: ~ + stage_params: - - - [16, 3, 16, 16, True, "relu", 2, 1] + in_channels: [16] + kernel: [3] + expanded_channels: [16] + out_channels: [16] + use_se: [True] + activation: ["relu"] + stride: [2] + dilation: [1] - - - [16, 3, 72, 24, False, "relu", 2, 1] - - [24, 3, 88, 24, False, "relu", 1, 1] + in_channels: [16, 24] + kernel: [3, 3] + expanded_channels: [72, 88] + out_channels: [24, 24] + use_se: [False, False] + activation: ["relu", "relu"] + stride: [2, 1] + dilation: [1, 1] - - - [24, 5, 96, 40, True, "hard_swish", 2, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 120, 48, True, "hard_swish", 1, 1] - - [48, 5, 144, 48, True, "hard_swish", 1, 1] + in_channels: [24, 40, 40, 40, 48] + kernel: [5, 5, 5, 5, 5] + expanded_channels: [96, 240, 240, 120, 144] + out_channels: [40, 40, 40, 48, 48] + use_se: [True, True, True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1, 1, 1] + dilation: [1, 1, 1, 1, 1] - - - [48, 5, 288, 96, True, "hard_swish", 2, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] + in_channels: [48, 96, 96] + kernel: [5, 5, 5] + expanded_channels: [288, 576, 576] + out_channels: [96, 96, 96] + use_se: [True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1] + dilation: [1, 1, 1] 
head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml index 58aceec8..6c8438fc 100644 --- a/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml +++ b/config/model/mobilenetv3/mobilenetv3-small-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: mobilenet_v3_small checkpoint: ./weights/mobilenetv3/mobilenet_v3_small.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -7,23 +8,45 @@ model: architecture: full: ~ # auto backbone: - name: mobilenetv3_small - block_info: # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] + name: mobilenetv3 + params: ~ + stage_params: - - - [16, 3, 16, 16, True, "relu", 2, 1] + in_channels: [16] + kernel: [3] + expanded_channels: [16] + out_channels: [16] + use_se: [True] + activation: ["relu"] + stride: [2] + dilation: [1] - - - [16, 3, 72, 24, False, "relu", 2, 1] - - [24, 3, 88, 24, False, "relu", 1, 1] + in_channels: [16, 24] + kernel: [3, 3] + expanded_channels: [72, 88] + out_channels: [24, 24] + use_se: [False, False] + activation: ["relu", "relu"] + stride: [2, 1] + dilation: [1, 1] - - - [24, 5, 96, 40, True, "hard_swish", 2, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 240, 40, True, "hard_swish", 1, 1] - - [40, 5, 120, 48, True, "hard_swish", 1, 1] - - [48, 5, 144, 48, True, "hard_swish", 1, 1] + in_channels: [24, 40, 40, 40, 48] + kernel: [5, 5, 5, 5, 5] + expanded_channels: [96, 240, 240, 120, 144] + out_channels: [40, 40, 40, 48, 48] + use_se: [True, True, True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1, 1, 1] + dilation: [1, 1, 1, 1, 1] - - - [48, 5, 288, 96, True, "hard_swish", 2, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] - - [96, 5, 576, 96, True, "hard_swish", 1, 1] + in_channels: [48, 96, 96] + kernel: [5, 5, 5] + expanded_channels: [288, 576, 576] + out_channels: [96, 96, 96] + use_se: [True, True, True] + activation: ["hard_swish", "hard_swish", "hard_swish"] + stride: [2, 1, 1] + dilation: [1, 1, 1] head: name: all_mlp_decoder losses: diff --git a/config/model/mobilevit/mobilevit-s-classification.yaml b/config/model/mobilevit/mobilevit-s-classification.yaml index 17bcc1cf..6e21b48c 100644 --- a/config/model/mobilevit/mobilevit-s-classification.yaml +++ b/config/model/mobilevit/mobilevit-s-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: mobilevit_s checkpoint: ./weights/mobilevit/mobilevit_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,27 +9,70 @@ model: full: ~ # auto backbone: name: mobilevit - out_channels: [32, 64, 96, 128, 160] - block_type: ['mv2', 'mv2', 'mobilevit', 'mobilevit', 'mobilevit'] - num_blocks: [1, 3, None, None, None] - stride: [1, 2, 2, 2, 2] - hidden_size: [None, None, 144, 192, 240] - intermediate_size: [None, None, 288, 384, 480] - num_transformer_blocks: [None, None, 2, 4, 3] - dilate: [None, None, False, False, False] - expand_ratio: [4, 4, 4, 4, 4] # [mv2_exp_mult] * 4 - patch_embedding_out_channels: 16 - local_kernel_size: 3 - patch_size: 2 - num_attention_heads: 4 # num_heads - attention_dropout_prob: 0.1 - hidden_dropout_prob: 0.0 - exp_factor: 4 - layer_norm_eps: 1e-5 - use_fusion_layer: True + params: + patch_embedding_out_channels: 16 + 
local_kernel_size: 3 + patch_size: 2 + num_attention_heads: 4 # num_heads + attention_dropout_prob: 0.1 + hidden_dropout_prob: 0.0 + exp_factor: 4 + layer_norm_eps: 1e-5 + use_fusion_layer: True + stage_params: + - + out_channels: 32 + block_type: 'mv2' + num_blocks: 1 + stride: 1 + hidden_size: None + intermediate_size: None + num_transformer_blocks: None + dilate: None + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 64 + block_type: 'mv2' + num_blocks: 3 + stride: 2 + hidden_size: None + intermediate_size: None + num_transformer_blocks: None + dilate: None + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 96 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 144 + intermediate_size: 288 + num_transformer_blocks: 2 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 128 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 192 + intermediate_size: 384 + num_transformer_blocks: 4 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 + - + out_channels: 160 + block_type: 'mobilevit' + num_blocks: None + stride: 2 + hidden_size: 240 + intermediate_size: 480 + num_transformer_blocks: 3 + dilate: False + expand_ratio: 4 # [mv2_exp_mult] * 4 head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/pidnet/pidnet-s-segmentation.yaml b/config/model/pidnet/pidnet-s-segmentation.yaml index 52223fcf..2cbaf179 100644 --- a/config/model/pidnet/pidnet-s-segmentation.yaml +++ b/config/model/pidnet/pidnet-s-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: pidnet_s checkpoint: ./weights/pidnet/pidnet_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ diff --git a/config/model/resnet/resnet50-classification.yaml b/config/model/resnet/resnet50-classification.yaml index 10c9bc99..a781931f 100644 --- a/config/model/resnet/resnet50-classification.yaml +++ b/config/model/resnet/resnet50-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: resnet50 checkpoint: ./weights/resnet/resnet50.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -7,12 +8,33 @@ model: architecture: full: ~ # auto backbone: - name: resnet50 - block: bottleneck - layers: [3, 4, 6, 3] + name: resnet + params: + block: bottleneck + norm_layer: batch_norm + groups: 1 + width_per_group: 64 + zero_init_residual: False + expansion: ~ + stage_params: + - + plane: 64 + layers: 3 + - + plane: 128 + layers: 4 + replace_stride_with_dilation: False + - + plane: 256 + layers: 6 + replace_stride_with_dilation: False + - + plane: 512 + layers: 3 + replace_stride_with_dilation: False head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/resnet/resnet50-segmentation.yaml b/config/model/resnet/resnet50-segmentation.yaml index 3f83f708..00212984 100644 --- a/config/model/resnet/resnet50-segmentation.yaml +++ b/config/model/resnet/resnet50-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: resnet50 checkpoint: ./weights/resnet/resnet50.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,9 +9,30 @@ model: full: name: ~ # auto backbone: - name: resnet50 - block: bottleneck - layers: [3, 4, 6, 3] + name: resnet + params: + block: bottleneck + norm_layer: batch_norm + groups: 1 + width_per_group: 64 + 
zero_init_residual: False + expansion: ~ + stage_params: + - + plane: 64 + layers: 3 + - + plane: 128 + layers: 4 + replace_stride_with_dilation: False + - + plane: 256 + layers: 6 + replace_stride_with_dilation: False + - + plane: 512 + layers: 3 + replace_stride_with_dilation: False head: name: all_mlp_decoder losses: diff --git a/config/model/segformer/segformer-classification.yaml b/config/model/segformer/segformer-classification.yaml index e669de24..72eea484 100644 --- a/config/model/segformer/segformer-classification.yaml +++ b/config/model/segformer/segformer-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: segformer checkpoint: ./weights/segformer/segformer.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,21 +9,44 @@ model: full: ~ # auto backbone: name: segformer - num_modules: 4 # `num_encoder_blocks` in original - num_blocks: [2, 2, 2, 2] # `depth` in original - sr_ratios: [8, 4, 2, 1] - hidden_sizes: [32, 64, 160, 256] - embedding_patch_sizes: [7, 3, 3, 3] - embedding_strides: [4, 2, 2, 2] - num_attention_heads: [1, 2, 5, 8] - intermediate_ratio: 4 - hidden_activation_type: "gelu" - hidden_dropout_prob: 0.0 - attention_dropout_prob: 0.0 - layer_norm_eps: 1e-5 + params: + intermediate_ratio: 4 + hidden_activation_type: "gelu" + hidden_dropout_prob: 0.0 + attention_dropout_prob: 0.0 + layer_norm_eps: 1e-5 + stage_params: + - + num_blocks: 2 + sr_ratios: 8 + hidden_sizes: 32 + embedding_patch_sizes: 7 + embedding_strides: 4 + num_attention_heads: 1 + - + num_blocks: 2 + sr_ratios: 4 + hidden_sizes: 64 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 2 + - + num_blocks: 2 + sr_ratios: 2 + hidden_sizes: 160 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 5 + - + num_blocks: 2 + sr_ratios: 1 + hidden_sizes: 256 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 8 head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/segformer/segformer-segmentation.yaml b/config/model/segformer/segformer-segmentation.yaml index bb990dfc..589d31ac 100644 --- a/config/model/segformer/segformer-segmentation.yaml +++ b/config/model/segformer/segformer-segmentation.yaml @@ -1,5 +1,6 @@ model: task: segmentation + name: segformer checkpoint: ./weights/segformer/segformer.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,18 +9,41 @@ model: full: ~ # auto backbone: name: segformer - num_modules: 4 # `num_encoder_blocks` in original - num_blocks: [2, 2, 2, 2] # `depth` in original - sr_ratios: [8, 4, 2, 1] - hidden_sizes: [32, 64, 160, 256] - embedding_patch_sizes: [7, 3, 3, 3] - embedding_strides: [4, 2, 2, 2] - num_attention_heads: [1, 2, 5, 8] - intermediate_ratio: 4 - hidden_activation_type: "gelu" - hidden_dropout_prob: 0.0 - attention_dropout_prob: 0.0 - layer_norm_eps: 1e-5 + params: + intermediate_ratio: 4 + hidden_activation_type: "gelu" + hidden_dropout_prob: 0.0 + attention_dropout_prob: 0.0 + layer_norm_eps: 1e-5 + stage_params: + - + num_blocks: 2 + sr_ratios: 8 + hidden_sizes: 32 + embedding_patch_sizes: 7 + embedding_strides: 4 + num_attention_heads: 1 + - + num_blocks: 2 + sr_ratios: 4 + hidden_sizes: 64 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 2 + - + num_blocks: 2 + sr_ratios: 2 + hidden_sizes: 160 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 5 + - + num_blocks: 2 + 
sr_ratios: 1 + hidden_sizes: 256 + embedding_patch_sizes: 3 + embedding_strides: 2 + num_attention_heads: 8 head: name: all_mlp_decoder losses: diff --git a/config/model/vit/vit-classification.yaml b/config/model/vit/vit-classification.yaml index 2fa9a0a8..5b0e063f 100644 --- a/config/model/vit/vit-classification.yaml +++ b/config/model/vit/vit-classification.yaml @@ -1,5 +1,6 @@ model: task: classification + name: vit_tiny checkpoint: ./weights/vit/vit-tiny.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,16 +9,21 @@ model: full: ~ # auto backbone: name: vit - patch_size: 16 - hidden_size: 192 - num_blocks: 12 - num_attention_heads: 3 - attention_dropout_prob: 0.0 - intermediate_size: 768 # hidden_size * 4 - hidden_dropout_prob: 0.1 + params: + patch_size: 16 + hidden_size: 192 + num_blocks: 12 + num_attention_heads: 3 + attention_dropout_prob: 0.0 + intermediate_size: 768 # hidden_size * 4 + hidden_dropout_prob: 0.1 + layer_norm_eps: 1e-6 + use_cls_token: True + vocab_size: 1000 + stage_params: ~ head: name: fc losses: - - criterion: label_smoothing_cross_entropy - smoothing: 0.1 + - criterion: cross_entropy + label_smoothing: 0.1 weight: ~ \ No newline at end of file diff --git a/config/model/yolox/yolox-detection.yaml b/config/model/yolox/yolox-detection.yaml index a5502fba..67137cfa 100644 --- a/config/model/yolox/yolox-detection.yaml +++ b/config/model/yolox/yolox-detection.yaml @@ -1,5 +1,6 @@ model: task: detection + name: yolox_s checkpoint: ./weights/yolox/yolox_s.pth fx_model_checkpoint: ~ resume_optimizer_checkpoint: ~ @@ -8,10 +9,15 @@ model: full: ~ # auto backbone: name: cspdarknet - dep_mul: 0.33 - wid_mul: 0.5 + params: + dep_mul: 0.33 + wid_mul: 0.5 + act_type: "silu" + stage_params: ~ + neck: + name: pafpn head: - name: yolo_head + name: yolox_head losses: - criterion: yolox_loss weight: ~ \ No newline at end of file diff --git a/demo/gradio_augmentation.py b/demo/gradio_augmentation.py index e885dab7..5c7cc71b 100644 --- a/demo/gradio_augmentation.py +++ b/demo/gradio_augmentation.py @@ -25,7 +25,7 @@ def summary_transform(phase, task, model_name, yaml_str): try: conf = OmegaConf.create(yaml_str) is_training = (phase == 'train') - transform = CREATE_TRANSFORM[task](model_name, is_training=is_training) + transform = CREATE_TRANSFORM(model_name, is_training=is_training) transform_composed = transform(conf.augmentation) return str(transform_composed) except Exception as e: @@ -37,7 +37,7 @@ def get_augmented_images(phase, task, model_name, yaml_str, test_image, try: conf = OmegaConf.create(yaml_str) is_training = (phase == 'train') - transform = CREATE_TRANSFORM[task](model_name, is_training=is_training) + transform = CREATE_TRANSFORM(model_name, is_training=is_training) transform_composed = transform(conf.augmentation) transformed_images = [transform_composed(test_image, @@ -88,7 +88,7 @@ def launch_gradio(args): task_choices = gr.Radio(label="Task: ", value='classification', choices=SUPPORTING_TASK_LIST) with gr.Column(scale=1): phase_choices = gr.Radio(label="Phase: ", value='train', choices=['train', 'valid']) - model_choices = gr.Radio(label="Model: ", value='resnet50', choices=SUPPORTING_MODEL_LIST) + model_choices = gr.Radio(label="Model: ", value='resnet', choices=SUPPORTING_MODEL_LIST) with gr.Row(equal_height=True): with gr.Column(scale=1): config_input = gr.Code(label="Augmentation configuration", value=args.config.read_text(), language='yaml', lines=30) diff --git a/pyproject.toml b/pyproject.toml index 303021ee..ca0b3de6 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ extend-select = [ "I", "SIM", "INP001", + "W" ] ignore = [ @@ -19,6 +20,8 @@ extend-exclude = [ "docs/*.py", "src/netspresso_trainer/models/backbones/core", "src/netspresso_trainer/models/backbones/experimental", + "src/netspresso_trainer/models/necks/core", + "src/netspresso_trainer/models/necks/experimental", "src/netspresso_trainer/models/heads/classification", "src/netspresso_trainer/models/heads/detection", "src/netspresso_trainer/models/heads/segmentation", diff --git a/src/netspresso_trainer/VERSION b/src/netspresso_trainer/VERSION index 429d94ae..b0a12275 100644 --- a/src/netspresso_trainer/VERSION +++ b/src/netspresso_trainer/VERSION @@ -1 +1 @@ -0.0.9 \ No newline at end of file +0.0.10 \ No newline at end of file diff --git a/src/netspresso_trainer/__init__.py b/src/netspresso_trainer/__init__.py index 24b54002..0c4517b2 100644 --- a/src/netspresso_trainer/__init__.py +++ b/src/netspresso_trainer/__init__.py @@ -10,4 +10,4 @@ version = (Path(__file__).parent / "VERSION").read_text().strip() -__version__ = version \ No newline at end of file +__version__ = version diff --git a/src/netspresso_trainer/cfg/__init__.py b/src/netspresso_trainer/cfg/__init__.py index 892dcd41..4315dab4 100644 --- a/src/netspresso_trainer/cfg/__init__.py +++ b/src/netspresso_trainer/cfg/__init__.py @@ -8,6 +8,12 @@ ClassificationAugmentationConfig, ColorJitter, DetectionAugmentationConfig, + Pad, + RandomCrop, + RandomHorizontalFlip, + RandomResizedCrop, + RandomVerticalFlip, + Resize, SegmentationAugmentationConfig, ) from .data import ( @@ -32,15 +38,22 @@ from .logging import LoggingConfig from .model import ( ClassificationEfficientFormerModelConfig, + ClassificationMixNetLargeModelConfig, + ClassificationMixNetMediumModelConfig, + ClassificationMixNetSmallModelConfig, ClassificationMobileNetV3ModelConfig, ClassificationMobileViTModelConfig, ClassificationResNetModelConfig, ClassificationSegFormerModelConfig, ClassificationViTModelConfig, DetectionEfficientFormerModelConfig, + DetectionYoloXModelConfig, ModelConfig, PIDNetModelConfig, SegmentationEfficientFormerModelConfig, + SegmentationMixNetLargeModelConfig, + SegmentationMixNetMediumModelConfig, + SegmentationMixNetSmallModelConfig, SegmentationMobileNetV3ModelConfig, SegmentationResNetModelConfig, SegmentationSegFormerModelConfig, @@ -59,6 +72,7 @@ 'detection': DetectionScheduleConfig } + @dataclass class TrainerConfig: task: str = field(default=MISSING, metadata={"omegaconf_ignore": True}) @@ -69,19 +83,19 @@ class TrainerConfig: training: Optional[ScheduleConfig] = None logging: LoggingConfig = field(default_factory=lambda: LoggingConfig()) environment: EnvironmentConfig = field(default_factory=lambda: EnvironmentConfig()) - + @property def epochs(self) -> int: return self.training.epochs - + @property def batch_size(self) -> int: return self.training.batch_size - + @property def num_workers(self) -> int: return self.environment.num_workers - + @epochs.setter def epochs(self, v: int) -> None: self.training.epochs = v @@ -89,18 +103,18 @@ def epochs(self, v: int) -> None: @batch_size.setter def batch_size(self, v: int) -> None: self.training.batch_size = v - + @num_workers.setter def num_workers(self, v: int) -> None: self.environment.num_workers = v - + def __post_init__(self): assert self.task in ['classification', 'segmentation', 'detection'] self.data.task = self.task self.model.task = self.task - + if self.auto: if self.augmentation is None: self.augmentation = 
_AUGMENTATION_CONFIG_TYPE_DICT[self.task]() if self.training is None: - self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() \ No newline at end of file + self.training = _TRAINING_CONFIG_TYPE_DICT[self.task]() diff --git a/src/netspresso_trainer/cfg/augmentation.py b/src/netspresso_trainer/cfg/augmentation.py index 8e9378b7..1a0d5730 100644 --- a/src/netspresso_trainer/cfg/augmentation.py +++ b/src/netspresso_trainer/cfg/augmentation.py @@ -1,12 +1,29 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Optional, Union +from typing import List, Optional, Union from omegaconf import MISSING, MissingMandatoryValue +DEFAULT_IMG_SIZE = 256 + @dataclass -class ColorJitter: +class Transform: + name: str = MISSING + + +@dataclass +class AugmentationConfig: + img_size: int = DEFAULT_IMG_SIZE + transforms: List[Transform] = field(default_factory=lambda: [ + Transform() + ]) + mix_transforms: Optional[List[Transform]] = None + + +@dataclass +class ColorJitter(Transform): + name: str = 'colorjitter' brightness: Optional[float] = 0.25 contrast: Optional[float] = 0.25 saturation: Optional[float] = 0.25 @@ -15,56 +32,83 @@ class ColorJitter: @dataclass -class AugmentationConfig: - img_size: int = 256 - max_scale: Optional[int] = 1024 - min_scale: Optional[int] = None - crop_size_h: Optional[int] = None - crop_size_w: Optional[int] = None - resize_ratio0: Optional[float] = None - resize_ratiof: Optional[float] = None - resize_add: Optional[float] = 1 - fliplr: Optional[float] = 0.5 - color_jitter: Optional[ColorJitter] = field(default_factory=lambda: ColorJitter()) - - +class Pad(Transform): + name: str = 'pad' + padding: Union[int, List] = 0 + + +@dataclass +class RandomCrop(Transform): + name: str = 'randomcrop' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' + + +@dataclass +class RandomResizedCrop(Transform): + name: str = 'randomresizedcrop' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' + + +@dataclass +class RandomHorizontalFlip(Transform): + name: str = 'randomhorizontalflip' + p: float = 0.5 + + +@dataclass +class RandomVerticalFlip(Transform): + name: str = 'randomverticalflip' + p: float = 0.5 + + +@dataclass +class Resize(Transform): + name: str = 'resize' + size: int = DEFAULT_IMG_SIZE + interpolation: Optional[str] = 'bilinear' + + +@dataclass +class RandomMixup(Transform): + name: str = 'mixup' + alpha: float = 0.2 + p: float = 1.0 + + +@dataclass +class RandomCutmix(Transform): + name: str = 'cutmix' + alpha: float = 1.0 + p: float = 1.0 + @dataclass class ClassificationAugmentationConfig(AugmentationConfig): - resize_ratio0 = None - resize_ratiof = None - resize_add = None - color_jitter = None + img_size: int = 256 + transforms: List[Transform] = field(default_factory=lambda: [ + RandomResizedCrop(size=256), + RandomHorizontalFlip() + ]) + mix_transforms: List[Transform] = field(default_factory=lambda: [ + RandomCutmix(), + ]) @dataclass class SegmentationAugmentationConfig(AugmentationConfig): - img_size = 512 - resize_ratio0 = 1.0 - resize_ratiof = 1.5 - - def __post_init__(self): - # variable interpolation - if self.min_scale is None: - self.min_scale = self.img_size - if self.crop_size_h is None: - self.crop_size_h = self.img_size - if self.crop_size_w is None: - self.crop_size_w = self.img_size - + img_size: int = 512 + transforms: List[Transform] = field(default_factory=lambda: [ + RandomResizedCrop(size=512), + RandomHorizontalFlip(), + ColorJitter() + ]) + @dataclass class 
DetectionAugmentationConfig(AugmentationConfig): - img_size = 512 - max_scale = 2048 - min_scale = 768 - resize_ratio0: 0.5 - resize_ratiof: 2.0 - resize_add: 1 - - def __post_init__(self): - # variable interpolation - if self.crop_size_h is None: - self.crop_size_h = self.img_size - if self.crop_size_w is None: - self.crop_size_w = self.img_size \ No newline at end of file + img_size: int = 512 + transforms: List[Transform] = field(default_factory=lambda: [ + Resize(size=512) + ]) diff --git a/src/netspresso_trainer/cfg/data.py b/src/netspresso_trainer/cfg/data.py index 000624b1..21e2abc5 100644 --- a/src/netspresso_trainer/cfg/data.py +++ b/src/netspresso_trainer/cfg/data.py @@ -262,4 +262,4 @@ class HuggingFaceSegmentationDatasetConfig(DatasetConfig): subset="full", features={"image": "image", "label": "artist"} ) -) \ No newline at end of file +) diff --git a/src/netspresso_trainer/cfg/model.py b/src/netspresso_trainer/cfg/model.py index c9d47604..71184e1c 100644 --- a/src/netspresso_trainer/cfg/model.py +++ b/src/netspresso_trainer/cfg/model.py @@ -18,20 +18,31 @@ "ClassificationSegFormerModelConfig", "SegmentationSegFormerModelConfig", "ClassificationViTModelConfig", + "DetectionYoloXModelConfig", + "ClassificationMixNetSmallModelConfig", + "ClassificationMixNetMediumModelConfig", + "ClassificationMixNetLargeModelConfig", + "SegmentationMixNetSmallModelConfig", + "SegmentationMixNetMediumModelConfig", + "SegmentationMixNetLargeModelConfig", ] + @dataclass class ArchitectureConfig: full: Optional[Dict[str, Any]] = None backbone: Optional[Dict[str, Any]] = None + neck: Optional[Dict[str, Any]] = None head: Optional[Dict[str, Any]] = None - + def __post_init__(self): assert bool(self.full) != bool(self.backbone), "Only one of full or backbone should be given." 
- + + @dataclass class ModelConfig: task: str = MISSING + name: str = MISSING checkpoint: Optional[Union[Path, str]] = None fx_model_checkpoint: Optional[Union[Path, str]] = None resume_optimizer_checkpoint: Optional[Union[Path, str]] = None @@ -44,56 +55,81 @@ class ModelConfig: class EfficientFormerArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "efficientformer", - "num_blocks": [3, 2, 6, 4], - "hidden_sizes": [48, 96, 224, 448], - "num_attention_heads": 8, - "attention_hidden_size": 256, # attention_hidden_size_splitted * num_attention_heads - "attention_dropout_prob": 0., - "attention_ratio": 4, - "attention_bias_resolution": 16, - "pool_size": 3, - "intermediate_ratio": 4, - "hidden_dropout_prob": 0., - "hidden_activation_type": 'gelu', - "layer_norm_eps": 1e-5, - "drop_path_rate": 0., - "use_layer_scale": True, - "layer_scale_init_value": 1e-5, - "downsamples": [True, True, True, True], - "down_patch_size": 3, - "down_stride": 2, - "down_pad": 1, - "vit_num": 1, + "params": { + "num_attention_heads": 8, + "attention_hidden_size": 256, + "attention_dropout_prob": 0., + "attention_ratio": 4, + "attention_bias_resolution": 16, + "pool_size": 3, + "intermediate_ratio": 4, + "hidden_dropout_prob": 0., + "hidden_activation_type": 'gelu', + "layer_norm_eps": 1e-5, + "drop_path_rate": 0., + "use_layer_scale": True, + "layer_scale_init_value": 1e-5, + "down_patch_size": 3, + "down_stride": 2, + "down_pad": 1, + "vit_num": 1, + }, + "stage_params": [ + {"num_blocks": 3, "hidden_sizes": 48, "downsamples": True}, + {"num_blocks": 2, "hidden_sizes": 96, "downsamples": True}, + {"num_blocks": 6, "hidden_sizes": 224, "downsamples": True}, + {"num_blocks": 4, "hidden_sizes": 448, "downsamples": True}, + ], }) @dataclass class MobileNetV3ArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { - "name": "mobilenetv3_small", - - # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] - "block_info": [ - [ - [16, 3, 16, 16, True, "relu", 2, 1] - ], - [ - [16, 3, 72, 24, False, "relu", 2, 1], - [24, 3, 88, 24, False, "relu", 1, 1] - ], - [ - [24, 5, 96, 40, True, "hard_swish", 2, 1], - [40, 5, 240, 40, True, "hard_swish", 1, 1], - [40, 5, 240, 40, True, "hard_swish", 1, 1], - [40, 5, 120, 48, True, "hard_swish", 1, 1], - [48, 5, 144, 48, True, "hard_swish", 1, 1] - ], - [ - [48, 5, 288, 96, True, "hard_swish", 2, 1], - [96, 5, 576, 96, True, "hard_swish", 1, 1], - [96, 5, 576, 96, True, "hard_swish", 1, 1] - ] - ] + "name": "mobilenetv3", + "params": None, + "stage_params": [ + { + "in_channels": [16], + "kernel": [3], + "expanded_channels": [16], + "out_channels": [16], + "use_se": [True], + "activation": ["relu"], + "stride": [2], + "dilation": [1], + }, + { + "in_channels": [16, 24], + "kernel": [3, 3], + "expanded_channels": [72, 88], + "out_channels": [24, 24], + "use_se": [False, False], + "activation": ["relu", "relu"], + "stride": [2, 1], + "dilation": [1, 1], + }, + { + "in_channels": [24, 40, 40, 40, 48], + "kernel": [5, 5, 5, 5, 5], + "expanded_channels": [96, 240, 240, 120, 144], + "out_channels": [40, 40, 40, 48, 48], + "use_se": [True, True, True, True, True], + "activation": ["hard_swish", "hard_swish", "hard_swish", "hard_swish", "hard_swish"], + "stride": [2, 1, 1, 1, 1], + "dilation": [1, 1, 1, 1, 1], + }, + { + "in_channels": [48, 96, 96], + "kernel": [5, 5, 5], + "expanded_channels": [288, 576, 576], + "out_channels": [96, 96, 96], + "use_se": 
[True, True, True], + "activation": ["hard_swish", "hard_swish", "hard_swish"], + "stride": [2, 1, 1], + "dilation": [1, 1, 1], + }, + ], }) @@ -101,24 +137,74 @@ class MobileNetV3ArchitectureConfig(ArchitectureConfig): class MobileViTArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "mobilevit", - "out_channels": [32, 64, 96, 128, 160], - "block_type": ['mv2', 'mv2', 'mobilevit', 'mobilevit', 'mobilevit'], - "num_blocks": [1, 3, None, None, None], - "stride": [1, 2, 2, 2, 2], - "hidden_size": [None, None, 144, 192, 240], - "intermediate_size": [None, None, 288, 384, 480], - "num_transformer_blocks": [None, None, 2, 4, 3], - "dilate": [None, None, False, False, False], - "expand_ratio": [4, 4, 4, 4, 4], # [mv2_exp_mult] * 4 - "patch_embedding_out_channels": 16, - "local_kernel_size": 3, - "patch_size": 2, - "num_attention_heads": 4, # num_heads - "attention_dropout_prob": 0.1, - "hidden_dropout_prob": 0.0, - "exp_factor": 4, - "layer_norm_eps": 1e-5, - "use_fusion_layer": True, + "params": { + "patch_embedding_out_channels": 16, + "local_kernel_size": 3, + "patch_size": 2, + "num_attention_heads": 4, + "attention_dropout_prob": 0.1, + "hidden_dropout_prob": 0.0, + "exp_factor": 4, + "layer_norm_eps": 1e-5, + "use_fusion_layer": True, + }, + "stage_params": [ + { + "out_channels": 32, + "block_type": "mv2", + "num_blocks": 1, + "stride": 1, + "hidden_size": None, + "intermediate_size": None, + "num_transformer_blocks": None, + "dilate": None, + "expand_ratio": 4, + }, + { + "out_channels": 64, + "block_type": "mv2", + "num_blocks": 3, + "stride": 2, + "hidden_size": None, + "intermediate_size": None, + "num_transformer_blocks": None, + "dilate": None, + "expand_ratio": 4, + }, + { + "out_channels": 96, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 144, + "intermediate_size": 288, + "num_transformer_blocks": 2, + "dilate": False, + "expand_ratio": 4, + }, + { + "out_channels": 128, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 192, + "intermediate_size": 384, + "num_transformer_blocks": 4, + "dilate": False, + "expand_ratio": 4, + }, + { + "out_channels": 160, + "block_type": "mobilevit", + "num_blocks": None, + "stride": 2, + "hidden_size": 240, + "intermediate_size": 480, + "num_transformer_blocks": 3, + "dilate": False, + "expand_ratio": 4, + }, + ] }) @@ -137,9 +223,21 @@ class PIDNetArchitectureConfig(ArchitectureConfig): @dataclass class ResNetArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { - "name": "resnet50", - "block": "bottleneck", - "layers": [3, 4, 6, 3], + "name": "resnet", + "params": { + "block": "bottleneck", + "norm_layer": "batch_norm", + "groups": 1, + "width_per_group": 64, + "zero_init_residual": False, + "expansion": None, + }, + "stage_params": [ + {"plane": 64, "layers": 3}, + {"plane": 128, "layers": 4}, + {"plane": 256, "layers": 6}, + {"plane": 512, "layers": 3}, + ], }) @@ -147,18 +245,47 @@ class ResNetArchitectureConfig(ArchitectureConfig): class SegFormerArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "segformer", - "num_modules": 4, - "num_blocks": [2, 2, 2, 2], - "sr_ratios": [8, 4, 2, 1], - "hidden_sizes": [32, 64, 160, 256], - "embedding_patch_sizes": [7, 3, 3, 3], - "embedding_strides": [4, 2, 2, 2], - "num_attention_heads": [1, 2, 5, 8], - "intermediate_ratio": 4, - "hidden_activation_type": "gelu", - 
"hidden_dropout_prob": 0.0, - "attention_dropout_prob": 0.0, - "layer_norm_eps": 1e-5, + "params": { + "intermediate_ratio": 4, + "hidden_activation_type": "gelu", + "hidden_dropout_prob": 0.0, + "attention_dropout_prob": 0.0, + "layer_norm_eps": 1e-5, + }, + "stage_params": [ + { + "num_blocks": 2, + "sr_ratios": 8, + "hidden_sizes": 32, + "embedding_patch_sizes": 7, + "embedding_strides": 4, + "num_attention_heads": 1, + }, + { + "num_blocks": 2, + "sr_ratios": 4, + "hidden_sizes": 64, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 2, + }, + { + "num_blocks": 2, + "sr_ratios": 2, + "hidden_sizes": 160, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 5, + }, + { + "num_blocks": 2, + "sr_ratios": 1, + "hidden_sizes": 256, + "embedding_patch_sizes": 3, + "embedding_strides": 2, + "num_attention_heads": 8, + }, + ], }) @@ -166,31 +293,241 @@ class SegFormerArchitectureConfig(ArchitectureConfig): class ViTArchitectureConfig(ArchitectureConfig): backbone: Dict[str, Any] = field(default_factory=lambda: { "name": "vit", - "patch_size": 16, - "hidden_size": 192, - "num_blocks": 12, - "num_attention_heads": 3, - "attention_dropout_prob": 0.0, - "intermediate_size": 192 * 4, - "hidden_dropout_prob": 0.1, + "params": { + "patch_size": 16, + "hidden_size": 192, + "num_blocks": 12, + "num_attention_heads": 3, + "attention_dropout_prob": 0.0, + "intermediate_size": 768, + "hidden_dropout_prob": 0.1, + "layer_norm_eps": 1e-6, + "use_cls_token": True, + "vocab_size": 1000, + }, + "stage_params": None, + }) + + +@dataclass +class MixNetSmallArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 16, + "width_multi": 1.0, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + "stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [16, 24, 24], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 2, 1, 2], + "kernel_sizes": [[3, 5, 7], [3, 5], [3, 5, 7], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1, 1], [1, 1], [1, 1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 2], + "kernel_sizes": [[3, 5, 7, 9, 11], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class MixNetMediumArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 24, + "width_multi": 1.0, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + 
"stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [24, 32, 32], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3, 5, 7], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 3, 1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class MixNetLargeArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "mixnet", + "params": { + "stem_planes": 24, + "width_multi": 1.3, + "depth_multi": 1.0, + "dropout_rate": 0., + }, + "stage_params": [ + { + "expand_ratio": [1, 6, 3], + "out_channels": [24, 32, 32], + "num_blocks": [1, 1, 1], + "kernel_sizes": [[3], [3, 5, 7], [3]], + "exp_kernel_sizes": [[1], [1, 1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1, 1]], + "stride": [1, 2, 1], + "dilation": [1, 1, 1], + "act_type": ["relu", "relu", "relu"], + "se_reduction_ratio": [None, None, None], + }, + { + "expand_ratio": [6, 6], + "out_channels": [40, 40], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5]], + "exp_kernel_sizes": [[1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + { + "expand_ratio": [6, 6, 6, 3], + "out_channels": [80, 80, 120, 120], + "num_blocks": [1, 3, 1, 3], + "kernel_sizes": [[3, 5, 7], [3, 5, 7, 9], [3], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "poi_kernel_sizes": [[1], [1, 1], [1], [1, 1]], + "stride": [2, 1, 1, 1], + "dilation": [1, 1, 1, 1], + "act_type": ["swish", "swish", "swish", "swish"], + "se_reduction_ratio": [4, 4, 2, 2], + }, + { + "expand_ratio": [6, 6], + "out_channels": [200, 200], + "num_blocks": [1, 3], + "kernel_sizes": [[3, 5, 7, 9], [3, 5, 7, 9]], + "exp_kernel_sizes": [[1], [1]], + "poi_kernel_sizes": [[1], [1, 1]], + "stride": [2, 1], + "dilation": [1, 1], + "act_type": ["swish", "swish"], + "se_reduction_ratio": [2, 2], + }, + ], + }) + + +@dataclass +class CSPDarkNetSmallArchitectureConfig(ArchitectureConfig): + backbone: Dict[str, Any] = field(default_factory=lambda: { + "name": "cspdarknet", + "params": { + "dep_mul": 0.33, + "wid_mul": 0.5, + "act_type": "silu", + }, + "stage_params": None, }) @dataclass class ClassificationEfficientFormerModelConfig(ModelConfig): task: str = "classification" + name: str = "efficientformer_l1" checkpoint: 
Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @dataclass class SegmentationEfficientFormerModelConfig(ModelConfig): task: str = "segmentation" + name: str = "efficientformer_l1" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -203,8 +540,10 @@ class SegmentationEfficientFormerModelConfig(ModelConfig): @dataclass class DetectionEfficientFormerModelConfig(ModelConfig): task: str = "detection" + name: str = "efficientformer_l1" checkpoint: Optional[Union[Path, str]] = "./weights/efficientformer/efficientformer_l1_1000d.pth" architecture: ArchitectureConfig = field(default_factory=lambda: EfficientFormerArchitectureConfig( + neck={"name": "fpn"}, head={"name": "faster_rcnn"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ @@ -216,18 +555,20 @@ class DetectionEfficientFormerModelConfig(ModelConfig): @dataclass class ClassificationMobileNetV3ModelConfig(ModelConfig): task: str = "classification" + name: str = "mobilenet_v3_small" checkpoint: Optional[Union[Path, str]] = "./weights/mobilenetv3/mobilenet_v3_small.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileNetV3ArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @dataclass class SegmentationMobileNetV3ModelConfig(ModelConfig): task: str = "segmentation" + name: str = "mobilenet_v3_small" checkpoint: Optional[Union[Path, str]] = "./weights/mobilenetv3/mobilenet_v3_small.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileNetV3ArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -240,18 +581,20 @@ class SegmentationMobileNetV3ModelConfig(ModelConfig): @dataclass class ClassificationMobileViTModelConfig(ModelConfig): task: str = "classification" + name: str = "mobilevit_s" checkpoint: Optional[Union[Path, str]] = "./weights/mobilevit/mobilevit_s.pth" architecture: ArchitectureConfig = field(default_factory=lambda: MobileViTArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @dataclass class PIDNetModelConfig(ModelConfig): - task: str = "classification" + task: str = "segmentation" + name: str = "pidnet_s" checkpoint: Optional[Union[Path, str]] = "./weights/pidnet/pidnet_s.pth" architecture: ArchitectureConfig = field(default_factory=lambda: PIDNetArchitectureConfig()) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ @@ -264,18 +607,20 @@ class PIDNetModelConfig(ModelConfig): @dataclass class ClassificationResNetModelConfig(ModelConfig): task: str = "classification" + name: str = "resnet50" checkpoint: Optional[Union[Path, str]] = "./weights/resnet/resnet50.pth" architecture: 
ArchitectureConfig = field(default_factory=lambda: ResNetArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @dataclass class SegmentationResNetModelConfig(ModelConfig): task: str = "segmentation" + name: str = "resnet50" checkpoint: Optional[Union[Path, str]] = "./weights/resnet/resnet50.pth" architecture: ArchitectureConfig = field(default_factory=lambda: ResNetArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -288,18 +633,20 @@ class SegmentationResNetModelConfig(ModelConfig): @dataclass class ClassificationSegFormerModelConfig(ModelConfig): task: str = "classification" + name: str = "segformer" checkpoint: Optional[Union[Path, str]] = "./weights/segformer/segformer.pth" architecture: ArchitectureConfig = field(default_factory=lambda: SegFormerArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) @dataclass class SegmentationSegFormerModelConfig(ModelConfig): task: str = "segmentation" + name: str = "segformer" checkpoint: Optional[Union[Path, str]] = "./weights/segformer/segformer.pth" architecture: ArchitectureConfig = field(default_factory=lambda: SegFormerArchitectureConfig( head={"name": "all_mlp_decoder"} @@ -312,11 +659,103 @@ class SegmentationSegFormerModelConfig(ModelConfig): @dataclass class ClassificationViTModelConfig(ModelConfig): task: str = "classification" + name: str = "vit_tiny" checkpoint: Optional[Union[Path, str]] = "./weights/vit/vit-tiny.pth" architecture: ArchitectureConfig = field(default_factory=lambda: ViTArchitectureConfig( head={"name": "fc"} )) losses: List[Dict[str, Any]] = field(default_factory=lambda: [ - {"criterion": "label_smoothing_cross_entropy", "smoothing": 0.1, "weight": None} + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class DetectionYoloXModelConfig(ModelConfig): + task: str = "detection" + name: str = "yolox_s" + checkpoint: Optional[Union[Path, str]] = "./weights/yolox/yolox_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: CSPDarkNetSmallArchitectureConfig( + neck={"name": "pafpn"}, + head={"name": "yolox_head"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "yolox_loss", "weight": None} + ]) + + +@dataclass +class ClassificationMixNetSmallModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_s" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetSmallArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} ]) + +@dataclass +class SegmentationMixNetSmallModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_s" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_s.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetSmallArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, 
"weight": None} + ]) + + +@dataclass +class ClassificationMixNetMediumModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_m" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_m.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetMediumArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class SegmentationMixNetMediumModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_m" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_m.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetMediumArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, "weight": None} + ]) + + +@dataclass +class ClassificationMixNetLargeModelConfig(ModelConfig): + task: str = "classification" + name: str = "mixnet_l" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_l.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetLargeArchitectureConfig( + head={"name": "fc"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "label_smoothing": 0.1, "weight": None} + ]) + + +@dataclass +class SegmentationMixNetLargeModelConfig(ModelConfig): + task: str = "segmentation" + name: str = "mixnet_l" + checkpoint: Optional[Union[Path, str]] = "./weights/mixnet/mixnet_l.pth" + architecture: ArchitectureConfig = field(default_factory=lambda: MixNetLargeArchitectureConfig( + head={"name": "all_mlp_decoder"} + )) + losses: List[Dict[str, Any]] = field(default_factory=lambda: [ + {"criterion": "cross_entropy", "ignore_index": 255, "weight": None} + ]) diff --git a/src/netspresso_trainer/cfg/training.py b/src/netspresso_trainer/cfg/training.py index a13be88f..3c0c32ff 100644 --- a/src/netspresso_trainer/cfg/training.py +++ b/src/netspresso_trainer/cfg/training.py @@ -32,4 +32,4 @@ class SegmentationScheduleConfig(ScheduleConfig): @dataclass class DetectionScheduleConfig(ScheduleConfig): - pass \ No newline at end of file + pass diff --git a/src/netspresso_trainer/dataloaders/augmentation/__init__.py b/src/netspresso_trainer/dataloaders/augmentation/__init__.py index 0c893e74..624fcd12 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/augmentation/__init__.py @@ -12,3 +12,4 @@ Resize, ToTensor, ) +from .registry import TRANSFORM_DICT diff --git a/src/netspresso_trainer/dataloaders/augmentation/custom.py b/src/netspresso_trainer/dataloaders/augmentation/custom.py index 720e5b0c..0895bd3b 100644 --- a/src/netspresso_trainer/dataloaders/augmentation/custom.py +++ b/src/netspresso_trainer/dataloaders/augmentation/custom.py @@ -1,15 +1,27 @@ +import math import random from collections.abc import Sequence -from typing import Dict, Optional +from typing import Dict, List, Optional, Tuple import numpy as np import PIL.Image as Image import torch import torchvision.transforms as T import torchvision.transforms.functional as F +from omegaconf import ListConfig +from torch import Tensor +from torch.nn import functional as F_torch +from torchvision.transforms.autoaugment import _apply_op +from torchvision.transforms.functional import InterpolationMode BBOX_CROP_KEEP_THRESHOLD = 0.2 MAX_RETRY = 5 
+INVERSE_MODES_MAPPING = { + 'nearest': InterpolationMode.NEAREST, + 'bilinear': InterpolationMode.BILINEAR, + 'bicubic': InterpolationMode.BICUBIC, +} + class Compose: def __init__(self, transforms, additional_targets: Dict = None): @@ -22,7 +34,7 @@ def _get_transformed(self, image, mask, bbox, visualize_for_debug): for t in self.transforms: if visualize_for_debug and not t.visualize: continue - image, mask, bbox = t(image=image, mask=mask, bbox=bbox) + image, mask, bbox = t(image=image, mask=mask, bbox=bbox) return image, mask, bbox def __call__(self, image, mask=None, bbox=None, visualize_for_debug=False, **kwargs): @@ -91,6 +103,18 @@ def __repr__(self): class Resize(T.Resize): visualize = True + def __init__(self, size, interpolation='bilinear', max_size=None, antialias=None): + interpolation = INVERSE_MODES_MAPPING[interpolation] + + # TODO: There is logic error in forward. If `size` is int, this specify edge for shorter one. + # And, this is not match with bbox computing logic. + # Thus, automatically transform to sequence format for now, + # but this should be specified whether Resize receives sequence or int. + if isinstance(size, int): + size = [size, size] + + super().__init__(size, interpolation, max_size, antialias) + def forward(self, image, mask=None, bbox=None): w, h = image.size @@ -275,6 +299,15 @@ def __repr__(self): class RandomResizedCrop(T.RandomResizedCrop): visualize = True + def __init__(self, + size, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation='bilinear', + antialias: Optional[bool]=None): + interpolation = INVERSE_MODES_MAPPING[interpolation] + super().__init__(size, scale, ratio, interpolation, antialias) + def _crop_bbox(self, bbox, i, j, h, w): area_original = (bbox[..., 2] - bbox[..., 0]) * (bbox[..., 3] - bbox[..., 1]) @@ -321,6 +354,300 @@ def __repr__(self): return format_string +class RandomErasing(T.RandomErasing): + visualize = True + + def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): + if isinstance(scale, ListConfig): + scale = tuple(scale) + if isinstance(ratio, ListConfig): + ratio = tuple(ratio) + if isinstance(value, ListConfig): + value = tuple(value) + super().__init__(p, scale, ratio, value, inplace) + + @staticmethod + def get_params( + img, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[int] = None + ): + img_w, img_h = img.size + + area = img_h * img_w + + log_ratio = torch.log(torch.tensor(ratio)) + for _ in range(10): + erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item() + + h = int(round(math.sqrt(erase_area * aspect_ratio))) + w = int(round(math.sqrt(erase_area / aspect_ratio))) + if not (h < img_h and w < img_w): + continue + + if value is None: + v = np.random.randint(255, size=(h, w)).astype('uint8') + v = Image.fromarray(v).convert(img.mode) + else: + v = Image.new(img.mode, (w, h), value) + + i = torch.randint(0, img_h - h + 1, size=(1,)).item() + j = torch.randint(0, img_w - w + 1, size=(1,)).item() + return i, j, v + + # Return original image + return 0, 0, img + + def forward(self, image, mask=None, bbox=None): + if torch.rand(1) < self.p: + x, y, v = self.get_params(image, scale=self.scale, ratio=self.ratio, value=self.value) + image.paste(v, (y, x)) + # TODO: Object-aware + return image, mask, bbox + return image, mask, bbox + + +class TrivialAugmentWide(torch.nn.Module): + """ + Based on the torchvision implementation. 
+ https://pytorch.org/vision/main/_modules/torchvision/transforms/autoaugment.html#TrivialAugmentWide + """ + visualize = True + + def __init__( + self, + num_magnitude_bins: int = 31, + interpolation: InterpolationMode = 'bilinear', + fill: Optional[List[float]] = None, + ) -> None: + super().__init__() + interpolation = INVERSE_MODES_MAPPING[interpolation] + + self.num_magnitude_bins = num_magnitude_bins + self.interpolation = interpolation + self.fill = fill + + def _augmentation_space(self, num_bins: int) -> Dict[str, Tuple[Tensor, bool]]: + return { + # op_name: (magnitudes, signed) + "Identity": (torch.tensor(0.0), False), + "ShearX": (torch.linspace(0.0, 0.99, num_bins), True), + "ShearY": (torch.linspace(0.0, 0.99, num_bins), True), + "TranslateX": (torch.linspace(0.0, 32.0, num_bins), True), + "TranslateY": (torch.linspace(0.0, 32.0, num_bins), True), + "Rotate": (torch.linspace(0.0, 135.0, num_bins), True), + "Brightness": (torch.linspace(0.0, 0.99, num_bins), True), + "Color": (torch.linspace(0.0, 0.99, num_bins), True), + "Contrast": (torch.linspace(0.0, 0.99, num_bins), True), + "Sharpness": (torch.linspace(0.0, 0.99, num_bins), True), + "Posterize": (8 - (torch.arange(num_bins) / ((num_bins - 1) / 6)).round().int(), False), + "Solarize": (torch.linspace(255.0, 0.0, num_bins), False), + "AutoContrast": (torch.tensor(0.0), False), + "Equalize": (torch.tensor(0.0), False), + } + + def forward(self, image, mask=None, bbox=None): + fill = self.fill + channels, height, width = F.get_dimensions(image) + if isinstance(image, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + elif fill is not None: + fill = [float(f) for f in fill] + + op_meta = self._augmentation_space(self.num_magnitude_bins) + op_index = int(torch.randint(len(op_meta), (1,)).item()) + op_name = list(op_meta.keys())[op_index] + magnitudes, signed = op_meta[op_name] + magnitude = ( + float(magnitudes[torch.randint(len(magnitudes), (1,), dtype=torch.long)].item()) + if magnitudes.ndim > 0 + else 0.0 + ) + if signed and torch.randint(2, (1,)): + magnitude *= -1.0 + + # TODO: Compute mask, bbox + return _apply_op(image, op_name, magnitude, interpolation=self.interpolation, fill=fill), mask, bbox + + def __repr__(self) -> str: + s = ( + f"{self.__class__.__name__}(" + f"num_magnitude_bins={self.num_magnitude_bins}" + f", interpolation={self.interpolation}" + f", fill={self.fill}" + f")" + ) + return s + + +class RandomMixup: + """ + Based on the RandomMixup implementation of ml_cvnets. + https://github.com/apple/ml-cvnets/blob/77717569ab4a852614dae01f010b32b820cb33bb/data/transforms/image_torch.py + + Given a batch of input images and labels, this class randomly applies the + `MixUp transformation `_ + + Args: + opts (argparse.Namespace): Arguments + num_classes (int): Number of classes in the dataset + """ + visualize = False + + def __init__(self, num_classes: int, alpha, p=1.0, inplace=False): + if not (num_classes > 0): + raise ValueError("Please provide a valid positive value for the num_classes.") + if not (alpha > 0): + raise ValueError("Alpha param can't be zero.") + if not (0.0 < p <= 1.0): + raise ValueError("MixUp probability should be between 0 and 1, where 1 is inclusive") + + self.num_classes = num_classes + self.alpha = alpha + self.p = p + self.inplace = inplace + + def _apply_mixup_transform(self, image_tensor, target_tensor): + if image_tensor.ndim != 4: + raise ValueError(f"Batch ndim should be 4. 
Got {image_tensor.ndim}") + if target_tensor.ndim != 1: + raise ValueError(f"Target ndim should be 1. Got {target_tensor.ndim}") + if not image_tensor.is_floating_point(): + raise ValueError(f"Batch datatype should be a float tensor. Got {image_tensor.dtype}.") + if target_tensor.dtype != torch.int64: + raise ValueError(f"Target datatype should be torch.int64. Got {target_tensor.dtype}") + + if not self.inplace: + image_tensor = image_tensor.clone() + target_tensor = target_tensor.clone() + + if target_tensor.ndim == 1: + target_tensor = F_torch.one_hot( + target_tensor, num_classes=self.num_classes + ).to(dtype=image_tensor.dtype) + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = image_tensor.roll(1, 0) + target_rolled = target_tensor.roll(1, 0) + + # Implemented as on mixup paper, page 3. + lambda_param = float( + torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0] + ) + batch_rolled.mul_(1.0 - lambda_param) + image_tensor.mul_(lambda_param).add_(batch_rolled) + + target_rolled.mul_(1.0 - lambda_param) + target_tensor.mul_(lambda_param).add_(target_rolled) + return image_tensor, target_tensor + + def __call__(self, samples, targets): + if torch.rand(1).item() >= self.p: + return samples, targets + + mixup_samples, mixup_targets = self._apply_mixup_transform( + image_tensor=samples, target_tensor=targets + ) + + return mixup_samples, mixup_targets + + def __repr__(self) -> str: + return "{}(num_classes={}, p={}, alpha={}, inplace={})".format( + self.__class__.__name__, self.num_classes, self.p, self.alpha, self.inplace + ) + + +class RandomCutmix: + """ + Based on the RandomCutmix implementation of ml_cvnets. + https://github.com/apple/ml-cvnets/blob/77717569ab4a852614dae01f010b32b820cb33bb/data/transforms/image_torch.py + + Given a batch of input images and labels, this class randomly applies the + `CutMix transformation `_ + + Args: + opts (argparse.Namespace): Arguments + num_classes (int): Number of classes in the dataset + """ + visualize = False + + def __init__(self, num_classes, alpha, p=1.0, inplace=False): + if not (num_classes > 0): + raise ValueError("Please provide a valid positive value for the num_classes.") + if not (alpha > 0): + raise ValueError("Alpha param can't be zero.") + if not (0.0 < p <= 1.0): + raise ValueError("CutMix probability should be between 0 and 1, where 1 is inclusive") + + self.num_classes = num_classes + self.alpha = alpha + self.p = p + self.inplace = inplace + + def _apply_cutmix_transform(self, image_tensor, target_tensor): + if image_tensor.ndim != 4: + raise ValueError(f"Batch ndim should be 4. Got {image_tensor.ndim}") + if target_tensor.ndim != 1: + raise ValueError(f"Target ndim should be 1. Got {target_tensor.ndim}") + if not image_tensor.is_floating_point(): + raise ValueError(f"Batch dtype should be a float tensor. Got {image_tensor.dtype}.") + if target_tensor.dtype != torch.int64: + raise ValueError(f"Target dtype should be torch.int64. Got {target_tensor.dtype}") + + if not self.inplace: + image_tensor = image_tensor.clone() + target_tensor = target_tensor.clone() + + if target_tensor.ndim == 1: + target_tensor = F_torch.one_hot( + target_tensor, num_classes=self.num_classes + ).to(dtype=image_tensor.dtype) + + # It's faster to roll the batch by one instead of shuffling it to create image pairs + batch_rolled = image_tensor.roll(1, 0) + target_rolled = target_tensor.roll(1, 0) + + # Implemented as on cutmix paper, page 12 (with minor corrections on typos). 
+ lambda_param = float( + torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0] + ) + W, H = F.get_image_size(image_tensor) + + r_x = torch.randint(W, (1,)) + r_y = torch.randint(H, (1,)) + + r = 0.5 * math.sqrt(1.0 - lambda_param) + r_w_half = int(r * W) + r_h_half = int(r * H) + + x1 = int(torch.clamp(r_x - r_w_half, min=0)) + y1 = int(torch.clamp(r_y - r_h_half, min=0)) + x2 = int(torch.clamp(r_x + r_w_half, max=W)) + y2 = int(torch.clamp(r_y + r_h_half, max=H)) + + image_tensor[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2] + lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H)) + + target_rolled.mul_(1.0 - lambda_param) + target_tensor.mul_(lambda_param).add_(target_rolled) + return image_tensor, target_tensor + + def __call__(self, samples, targets) -> Dict: + if torch.rand(1).item() >= self.p: + return samples, targets + + mixup_samples, mixup_targets = self._apply_cutmix_transform( + image_tensor=samples, target_tensor=targets + ) + + return mixup_samples, mixup_targets + + def __repr__(self) -> str: + return "{}(num_classes={}, p={}, alpha={}, inplace={})".format( + self.__class__.__name__, self.num_classes, self.p, self.alpha, self.inplace + ) + + class Normalize: visualize = False @@ -351,4 +678,4 @@ def __call__(self, image, mask=None, bbox=None): return image, mask, bbox def __repr__(self): - return self.__class__.__name__ + "()" \ No newline at end of file + return self.__class__.__name__ + "()" diff --git a/src/netspresso_trainer/dataloaders/augmentation/registry.py b/src/netspresso_trainer/dataloaders/augmentation/registry.py new file mode 100644 index 00000000..139ac1a7 --- /dev/null +++ b/src/netspresso_trainer/dataloaders/augmentation/registry.py @@ -0,0 +1,29 @@ +from typing import Callable, Dict + +from .custom import ( + ColorJitter, + Pad, + RandomCrop, + RandomCutmix, + RandomErasing, + RandomHorizontalFlip, + RandomMixup, + RandomResizedCrop, + RandomVerticalFlip, + Resize, + TrivialAugmentWide, +) + +TRANSFORM_DICT: Dict[str, Callable] = { + 'colorjitter': ColorJitter, + 'pad': Pad, + 'randomcrop': RandomCrop, + 'randomresizedcrop': RandomResizedCrop, + 'randomhorizontalflip': RandomHorizontalFlip, + 'randomverticalflip': RandomVerticalFlip, + 'randomerasing': RandomErasing, + 'resize': Resize, + 'mixup': RandomMixup, + 'cutmix': RandomCutmix, + 'trivialaugmentwide': TrivialAugmentWide, +} diff --git a/src/netspresso_trainer/dataloaders/augmentation/transforms.py b/src/netspresso_trainer/dataloaders/augmentation/transforms.py new file mode 100644 index 00000000..35adbc0b --- /dev/null +++ b/src/netspresso_trainer/dataloaders/augmentation/transforms.py @@ -0,0 +1,89 @@ +import cv2 +import numpy as np +import PIL.Image as Image + +from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD +from . 
import custom as TC +from .registry import TRANSFORM_DICT + +EDGE_SIZE = 4 +Y_K_SIZE = 6 +X_K_SIZE = 6 + + +def reduce_label(label: np.ndarray) -> Image.Image: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + return Image.fromarray(label) + + +def generate_edge(label: np.ndarray) -> Image.Image: + edge = cv2.Canny(label, 0.1, 0.2) + kernel = np.ones((EDGE_SIZE, EDGE_SIZE), np.uint8) + # edge_pad == True + edge = edge[Y_K_SIZE:-Y_K_SIZE, X_K_SIZE:-X_K_SIZE] + edge = np.pad(edge, ((Y_K_SIZE, Y_K_SIZE), (X_K_SIZE, X_K_SIZE)), mode='constant') + edge = (cv2.dilate(edge, kernel, iterations=1) > 50) * 1.0 + return Image.fromarray((edge.copy() * 255).astype(np.uint8)) + + +def transforms_custom_train(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [] + for augment in conf_augmentation.transforms: + name = augment.name.lower() + augment_kwargs = list(augment.keys()) + augment_kwargs.remove('name') + augment_kwargs = {k:augment[k] for k in augment_kwargs} + transform = TRANSFORM_DICT[name](**augment_kwargs) + preprocess.append(transform) + + preprocess = preprocess + [ + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess) + + +def transforms_custom_eval(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [ + TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), interpolation='bilinear'), + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess) + + +def train_transforms_pidnet(conf_augmentation): + preprocess = [] + for augment in conf_augmentation.transforms: + name = augment.name.lower() + augment_kwargs = list(augment.keys()) + augment_kwargs.remove('name') + augment_kwargs = {k:augment[k] for k in augment_kwargs} + transform = TRANSFORM_DICT[name](**augment_kwargs) + preprocess.append(transform) + + preprocess = preprocess + [ + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess, additional_targets={'edge': 'mask'}) + + +def val_transforms_pidnet(conf_augmentation): + assert conf_augmentation.img_size > 32 + preprocess = [ + TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), interpolation='bilinear'), + TC.ToTensor(), + TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) + ] + return TC.Compose(preprocess, additional_targets={'edge': 'mask'}) + + +def create_transform(model_name: str, is_training=False): + if 'pidnet' in model_name: + return train_transforms_pidnet if is_training else val_transforms_pidnet + return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/base.py b/src/netspresso_trainer/dataloaders/base.py index 5e1e8b11..e00bb5a1 100644 --- a/src/netspresso_trainer/dataloaders/base.py +++ b/src/netspresso_trainer/dataloaders/base.py @@ -15,10 +15,10 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, split self.conf_data = conf_data self.conf_augmentation = conf_augmentation self.model_name = model_name - + self.transform = transform self.samples = samples - + self._root = conf_data.path.root self._idx_to_class = idx_to_class self._num_classes = len(self._idx_to_class) @@ -47,12 +47,12 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label - - + + class BaseHFDataset(data.Dataset): def __init__(self, 
conf_data, conf_augmentation, model_name, root, split, with_label): @@ -64,7 +64,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, root, split, with_l self._split = split self._with_label = with_label - def _load_dataset(self, root, subset_name=None, cache_dir=None): + def _load_dataset(self, root, subset_name=None, cache_dir=None): from datasets import load_dataset if cache_dir is not None: Path(cache_dir).mkdir(exist_ok=True, parents=True) @@ -94,7 +94,7 @@ def root(self): @property def mode(self): return self._split - + @property def with_label(self): return self._with_label @@ -104,15 +104,15 @@ class BaseDataSampler(ABC): def __init__(self, conf_data, train_valid_split_ratio): self.conf_data = conf_data self.train_valid_split_ratio = train_valid_split_ratio - + @abstractmethod def load_data(self): raise NotImplementedError - + @abstractmethod def load_samples(self): raise NotImplementedError - + @abstractmethod def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/builder.py b/src/netspresso_trainer/dataloaders/builder.py index f13a7fea..3910ec45 100644 --- a/src/netspresso_trainer/dataloaders/builder.py +++ b/src/netspresso_trainer/dataloaders/builder.py @@ -1,8 +1,11 @@ import logging import os +from functools import partial from pathlib import Path from typing import Dict, List, Optional, Type, Union +from .augmentation.registry import TRANSFORM_DICT +from .classification import classification_mix_collate_fn, classification_onehot_collate_fn from .detection import detection_collate_fn from .registry import CREATE_TRANSFORM, CUSTOM_DATASET, DATA_SAMPLER, HUGGINGFACE_DATASET from .utils.loader import create_loader @@ -19,11 +22,10 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): task = conf_data.task - assert task in CREATE_TRANSFORM, f"The given task `{task}` is not supported!" assert task in DATA_SAMPLER, f"Data sampler for {task} is not yet supported!" 
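# ---------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the patch): the builder
# changes below read `conf.augmentation.mix_transforms`, build the batch-level
# mix transforms (cutmix/mixup) by name, and hand them to the classification
# collate function through functools.partial, so mixing runs once per batch
# inside the DataLoader. A condensed version of that wiring; the config entry
# and num_classes are made up, the other names mirror those added in this patch:
#
#     from functools import partial
#
#     entry = {'name': 'cutmix', 'alpha': 1.0}                 # hypothetical mix_transforms entry
#     kwargs = {k: v for k, v in entry.items() if k != 'name'}
#     kwargs['num_classes'] = 10                               # the patch uses train_dataset.num_classes
#     mix = TRANSFORM_DICT[entry['name']](**kwargs)            # -> RandomCutmix(num_classes=10, alpha=1.0)
#     collate_fn = partial(classification_mix_collate_fn, mix_transforms=[mix])
#     # DataLoader(..., collate_fn=collate_fn) then yields mixed images and soft targets
# ---------------------------------------------------------------------------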
- train_transform = CREATE_TRANSFORM[task](model_name, is_training=True) - target_transform = CREATE_TRANSFORM[task](model_name, is_training=False) + train_transform = CREATE_TRANSFORM(model_name, is_training=True) + target_transform = CREATE_TRANSFORM(model_name, is_training=False) data_format = conf_data.format @@ -101,7 +103,25 @@ def build_dataset(conf_data, conf_augmentation, task: str, model_name: str): def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_dataset, profile=False): if task == 'classification': - collate_fn = None + conf_mix_transform = getattr(conf.augmentation, 'mix_transforms', None) + if conf_mix_transform: + mix_transforms = [] + for mix_transform_conf in conf.augmentation.mix_transforms: + name = mix_transform_conf.name.lower() + + mix_kwargs = list(mix_transform_conf.keys()) + mix_kwargs.remove('name') + mix_kwargs = {k:mix_transform_conf[k] for k in mix_kwargs} + mix_kwargs['num_classes'] = train_dataset.num_classes + + transform = TRANSFORM_DICT[name](**mix_kwargs) + mix_transforms.append(transform) + + train_collate_fn = partial(classification_mix_collate_fn, mix_transforms=mix_transforms) + eval_collate_fn = partial(classification_onehot_collate_fn, num_classes=train_dataset.num_classes) + else: + train_collate_fn = None + eval_collate_fn = None train_loader = create_loader( train_dataset, @@ -112,7 +132,7 @@ def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_datas is_training=True, num_workers=conf.environment.num_workers if not profile else 1, distributed=conf.distributed, - collate_fn=collate_fn, + collate_fn=train_collate_fn, pin_memory=False, world_size=conf.world_size, rank=conf.rank, @@ -128,7 +148,7 @@ def build_dataloader(conf, task: str, model_name: str, train_dataset, eval_datas is_training=False, num_workers=conf.environment.num_workers if not profile else 1, distributed=conf.distributed, - collate_fn=None, + collate_fn=eval_collate_fn, pin_memory=False, world_size=conf.world_size, rank=conf.rank, diff --git a/src/netspresso_trainer/dataloaders/classification/__init__.py b/src/netspresso_trainer/dataloaders/classification/__init__.py index b297f1a0..8618218e 100644 --- a/src/netspresso_trainer/dataloaders/classification/__init__.py +++ b/src/netspresso_trainer/dataloaders/classification/__init__.py @@ -1,4 +1,3 @@ -from .dataset import ClassficationDataSampler +from .dataset import ClassficationDataSampler, classification_mix_collate_fn, classification_onehot_collate_fn from .huggingface import ClassificationHFDataset from .local import ClassificationCustomDataset -from .transforms import create_transform_classification diff --git a/src/netspresso_trainer/dataloaders/classification/dataset.py b/src/netspresso_trainer/dataloaders/classification/dataset.py index cd42daa2..298aeb0d 100644 --- a/src/netspresso_trainer/dataloaders/classification/dataset.py +++ b/src/netspresso_trainer/dataloaders/classification/dataset.py @@ -1,5 +1,6 @@ import csv import logging +import random from collections import Counter from itertools import chain from pathlib import Path @@ -7,6 +8,7 @@ import torch from omegaconf import DictConfig +from torch.nn import functional as F from torch.utils.data import random_split from ..base import BaseDataSampler @@ -16,21 +18,21 @@ logger = logging.getLogger("netspresso_trainer") VALID_IMG_EXTENSIONS = IMG_EXTENSIONS + tuple((x.upper() for x in IMG_EXTENSIONS)) - + def load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename: Optional[Union[str, Path]]=None, id_mapping: 
Optional[Dict[str, str]]=None): if map_or_filename is None: # may be labeled with directory - # dir -> + # dir -> dir_list = [x.name for x in Path(train_dir).iterdir() if x.is_dir()] dir_to_class = id_mapping if id_mapping is not None else {k: k for k in dir_list} # id_mapping or identity - + class_list = [dir_to_class[dir] for dir in dir_list] class_list = sorted(class_list, key=lambda k: natural_key(k)) _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(class_list)} idx_to_class = {v: k for k, v in _class_to_idx.items()} - + file_or_dir_to_idx = {dir: _class_to_idx[dir_to_class[dir]] for dir in dir_list} # dir -> idx return file_or_dir_to_idx, idx_to_class @@ -45,9 +47,9 @@ def load_class_map_with_id_mapping(root_dir, train_dir, reader = csv.DictReader(csvfile) file_class_list = [{column: str(row[column]).strip() for column in ['image_id', 'class']} for row in reader] - + class_stats = Counter([x['class'] for x in file_class_list]) - + _class_to_idx = {class_name: class_idx for class_idx, class_name in enumerate(sorted(class_stats, key=lambda k: natural_key(k)))} idx_to_class = {v: k for k, v in _class_to_idx.items()} @@ -62,26 +64,59 @@ def is_file_dict(image_dir: Union[Path, str], file_or_dir_to_idx): file_or_dir: Path = image_dir / candidate_name if file_or_dir.exists(): return file_or_dir.is_file() - + file_candidates = list(image_dir.glob(f"{candidate_name}.*")) assert len(file_candidates) != 0, f"Unknown label format! Is there any something file like {file_or_dir} ?" - + return True + +def classification_mix_collate_fn(original_batch, mix_transforms): + images = [] + target = [] + for data_sample in original_batch: + images.append(data_sample[0]) + target.append(data_sample[1]) + + images = torch.stack(images, dim=0) + target = torch.tensor(target, dtype=torch.long) + + _mix_transform = random.choice(mix_transforms) + images, target = _mix_transform(images, target) + + outputs = (images, target) + return outputs + + +def classification_onehot_collate_fn(original_batch, num_classes): + images = [] + target = [] + for data_sample in original_batch: + images.append(data_sample[0]) + target.append(data_sample[1]) + + images = torch.stack(images, dim=0) + target = torch.tensor(target, dtype=torch.long) + target = F.one_hot(target, num_classes=num_classes).to(dtype=images.dtype) + + outputs = (images, target) + return outputs + + class ClassficationDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(ClassficationDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, file_or_dir_to_idx, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] image_dir: Path = data_root / split_dir.image - + images_and_targets: List[Dict[str, Optional[Union[str, int]]]] = [] - + assert split in ['train', 'valid', 'test'], f"split should be either {['train', 'valid', 'test']}" if split in ['train', 'valid']: - + if is_file_dict(image_dir, file_or_dir_to_idx): file_to_idx = file_or_dir_to_idx for file in chain(image_dir.glob(f'*{ext}') for ext in VALID_IMG_EXTENSIONS): @@ -92,7 +127,7 @@ def load_data(self, file_or_dir_to_idx, split='train'): images_and_targets.append({'image': str(file), 'label': file_to_idx[file.stem]}) continue logger.debug(f"Found file wihtout label: {file}") - + else: dir_to_idx = file_or_dir_to_idx for dir_name, dir_idx in dir_to_idx.items(): @@ -103,24 +138,24 @@ def load_data(self, file_or_dir_to_idx, split='train'): else: # split == test for 
ext in VALID_IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} for file in image_dir.glob(f'*{ext}')]) - + images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None root_dir = Path(self.conf_data.path.root) train_dir = root_dir / self.conf_data.path.train.image id_mapping: Optional[dict] = dict(self.conf_data.id_mapping) if self.conf_data.id_mapping is not None else None file_or_dir_to_idx, idx_to_class = load_class_map_with_id_mapping(root_dir, train_dir, map_or_filename=self.conf_data.path.train.label, id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(file_or_dir_to_idx, split='train') if exists_valid: valid_samples = self.load_data(file_or_dir_to_idx, split='valid') @@ -128,16 +163,16 @@ def load_samples(self): test_samples = self.load_data(file_or_dir_to_idx, split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): from datasets import ClassLabel, load_dataset - + cache_dir = self.conf_data.metadata.custom_cache_dir root = self.conf_data.metadata.repo subset_name = self.conf_data.metadata.subset @@ -145,23 +180,23 @@ def load_huggingface_samples(self): cache_dir = Path(cache_dir) Path(cache_dir).mkdir(exist_ok=True, parents=True) total_dataset = load_dataset(root, name=subset_name, cache_dir=cache_dir) - + label_feature_name = self.conf_data.metadata.features.label label_feature = total_dataset['train'].features[label_feature_name] if isinstance(label_feature, ClassLabel): labels: List[str] = label_feature.names else: labels = list({sample[label_feature_name] for sample in total_dataset['train']}) - + if isinstance(labels[0], int): # TODO: find class_map <-> idx and apply it (ex. 
using id_mapping) idx_to_class: Dict[int, int] = {k: k for k in labels} elif isinstance(labels[0], str): idx_to_class: Dict[int, str] = dict(enumerate(labels)) - + exists_valid = 'validation' in total_dataset exists_test = 'test' in total_dataset - + train_samples = total_dataset['train'] valid_samples = None if exists_valid: @@ -174,4 +209,4 @@ def load_huggingface_samples(self): splitted_datasets = train_samples.train_test_split(test_size=(1 - self.train_valid_split_ratio)) train_samples = splitted_datasets['train'] valid_samples = splitted_datasets['test'] - return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} \ No newline at end of file + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} diff --git a/src/netspresso_trainer/dataloaders/classification/transforms.py b/src/netspresso_trainer/dataloaders/classification/transforms.py deleted file mode 100644 index 9ac5f000..00000000 --- a/src/netspresso_trainer/dataloaders/classification/transforms.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Optional - -from torchvision.transforms.functional import InterpolationMode - -from ..augmentation import custom as TC -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - - -def transforms_custom_train(conf_augmentation): - assert conf_augmentation.img_size > 32 - primary_tfl = [TC.RandomResizedCrop(conf_augmentation.img_size, interpolation=InterpolationMode.BILINEAR), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr) - ] - preprocess = [ - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ] - return TC.Compose(primary_tfl + preprocess) - - -def transforms_custom_eval(conf_augmentation): - assert conf_augmentation.img_size > 32 - preprocess = [ - TC.Resize((conf_augmentation.img_size, conf_augmentation.img_size), - interpolation=InterpolationMode.BILINEAR), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ] - return TC.Compose(preprocess) - - -def create_transform_classification(model_name: str, is_training=False): - return transforms_custom_train if is_training else transforms_custom_eval diff --git a/src/netspresso_trainer/dataloaders/detection/__init__.py b/src/netspresso_trainer/dataloaders/detection/__init__.py index 9234262e..38587950 100644 --- a/src/netspresso_trainer/dataloaders/detection/__init__.py +++ b/src/netspresso_trainer/dataloaders/detection/__init__.py @@ -1,3 +1,2 @@ from .dataset import DetectionDataSampler, detection_collate_fn from .local import DetectionCustomDataset -from .transforms import create_transform_detection diff --git a/src/netspresso_trainer/dataloaders/detection/dataset.py b/src/netspresso_trainer/dataloaders/detection/dataset.py index fd6dbcf2..76984375 100644 --- a/src/netspresso_trainer/dataloaders/detection/dataset.py +++ b/src/netspresso_trainer/dataloaders/detection/dataset.py @@ -49,7 +49,7 @@ def detection_collate_fn(original_batch): class DetectionDataSampler(BaseDataSampler): def __init__(self, conf_data, train_valid_split_ratio): super(DetectionDataSampler, self).__init__(conf_data, train_valid_split_ratio) - + def load_data(self, split='train'): data_root = Path(self.conf_data.path.root) split_dir = self.conf_data.path[split] @@ -71,7 +71,7 @@ def load_data(self, split='train'): images = sorted(images, key=lambda k: natural_key(k)) labels = sorted(labels, key=lambda k: natural_key(k)) images_and_targets.extend([{'image': str(image), 'label': str(label)} for image, label in zip(images, 
labels)]) - + elif split == 'test': for ext in IMG_EXTENSIONS: images_and_targets.extend([{'image': str(file), 'label': None} @@ -79,21 +79,21 @@ def load_data(self, split='train'): images_and_targets = sorted(images_and_targets, key=lambda k: natural_key(k['image'])) else: raise AssertionError(f"split should be either {['train', 'valid', 'test']}") - + return images_and_targets - + def load_samples(self): assert self.conf_data.path.train.image is not None assert self.conf_data.id_mapping is not None id_mapping: Optional[list] = list(self.conf_data.id_mapping) idx_to_class = load_custom_class_map(id_mapping=id_mapping) - + exists_valid = self.conf_data.path.valid.image is not None exists_test = self.conf_data.path.test.image is not None - + valid_samples = None test_samples = None - + train_samples = self.load_data(split='train') if exists_valid: valid_samples = self.load_data(split='valid') @@ -101,12 +101,12 @@ def load_samples(self): test_samples = self.load_data(split='test') if not exists_valid: - num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) + num_train_splitted = int(len(train_samples) * self.train_valid_split_ratio) train_samples, valid_samples = \ random_split(train_samples, [num_train_splitted, len(train_samples) - num_train_splitted], generator=torch.Generator().manual_seed(42)) - + return train_samples, valid_samples, test_samples, {'idx_to_class': idx_to_class} - + def load_huggingface_samples(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/src/netspresso_trainer/dataloaders/detection/local.py b/src/netspresso_trainer/dataloaders/detection/local.py index 2ba29762..3dbbe842 100644 --- a/src/netspresso_trainer/dataloaders/detection/local.py +++ b/src/netspresso_trainer/dataloaders/detection/local.py @@ -23,13 +23,13 @@ def exist_name(candidate, folder_iterable): def get_label(label_file: Path): target = Path(label_file).read_text() - + try: target_array = np.array([list(map(float, box.split(' '))) for box in target.split('\n') if box.strip()]) except ValueError as e: print(target) raise e - + label, boxes = target_array[:, 0], target_array[:, 1:] label = label[..., np.newaxis] return label, boxes @@ -43,7 +43,7 @@ def __init__(self, conf_data, conf_augmentation, model_name, idx_to_class, conf_data, conf_augmentation, model_name, idx_to_class, split, samples, transform, with_label, **kwargs ) - + @staticmethod def xywhn2xyxy(original: np.ndarray, w: int, h: int, padw=0, padh=0): converted = original.copy() @@ -67,12 +67,12 @@ def __getitem__(self, index): if ann_path is None: out = self.transform(self.conf_augmentation)(image=img) return {'pixel_values': out['image'], 'name': img_path.name, 'org_img': org_img, 'org_shape': (h, w)} - + outputs = {} label, boxes_yolo = get_label(Path(ann_path)) boxes = self.xywhn2xyxy(boxes_yolo, w, h) - + out = self.transform(self.conf_augmentation)(image=img, bbox=np.concatenate((boxes, label), axis=-1)) assert out['bbox'].shape[-1] == 5 # ltrb + class_label outputs.update({'pixel_values': out['image'], 'bbox': out['bbox'][..., :4], @@ -83,6 +83,6 @@ def __getitem__(self, index): return outputs assert self._split in ['val', 'valid', 'test'] - # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 + # outputs.update({'org_img': org_img, 'org_shape': (h, w)}) # TODO: return org_img with batch_size > 1 outputs.update({'org_shape': (h, w)}) - return outputs \ No newline at end of file + return outputs diff --git 
a/src/netspresso_trainer/dataloaders/detection/transforms.py b/src/netspresso_trainer/dataloaders/detection/transforms.py deleted file mode 100644 index ac3090a5..00000000 --- a/src/netspresso_trainer/dataloaders/detection/transforms.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -import cv2 -import numpy as np -import PIL.Image as Image - -from ..augmentation import custom as TC -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - - -def train_transforms_efficientformer(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - train_transforms_composed = TC.Compose([ - TC.Resize(size=(crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return train_transforms_composed - -def val_transforms_efficientformer(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - val_transforms_composed = TC.Compose([ - TC.Resize(size=(crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return val_transforms_composed - -def create_transform_detection(model_name: str, is_training=False): - if is_training: - return train_transforms_efficientformer - return val_transforms_efficientformer diff --git a/src/netspresso_trainer/dataloaders/registry.py b/src/netspresso_trainer/dataloaders/registry.py index bc71e992..7ff3cc42 100644 --- a/src/netspresso_trainer/dataloaders/registry.py +++ b/src/netspresso_trainer/dataloaders/registry.py @@ -1,26 +1,20 @@ from typing import Callable, Dict, Type -from .augmentation import custom as TC +from .augmentation.transforms import create_transform from .base import BaseCustomDataset, BaseDataSampler, BaseHFDataset from .classification import ( ClassficationDataSampler, ClassificationCustomDataset, ClassificationHFDataset, - create_transform_classification, ) -from .detection import DetectionCustomDataset, DetectionDataSampler, create_transform_detection +from .detection import DetectionCustomDataset, DetectionDataSampler from .segmentation import ( SegmentationCustomDataset, SegmentationDataSampler, SegmentationHFDataset, - create_transform_segmentation, ) -CREATE_TRANSFORM: Dict[str, Callable[..., Callable[..., TC.Compose]]] = { - 'classification': create_transform_classification, - 'segmentation': create_transform_segmentation, - 'detection': create_transform_detection -} +CREATE_TRANSFORM = create_transform CUSTOM_DATASET: Dict[str, Type[BaseCustomDataset]] = { 'classification': ClassificationCustomDataset, @@ -37,4 +31,4 @@ 'classification': ClassficationDataSampler, 'segmentation': SegmentationDataSampler, 'detection': DetectionDataSampler -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/dataloaders/segmentation/__init__.py b/src/netspresso_trainer/dataloaders/segmentation/__init__.py index efed9d1d..9d73a030 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/__init__.py +++ b/src/netspresso_trainer/dataloaders/segmentation/__init__.py @@ -1,4 +1,3 @@ from .dataset import SegmentationDataSampler from .huggingface import SegmentationHFDataset from .local import SegmentationCustomDataset -from .transforms import create_transform_segmentation diff --git a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py index 8c494307..eeb6afc6 100644 --- 
a/src/netspresso_trainer/dataloaders/segmentation/huggingface.py +++ b/src/netspresso_trainer/dataloaders/segmentation/huggingface.py @@ -3,8 +3,8 @@ import numpy as np import PIL.Image as Image +from ..augmentation.transforms import generate_edge, reduce_label from ..base import BaseHFDataset -from ..segmentation.transforms import generate_edge, reduce_label class SegmentationHFDataset(BaseHFDataset): diff --git a/src/netspresso_trainer/dataloaders/segmentation/local.py b/src/netspresso_trainer/dataloaders/segmentation/local.py index 1ab8d305..b39bd8b1 100644 --- a/src/netspresso_trainer/dataloaders/segmentation/local.py +++ b/src/netspresso_trainer/dataloaders/segmentation/local.py @@ -5,8 +5,8 @@ import numpy as np import PIL.Image as Image +from ..augmentation.transforms import generate_edge, reduce_label from ..base import BaseCustomDataset -from ..segmentation.transforms import generate_edge, reduce_label class SegmentationCustomDataset(BaseCustomDataset): @@ -51,7 +51,7 @@ def __getitem__(self, index): mask = Image.fromarray(mask, mode='L') # single mode array (PIL.Image) compatbile with torchvision transform API - if self.model_name == 'pidnet': + if 'pidnet' in self.model_name: edge = generate_edge(np.array(mask)) out = self.transform(self.conf_augmentation)(image=img, mask=mask, edge=edge) outputs.update({'pixel_values': out['image'], 'labels': out['mask'], 'edges': out['edge'].float(), 'name': img_path.name}) diff --git a/src/netspresso_trainer/dataloaders/segmentation/transforms.py b/src/netspresso_trainer/dataloaders/segmentation/transforms.py deleted file mode 100644 index d4aa506c..00000000 --- a/src/netspresso_trainer/dataloaders/segmentation/transforms.py +++ /dev/null @@ -1,119 +0,0 @@ -from typing import Optional - -import cv2 -import numpy as np -import PIL.Image as Image - -from ..augmentation import custom as TC -from ..utils.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD - -EDGE_SIZE = 4 -Y_K_SIZE = 6 -X_K_SIZE = 6 - - -def reduce_label(label: np.ndarray) -> Image.Image: - label[label == 0] = 255 - label = label - 1 - label[label == 254] = 255 - return Image.fromarray(label) - -def generate_edge(label: np.ndarray) -> Image.Image: - edge = cv2.Canny(label, 0.1, 0.2) - kernel = np.ones((EDGE_SIZE, EDGE_SIZE), np.uint8) - # edge_pad == True - edge = edge[Y_K_SIZE:-Y_K_SIZE, X_K_SIZE:-X_K_SIZE] - edge = np.pad(edge, ((Y_K_SIZE, Y_K_SIZE), (X_K_SIZE, X_K_SIZE)), mode='constant') - edge = (cv2.dilate(edge, kernel, iterations=1) > 50) * 1.0 - return Image.fromarray((edge.copy() * 255).astype(np.uint8)) - - -def train_transforms_segmentation(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - scale_ratio = (conf_augmentation.resize_ratio0, conf_augmentation.resize_ratiof) - - train_transforms_composed = TC.Compose([ - TC.RandomResizedCrop((crop_size_h, crop_size_w), scale=scale_ratio, ratio=(1.0, 1.0)), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr), - TC.ColorJitter(brightness=conf_augmentation.color_jitter.brightness, - contrast=conf_augmentation.color_jitter.contrast, - saturation=conf_augmentation.color_jitter.saturation, - hue=conf_augmentation.color_jitter.hue, - p=conf_augmentation.color_jitter.colorjitter_p), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return train_transforms_composed - -def val_transforms_segmentation(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w 
- - val_transforms_composed = TC.Compose([ - TC.Resize((crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ]) - - return val_transforms_composed - - -def infer_transforms_segmentation(conf_augmentation): - return - - -def train_transforms_pidnet(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - scale_ratio = (conf_augmentation.resize_ratio0, conf_augmentation.resize_ratiof) - - train_transforms_composed = TC.Compose( - [ - TC.RandomResizedCrop((crop_size_h, crop_size_w), scale=scale_ratio, ratio=(1.0, 1.0)), - TC.RandomHorizontalFlip(p=conf_augmentation.fliplr), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ], - additional_targets={'edge': 'mask'} - ) - - return train_transforms_composed - - -def val_transforms_pidnet(conf_augmentation): - - crop_size_h = conf_augmentation.crop_size_h - crop_size_w = conf_augmentation.crop_size_w - - val_transforms_composed = TC.Compose( - [ - TC.Resize((crop_size_h, crop_size_w)), - TC.ToTensor(), - TC.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD) - ], - additional_targets={'edge': 'mask'} - ) - - return val_transforms_composed - - -def infer_transforms_pidnet(conf_augmentation): - return - - -def create_transform_segmentation(model_name: str, is_training=False): - - if model_name == 'pidnet': - if is_training: - return train_transforms_pidnet - return val_transforms_pidnet - if is_training: - return train_transforms_segmentation - return val_transforms_segmentation diff --git a/src/netspresso_trainer/dataloaders/utils/constants.py b/src/netspresso_trainer/dataloaders/utils/constants.py index b017eb44..91d73f29 100644 --- a/src/netspresso_trainer/dataloaders/utils/constants.py +++ b/src/netspresso_trainer/dataloaders/utils/constants.py @@ -1,4 +1,4 @@ -DEFAULT_CROP_PCT = 0.95 #0.875 +DEFAULT_CROP_PCT = 0.95 #0.875 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) diff --git a/src/netspresso_trainer/dataloaders/utils/misc.py b/src/netspresso_trainer/dataloaders/utils/misc.py index 0fab867f..3fb73621 100644 --- a/src/netspresso_trainer/dataloaders/utils/misc.py +++ b/src/netspresso_trainer/dataloaders/utils/misc.py @@ -19,4 +19,4 @@ def expand_to_chs(x, n): def natural_key(string_): """See http://www.codinghorror.com/blog/archives/001018.html""" - return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] \ No newline at end of file + return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())] diff --git a/src/netspresso_trainer/loggers/base.py b/src/netspresso_trainer/loggers/base.py index dffa31d6..6dd321da 100644 --- a/src/netspresso_trainer/loggers/base.py +++ b/src/netspresso_trainer/loggers/base.py @@ -13,41 +13,41 @@ def __init__(self, model, result_dir): self.model = model self.csv_path = Path(result_dir) / CSV_FILENAME self.header: List = [] - + self._temp_row_dict = {} - + if self.csv_path.exists(): self.csv_path.unlink() - + self._epoch = None - + @property @abstractmethod def key_map(self) -> Dict[str, str]: raise NotImplementedError - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def update_header(self, header: List): assert len(header) != 0 self.header = header - + with open(self.csv_path, 'a') as f: 
f.write(",".join(self.header)) f.write("\n") def _clear_temp(self): self._temp_row_dict = {} - + def _update_with_list(self, data: List): if data is not None and len(data) != 0: with open(self.csv_path, 'a') as f: @@ -55,18 +55,18 @@ def _update_with_list(self, data: List): f.write("\n") self._clear_temp() return - + def _update_specific(self, data: Dict): for _key, _value in data.items(): if _key not in self.header: raise AssertionError(f"The given key ({_key}) is not in {self.header}!") if _key not in self._temp_row_dict: self._temp_row_dict[_key] = _value - + if set(self.header) == set(self._temp_row_dict.keys()): self._update_with_list([self._temp_row_dict[_col] for _col in self.header]) return - + def update(self, data=None, **kwargs): if isinstance(data, List): return self._update_with_list(data) @@ -74,9 +74,9 @@ def update(self, data=None, **kwargs): return self._update_specific(data) # if isinstance(data, type(None)): # return self._update_specific(kwargs) - + raise AssertionError(f"Type of data should be either List or Dict! Current: {type(data)}") - + def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'valid'] = 'train'): converted_dict = {} for k, v in scalar_dict.items(): @@ -84,25 +84,25 @@ def _convert_as_csv_record(self, scalar_dict: Dict, prefix: Literal['train', 'va continue record_key = self.key_map[f"{prefix}/{k}"] assert record_key in self.header, f"{record_key} not in {self.header}" - + converted_dict.update({record_key: v}) return converted_dict - + def __call__(self, train_losses, train_metrics, valid_losses=None, valid_metrics=None): assert len(self.header) != 0 assert len(self.key_map) != 0 - + csv_record_dict = {'epoch': self._epoch} converted_train_losses = self._convert_as_csv_record(train_losses, prefix='train') converted_train_metrics = self._convert_as_csv_record(train_metrics, prefix='train') csv_record_dict.update(converted_train_losses) csv_record_dict.update(converted_train_metrics) - + if valid_losses is not None: converted_valid_losses = self._convert_as_csv_record(valid_losses, prefix='valid') csv_record_dict.update(converted_valid_losses) if valid_metrics is not None: converted_valid_metrics = self._convert_as_csv_record(valid_metrics, prefix='valid') csv_record_dict.update(converted_valid_metrics) - + self.update(csv_record_dict) diff --git a/src/netspresso_trainer/loggers/builder.py b/src/netspresso_trainer/loggers/builder.py index 912044bb..63f60253 100644 --- a/src/netspresso_trainer/loggers/builder.py +++ b/src/netspresso_trainer/loggers/builder.py @@ -62,16 +62,16 @@ def __init__( step_per_epoch=step_per_epoch, num_sample_images=num_sample_images) if self.use_tensorboard else None self.stdout_logger: Optional[StdOutLogger] = \ StdOutLogger(task=task, model=model, total_epochs=conf.training.epochs) if self.use_stdout else None - + self.netspresso_api_client = None if self.use_netspresso: from loggers.netspresso import ModelSearchServerHandler self.netspresso_api_client: Optional[ModelSearchServerHandler] = ModelSearchServerHandler(task=task, model=model) - + if task in VISUALIZER: pallete = conf.data.pallete if 'pallete' in conf.data else None self.label_converter = VISUALIZER[task](class_map=class_map, pallete=pallete) - + @property def result_dir(self): return self._result_dir @@ -117,7 +117,7 @@ def _convert_imagedict_as_readable(self, images_dict: Dict): for k, v in images_dict.items(): if k == 'images': continue - + # target, pred, bg_gt v = v[:self.num_sample_images] v_new: np.ndarray = magic_image_handler( diff 
--git a/src/netspresso_trainer/loggers/csv.py b/src/netspresso_trainer/loggers/csv.py index 943acb29..d82f629a 100644 --- a/src/netspresso_trainer/loggers/csv.py +++ b/src/netspresso_trainer/loggers/csv.py @@ -6,7 +6,7 @@ class ClassificationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(ClassificationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -14,7 +14,7 @@ def __init__(self, model, result_dir): 'train/Acc@1': 'train_accuracy', 'valid/Acc@1': 'valid_accuracy', } - + @property def key_map(self): return self._key_map @@ -24,7 +24,7 @@ class SegmentationCSVLogger(BaseCSVLogger): def __init__(self, model, result_dir): super(SegmentationCSVLogger, self).__init__(model, result_dir) self.update_header(self.csv_header) - + self._key_map = { 'epoch': 'epoch', 'train/total': 'train_loss', @@ -35,4 +35,4 @@ def __init__(self, model, result_dir): @property def key_map(self): - return self._key_map \ No newline at end of file + return self._key_map diff --git a/src/netspresso_trainer/loggers/image.py b/src/netspresso_trainer/loggers/image.py index 68f6c23e..cf9bb0e1 100644 --- a/src/netspresso_trainer/loggers/image.py +++ b/src/netspresso_trainer/loggers/image.py @@ -12,32 +12,32 @@ def __init__(self, model, result_dir) -> None: self.save_dir: Path = Path(result_dir) / "result_image" self.save_dir.mkdir(exist_ok=True) self._epoch = None - + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def save_ndarray_as_image(self, image_array: np.ndarray, filename: Union[str, Path], dataformats: Literal['HWC', 'CHW'] = 'HWC'): assert image_array.ndim == 3 if dataformats != 'HWC' and dataformats == 'CHW': image_array = image_array.transpose((1, 2, 0)) - + # HWC assert image_array.shape[-1] in [1, 3] Image.fromarray(image_array.astype(np.uint8)).save(filename) return True - + def save_result(self, image_dict: Dict, prefix='train'): prefix_dir: Path = self.save_dir / prefix prefix_dir.mkdir(exist_ok=True) - + for k, v in image_dict.items(): assert isinstance(v, np.ndarray) assert v.ndim in [3, 4], \ @@ -53,5 +53,5 @@ def __call__(self, train_images=None, valid_images=None): self.save_result(train_images, prefix='train') if valid_images is not None: self.save_result(valid_images, prefix='valid') - + diff --git a/src/netspresso_trainer/loggers/netspresso.py b/src/netspresso_trainer/loggers/netspresso.py index 8402c6f1..589a7503 100644 --- a/src/netspresso_trainer/loggers/netspresso.py +++ b/src/netspresso_trainer/loggers/netspresso.py @@ -7,7 +7,7 @@ logger = logging.getLogger("netspresso_trainer") -MONGODB_TEMP_URI = "" +MONGODB_TEMP_URI = "" class ModelSearchServerHandler: @@ -19,27 +19,27 @@ def __init__(self, task, model, mongodb_uri: str=MONGODB_TEMP_URI) -> None: logger.debug("Pinged your deployment. 
You successfully connected to MongoDB!") except Exception as e: raise e - + self._db = client['custom-training-board']['trainer-all-in-one'] self._session_id = None - + self._create_session(title=f"[{task}]{model}") - - + + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def _is_ready(self): return self._session_id is not None - + def _append(self, scalar_dict, mode='train'): assert self._is_ready() meta_string = f"{mode}/" if mode is not None else "" @@ -48,38 +48,38 @@ def _append(self, scalar_dict, mode='train'): '$currentDate': {'lastModified': True }} result = self._db.update_one({'_id': self._session_id}, contents, upsert=True) return result - + def _create_session(self, title: str ="test") -> ObjectId: example_document = { "title": title } document = self._db.insert_one(example_document) self._session_id = document.inserted_id return self._session_id - + def create_session(self, title: str="test") -> ObjectId: return self._create_session(title=title) - + def log_scalar(self, key, value, mode='train'): result = self._append({key: value}, mode=mode) return result - + def log_scalars_with_dict(self, scalar_dict, mode='train'): result = self._append(scalar_dict, mode=mode) return result - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time, ) -> None: - + self.log_scalars_with_dict(train_losses, mode='train') self.log_scalars_with_dict(train_metrics, mode='train') - + if valid_losses is not None: self.log_scalars_with_dict(valid_losses, mode='valid') if valid_metrics is not None: self.log_scalars_with_dict(valid_metrics, mode='valid') - + if learning_rate is not None: self.log_scalar('learning_rate', learning_rate, mode='misc') if elapsed_time is not None: - self.log_scalar('elapsed_time', elapsed_time, mode='misc') \ No newline at end of file + self.log_scalar('elapsed_time', elapsed_time, mode='misc') diff --git a/src/netspresso_trainer/loggers/registry.py b/src/netspresso_trainer/loggers/registry.py index 1ba8aad6..b8c48590 100644 --- a/src/netspresso_trainer/loggers/registry.py +++ b/src/netspresso_trainer/loggers/registry.py @@ -9,4 +9,4 @@ VISUALIZER = { 'segmentation': SegmentationVisualizer, 'detection': DetectionVisualizer, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/loggers/stdout.py b/src/netspresso_trainer/loggers/stdout.py index aa0e99ff..7e3d653d 100644 --- a/src/netspresso_trainer/loggers/stdout.py +++ b/src/netspresso_trainer/loggers/stdout.py @@ -11,21 +11,21 @@ def __init__(self, task, model, total_epochs=None) -> None: self.task = task self.model_name = model self.total_epochs = total_epochs if total_epochs is not None else "???" 
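# [Editor's aside, not part of the patch] A compact sketch of how the CSV loggers above turn the
# per-epoch scalar dicts into one CSV row: keys are prefixed with 'train'/'valid', looked up in
# key_map (the ClassificationCSVLogger mapping is shown), and unmapped keys are skipped.
# The epoch number and metric values here are made up.
key_map = {
    'epoch': 'epoch',
    'train/total': 'train_loss',
    'train/Acc@1': 'train_accuracy',
    'valid/Acc@1': 'valid_accuracy',
}
train_losses = {'total': 0.42}
train_metrics = {'Acc@1': 87.5}

record = {'epoch': 3}
for prefix, scalars in (('train', train_losses), ('train', train_metrics)):
    for k, v in scalars.items():
        mapped_key = key_map.get(f"{prefix}/{k}")
        if mapped_key is not None:   # keys without a mapping are simply skipped
            record[mapped_key] = v
# record == {'epoch': 3, 'train_loss': 0.42, 'train_accuracy': 87.5} -> written as one CSV line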
- + def init_epoch(self): self._epoch = 0 - + @property def epoch(self): return self._epoch - + @epoch.setter def epoch(self, value: int) -> None: self._epoch = int(value) - + def __call__(self, train_losses, train_metrics, valid_losses, valid_metrics, learning_rate, elapsed_time): logger.info(f"Epoch: {self._epoch} / {self.total_epochs}") - + if learning_rate is not None: logger.info(f"learning rate: {learning_rate:.7f}") if elapsed_time is not None: diff --git a/src/netspresso_trainer/loggers/tensorboard.py b/src/netspresso_trainer/loggers/tensorboard.py index 3ebc9c60..e905b0dd 100644 --- a/src/netspresso_trainer/loggers/tensorboard.py +++ b/src/netspresso_trainer/loggers/tensorboard.py @@ -70,14 +70,14 @@ def log_image(self, key, value: Union[np.ndarray, torch.Tensor], mode='train'): def log_images_with_dict(self, image_dict, mode='train'): for k, v in image_dict.items(): self._log_image(k, v, mode) - + def _get_rasterized_hparam(self, hparams): if not isinstance(hparams, dict): stem = hparams if not isinstance(hparams, (int, float, str, bool, torch.Tensor)): return str(stem) return stem - + rasterized_dict = {} for key, value in hparams.items(): if isinstance(value, dict): @@ -90,15 +90,15 @@ def _get_rasterized_hparam(self, hparams): return rasterized_dict def log_hparams(self, hp_omegaconf: Union[Dict, List], final_metrics=None): - + if final_metrics is None: final_metrics = {} final_metrics = {f"hparams_metrics/{k}": v for k, v in final_metrics.items()} - + hp_dict = OmegaConf.to_container(hp_omegaconf, resolve=True) hp_for_log = self._get_rasterized_hparam(hp_dict) - - exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) + + exp, ssi, sei = hparams(hparam_dict=hp_for_log, metric_dict=final_metrics) self.tensorboard.file_writer.add_summary(exp) self.tensorboard.file_writer.add_summary(ssi) self.tensorboard.file_writer.add_summary(sei) diff --git a/src/netspresso_trainer/loggers/visualizer.py b/src/netspresso_trainer/loggers/visualizer.py index 3d11934b..349dcd68 100644 --- a/src/netspresso_trainer/loggers/visualizer.py +++ b/src/netspresso_trainer/loggers/visualizer.py @@ -55,7 +55,7 @@ def _convert(self, gray_image): return color_image def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): - + return_images = [] for image, result in zip(images, results): image = image.copy() @@ -75,12 +75,12 @@ def __call__(self, results: List[Tuple[np.ndarray, np.ndarray]], images=None): text_w, text_h = text_size image = cv2.rectangle(image, (x1, y1-5-text_h), (x1+text_w, y1), color=color, thickness=-1) image = cv2.putText(image, str(class_name), (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) - + return_images.append(image[np.newaxis, ...]) return_images = np.concatenate(return_images, axis=0) return return_images - - + + class SegmentationVisualizer: def __init__(self, class_map, pallete=None): n = len(class_map) diff --git a/src/netspresso_trainer/losses/classification/__init__.py b/src/netspresso_trainer/losses/classification/__init__.py index f072e4cc..e69de29b 100644 --- a/src/netspresso_trainer/losses/classification/__init__.py +++ b/src/netspresso_trainer/losses/classification/__init__.py @@ -1,2 +0,0 @@ -from .label_smooth import LabelSmoothingCrossEntropy -from .soft_target import SoftTargetCrossEntropy \ No newline at end of file diff --git a/src/netspresso_trainer/losses/classification/label_smooth.py b/src/netspresso_trainer/losses/classification/label_smooth.py deleted file mode 100644 index 61e2e377..00000000 --- 
a/src/netspresso_trainer/losses/classification/label_smooth.py +++ /dev/null @@ -1,22 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class LabelSmoothingCrossEntropy(nn.Module): - """ NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.1): - super(LabelSmoothingCrossEntropy, self).__init__() - assert smoothing < 1.0 - self.smoothing = smoothing - self.confidence = 1. - smoothing - - def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: - pred = out['pred'] - logprobs = F.log_softmax(pred, dim=-1) - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() \ No newline at end of file diff --git a/src/netspresso_trainer/losses/classification/soft_target.py b/src/netspresso_trainer/losses/classification/soft_target.py deleted file mode 100644 index 2dfc8cd0..00000000 --- a/src/netspresso_trainer/losses/classification/soft_target.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Dict - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class SoftTargetCrossEntropy(nn.Module): # cutmix/mixup augmentation - def __init__(self): - super(SoftTargetCrossEntropy, self).__init__() - - def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: - pred = out['pred'] - loss = torch.sum(-target * F.log_softmax(pred, dim=-1), dim=-1) - return loss.mean() \ No newline at end of file diff --git a/src/netspresso_trainer/losses/common.py b/src/netspresso_trainer/losses/common.py index eda3b8ba..fa709c00 100644 --- a/src/netspresso_trainer/losses/common.py +++ b/src/netspresso_trainer/losses/common.py @@ -1,16 +1,19 @@ -from typing import Dict +from typing import Dict, Optional import torch import torch.nn as nn import torch.nn.functional as F +from torch import Tensor class CrossEntropyLoss(nn.Module): - def __init__(self, ignore_index, **kwargs) -> None: + def __init__(self, weight: Optional[Tensor]=None, size_average=None, ignore_index: int=-100, + reduce=None, label_smoothing: float=0.0): super(CrossEntropyLoss, self).__init__() - self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index, **kwargs) + self.loss_fn = nn.CrossEntropyLoss(weight=weight, size_average=size_average, ignore_index=ignore_index, + reduce=reduce, reduction='mean', label_smoothing=label_smoothing) def forward(self, out: Dict, target: torch.Tensor) -> torch.Tensor: pred = out['pred'] loss = self.loss_fn(pred, target) - return loss \ No newline at end of file + return loss diff --git a/src/netspresso_trainer/losses/detection/__init__.py b/src/netspresso_trainer/losses/detection/__init__.py index 24d6942e..2d5f5d05 100644 --- a/src/netspresso_trainer/losses/detection/__init__.py +++ b/src/netspresso_trainer/losses/detection/__init__.py @@ -1,2 +1,2 @@ from .fastrcnn import RoiHeadLoss, RPNLoss -from .yolox import YOLOXLoss \ No newline at end of file +from .yolox import YOLOXLoss diff --git a/src/netspresso_trainer/losses/detection/fastrcnn.py b/src/netspresso_trainer/losses/detection/fastrcnn.py index 976a6098..28b2ab18 100644 --- a/src/netspresso_trainer/losses/detection/fastrcnn.py +++ b/src/netspresso_trainer/losses/detection/fastrcnn.py @@ -12,7 +12,7 @@ class RoiHeadLoss(nn.Module): def __init__(self) -> None: super().__init__() - + @staticmethod def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: class_logits, box_regression, labels, 
regression_targets =\ @@ -43,10 +43,10 @@ def forward(out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_classifier": classification_loss, "loss_box_reg": box_loss } - + # TODO: return as dict return sum(losses.values()) - + class RPNLoss(nn.Module): def __init__(self, box_fg_iou_thresh=0.5, @@ -54,7 +54,7 @@ def __init__(self, box_batch_size_per_image=512, box_positive_fraction=0.25) -> None: super().__init__() - + self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) self.box_similarity = box_ops.box_iou self.proposal_matcher = det_utils.Matcher( @@ -63,7 +63,7 @@ def __init__(self, allow_low_quality_matches=True, ) self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(box_batch_size_per_image, box_positive_fraction) - + def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[str, Tensor]] ) -> Tuple[List[Tensor], List[Tensor]]: @@ -100,7 +100,7 @@ def _assign_targets_to_anchors(self, anchors: List[Tensor], targets: List[Dict[s labels.append(labels_per_image) matched_gt_boxes.append(matched_gt_boxes_per_image) return labels, matched_gt_boxes - + def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: List[Tensor], regression_targets: List[Tensor] ) -> Tuple[Tensor, Tensor]: """ @@ -137,7 +137,7 @@ def _compute_loss(self, objectness: Tensor, pred_bbox_deltas: Tensor, labels: Li objectness_loss = F.binary_cross_entropy_with_logits(objectness[sampled_inds], labels[sampled_inds]) return objectness_loss, box_loss - + def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: anchors, objectness, pred_bbox_deltas = out['anchors'], out['objectness'], out['pred_bbox_deltas'] labels, matched_gt_boxes = self._assign_targets_to_anchors(anchors, target) @@ -150,4 +150,4 @@ def forward(self, out: torch.Tensor, target: torch.Tensor) -> torch.Tensor: "loss_rpn_box_reg": loss_rpn_box_reg, } # TODO: return as dict - return sum(losses.values()) \ No newline at end of file + return sum(losses.values()) diff --git a/src/netspresso_trainer/losses/detection/yolox.py b/src/netspresso_trainer/losses/detection/yolox.py index f593cc0a..76263d6f 100644 --- a/src/netspresso_trainer/losses/detection/yolox.py +++ b/src/netspresso_trainer/losses/detection/yolox.py @@ -47,9 +47,10 @@ def __init__(self, **kwargs) -> None: super(YOLOXLoss, self).__init__() self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") self.iou_loss = IOUloss(reduction="none") - + def forward(self, out: List, target: Dict) -> torch.Tensor: + out = out['pred'] x_shifts = [] y_shifts = [] expanded_strides = [] @@ -90,10 +91,10 @@ def forward(self, out: List, target: Dict) -> torch.Tensor: [], dtype=out[0].dtype, ) - + # TODO: return as dict return total_loss - + def get_losses( self, imgs, @@ -263,7 +264,7 @@ def get_losses( #loss_l1, num_fg / max(num_gts, 1), ) - + @torch.no_grad() def get_assignments( self, @@ -354,7 +355,7 @@ def get_assignments( matched_gt_inds, num_fg, ) - + def get_geometry_constraint( self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts, ): @@ -385,7 +386,7 @@ def get_geometry_constraint( geometry_relation = is_in_centers[:, anchor_filter] return anchor_filter, geometry_relation - + def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): matching_matrix = torch.zeros_like(cost, dtype=torch.uint8) @@ -419,7 +420,7 @@ def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): fg_mask_inboxes ] return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds - + 
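# [Editor's aside, not part of the patch] The losses in this release share a dict-based call
# convention: forward(out, target) reads the logits from out['pred'] (see CrossEntropyLoss in
# losses/common.py and the `out = out['pred']` unpacking added to YOLOXLoss.forward above).
# A runnable sketch with a simplified constructor; shapes and values are illustrative only.
import torch
import torch.nn as nn

class DictCrossEntropyLoss(nn.Module):
    def __init__(self, ignore_index: int = -100, label_smoothing: float = 0.0):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index, reduction='mean',
                                           label_smoothing=label_smoothing)

    def forward(self, out, target):
        return self.loss_fn(out['pred'], target)   # predictions always travel under 'pred'

criterion = DictCrossEntropyLoss(label_smoothing=0.1)
out = {'pred': torch.randn(4, 10)}                  # batch of 4 samples, 10 classes
target = torch.randint(0, 10, (4,))
loss = criterion(out, target)                       # scalar tensor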
def get_output_and_grid(self, output, k, stride, dtype): grid = self.grids[k] diff --git a/src/netspresso_trainer/losses/registry.py b/src/netspresso_trainer/losses/registry.py index 61d8f554..7e45b383 100644 --- a/src/netspresso_trainer/losses/registry.py +++ b/src/netspresso_trainer/losses/registry.py @@ -1,12 +1,9 @@ -from .classification import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy from .common import CrossEntropyLoss from .detection import RoiHeadLoss, RPNLoss, YOLOXLoss from .segmentation import BoundaryLoss, PIDNetBoundaryAwareCrossEntropy, PIDNetCrossEntropy LOSS_DICT = { 'cross_entropy': CrossEntropyLoss, - 'soft_target_cross_entropy': SoftTargetCrossEntropy, - 'label_smoothing_cross_entropy': LabelSmoothingCrossEntropy, 'pidnet_cross_entropy': PIDNetCrossEntropy, 'boundary_loss': BoundaryLoss, 'pidnet_cross_entropy_with_boundary': PIDNetBoundaryAwareCrossEntropy, @@ -15,4 +12,4 @@ 'yolox_loss': YOLOXLoss, } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/losses/segmentation/pidnet.py b/src/netspresso_trainer/losses/segmentation/pidnet.py index 2bdc7cab..736ab018 100644 --- a/src/netspresso_trainer/losses/segmentation/pidnet.py +++ b/src/netspresso_trainer/losses/segmentation/pidnet.py @@ -26,7 +26,7 @@ def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): self.boundary_aware = False def _forward(self, out: torch.Tensor, target: torch.Tensor): - + return self.loss_fn(out, target) def forward(self, out: Dict, target: torch.Tensor): @@ -36,7 +36,7 @@ def forward(self, out: Dict, target: torch.Tensor): filler = torch.ones_like(target) * self.ignore_index bd_label = torch.where(torch.sigmoid(extra_d[:, 0, :, :]) > 0.8, target, filler) return self._forward(pred, bd_label) - + pred, extra_p = out['pred'], out['extra_p'] score = [extra_p, pred] return sum([w * self._forward(x, target) for (w, x) in zip(BALANCE_WEIGHTS, score)]) @@ -45,7 +45,7 @@ class PIDNetBoundaryAwareCrossEntropy(PIDNetCrossEntropy): def __init__(self, ignore_index=IGNORE_INDEX_NONE_VALUE, weight=None): super().__init__(ignore_index, weight) self.boundary_aware = True - + # class OhemCrossEntropy(nn.Module): # def __init__(self, ignore_label=-1, thres=0.7, min_kept=100000, weight=None): # super(OhemCrossEntropy, self).__init__() diff --git a/src/netspresso_trainer/metrics/__init__.py b/src/netspresso_trainer/metrics/__init__.py index 330ddc64..923da55e 100644 --- a/src/netspresso_trainer/metrics/__init__.py +++ b/src/netspresso_trainer/metrics/__init__.py @@ -1 +1 @@ -from .builder import build_metrics \ No newline at end of file +from .builder import build_metrics diff --git a/src/netspresso_trainer/metrics/classification/metric.py b/src/netspresso_trainer/metrics/classification/metric.py index efa4dc29..a4126313 100644 --- a/src/netspresso_trainer/metrics/classification/metric.py +++ b/src/netspresso_trainer/metrics/classification/metric.py @@ -8,11 +8,10 @@ @torch.no_grad() -def accuracy_topk(output, target): +def accuracy_topk(pred, target): """Computes the accuracy over the k top predictions for the specified values of k""" - maxk = min(TOPK_MAX, output.size()[1]) batch_size = target.size(0) - _, pred = output.topk(maxk, 1, True, True) + maxk = pred.size(-1) pred = pred.t() correct = pred.eq(target.reshape(1, -1).expand_as(pred)) return lambda topk: correct[:min(topk, maxk)].reshape(-1).float().sum(0) * 100. 
/ batch_size @@ -25,17 +24,6 @@ class ClassificationMetric(BaseMetric): def __init__(self, **kwargs): super().__init__() - @torch.no_grad() - @staticmethod - def accuracy_topk(output, target): - """Computes the accuracy over the k top predictions for the specified values of k""" - maxk = min(TOPK_MAX, output.size()[1]) - batch_size = target.size(0) - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.reshape(1, -1).expand_as(pred)) - return lambda topk: correct[:min(topk, maxk)].reshape(-1).float().sum(0) * 100. / batch_size - def calibrate(self, pred, target, **kwargs): result_dict = {k: 0. for k in self.metric_names} topk_callable = accuracy_topk(pred, target) diff --git a/src/netspresso_trainer/metrics/detection/metric.py b/src/netspresso_trainer/metrics/detection/metric.py index 17a83845..93e2070f 100644 --- a/src/netspresso_trainer/metrics/detection/metric.py +++ b/src/netspresso_trainer/metrics/detection/metric.py @@ -167,7 +167,7 @@ def average_precisions_per_class( class DetectionMetric(BaseMetric): metric_names: List[str] = ['map50', 'map75', 'map50_95'] primary_metric: str = 'map50_95' - + def __init__(self, **kwargs): super().__init__() diff --git a/src/netspresso_trainer/metrics/registry.py b/src/netspresso_trainer/metrics/registry.py index 381ab64f..73603e13 100644 --- a/src/netspresso_trainer/metrics/registry.py +++ b/src/netspresso_trainer/metrics/registry.py @@ -11,4 +11,4 @@ 'detection': DetectionMetric } -PHASE_LIST = ['train', 'valid', 'test'] \ No newline at end of file +PHASE_LIST = ['train', 'valid', 'test'] diff --git a/src/netspresso_trainer/metrics/segmentation/metric.py b/src/netspresso_trainer/metrics/segmentation/metric.py index fea6398e..65ca19a8 100644 --- a/src/netspresso_trainer/metrics/segmentation/metric.py +++ b/src/netspresso_trainer/metrics/segmentation/metric.py @@ -47,8 +47,7 @@ def calibrate(self, pred, target, **kwargs): result_dict = {k: AverageMeter(k) for k in self.metric_names} B = pred.size(0) - output_seg = torch.max(pred, dim=1)[1] # argmax - metrics = self.intersection_and_union_gpu(output_seg, target) + metrics = self.intersection_and_union_gpu(pred, target) result_dict['iou'].update(sum(metrics['intersection']) / (sum(metrics['union']) + 1e-10), n=B) result_dict['pixel_acc'].update(sum(metrics['intersection']) / (sum(metrics['target']) + 1e-10), n=B) diff --git a/src/netspresso_trainer/models/backbones/__init__.py b/src/netspresso_trainer/models/backbones/__init__.py index 591a70d1..03737edd 100644 --- a/src/netspresso_trainer/models/backbones/__init__.py +++ b/src/netspresso_trainer/models/backbones/__init__.py @@ -1,8 +1,9 @@ # from .core import * from .experimental.darknet import cspdarknet from .experimental.efficientformer import efficientformer -from .experimental.mobilenetv3 import mobilenetv3_small +from .experimental.mixnet import mixnet +from .experimental.mobilenetv3 import mobilenetv3 from .experimental.mobilevit import mobilevit -from .experimental.resnet import resnet50 +from .experimental.resnet import resnet from .experimental.segformer import segformer from .experimental.vit import vit diff --git a/src/netspresso_trainer/models/backbones/experimental/darknet.py b/src/netspresso_trainer/models/backbones/experimental/darknet.py index 3759c0c9..49b134e8 100644 --- a/src/netspresso_trainer/models/backbones/experimental/darknet.py +++ b/src/netspresso_trainer/models/backbones/experimental/darknet.py @@ -2,7 +2,9 @@ Based on the Darknet implementation of Megvii. 
https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/darknet.py """ +from typing import Dict, Optional, List +from omegaconf import DictConfig import torch from torch import nn @@ -14,22 +16,25 @@ class CSPDarknet(nn.Module): + def __init__( self, - task, - dep_mul, - wid_mul, - out_features=("dark3", "dark4", "dark5"), + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, #depthwise=False, - act_type="silu", - **kwargs - ): + ) -> None: super().__init__() + out_features=("dark3", "dark4", "dark5") assert out_features, "please provide output features of Darknet" self.task = task.lower() self.use_intermediate_features = self.task in ['segmentation', 'detection'] + dep_mul = params.dep_mul + wid_mul = params.wid_mul + act_type = params.act_type + self.out_features = out_features Conv = ConvLayer @@ -147,4 +152,4 @@ def task_support(self, task): def cspdarknet(task, conf_model_backbone) -> CSPDarknet: - return CSPDarknet(task, **conf_model_backbone) + return CSPDarknet(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py index 6ba4a5e3..23ee2ae7 100644 --- a/src/netspresso_trainer/models/backbones/experimental/efficientformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/efficientformer.py @@ -5,8 +5,9 @@ import itertools import math import os -from typing import Dict, Optional +from typing import Dict, Optional, List +from omegaconf import DictConfig import torch import torch.nn as nn @@ -329,15 +330,33 @@ def forward(self, x): class EfficientFormer(MetaFormer): def __init__( - self, task, num_blocks, hidden_sizes, - num_attention_heads, attention_hidden_size, attention_dropout_prob, - attention_ratio, attention_bias_resolution, - pool_size, intermediate_ratio, hidden_dropout_prob, hidden_activation_type, - layer_norm_eps, - drop_path_rate=0., use_layer_scale=True, layer_scale_init_value=1e-5, - downsamples=None, down_patch_size=3, down_stride=2, down_pad=1, - vit_num=1, **kwargs - ): + self, + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, + ) -> None: + + num_blocks = [stage.num_blocks for stage in stage_params] + hidden_sizes = [stage.hidden_sizes for stage in stage_params] + downsamples = [stage.downsamples for stage in stage_params] + + num_attention_heads = params.num_attention_heads + attention_hidden_size = params.attention_hidden_size + attention_dropout_prob = params.attention_dropout_prob + attention_ratio = params.attention_ratio + attention_bias_resolution = params.attention_bias_resolution + pool_size = params.pool_size + intermediate_ratio = params.intermediate_ratio + hidden_dropout_prob = params.hidden_dropout_prob + hidden_activation_type = params.hidden_activation_type + layer_norm_eps = params.layer_norm_eps + drop_path_rate = params.drop_path_rate + use_layer_scale = params.use_layer_scale + layer_scale_init_value = params.layer_scale_init_value + down_patch_size = params.down_patch_size + down_stride = params.down_stride + down_pad = params.down_pad + vit_num = params.vit_num super().__init__(hidden_sizes) self.task = task.lower() @@ -374,4 +393,4 @@ def forward(self, x): def efficientformer(task, conf_model_backbone) -> EfficientFormer: - return EfficientFormer(task, **conf_model_backbone) + return EfficientFormer(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff 
--git a/src/netspresso_trainer/models/backbones/experimental/mixnet.py b/src/netspresso_trainer/models/backbones/experimental/mixnet.py new file mode 100644 index 00000000..ba4c57b6 --- /dev/null +++ b/src/netspresso_trainer/models/backbones/experimental/mixnet.py @@ -0,0 +1,286 @@ +""" +Based on the publicly available MixNet-PyTorch repository. +https://github.com/romulus0914/MixNet-PyTorch/blob/master/mixnet.py +""" +from collections import OrderedDict +import math +from typing import Dict, List, Optional + +from omegaconf import DictConfig +import torch +from torch import nn +from torch.nn import functional as F +from torchvision.ops.misc import SqueezeExcitation as SEBlock + +from ...op.registry import ACTIVATION_REGISTRY +from ...op.custom import ConvLayer +from ...utils import BackboneOutput + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# GPConv: Grouped Point-wise Convolution for MixDepthBlock +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class GPConv(nn.Module): + def __init__(self, in_planes, out_planes, kernel_sizes): + super(GPConv, self).__init__() + self.num_groups = len(kernel_sizes) + assert in_planes % self.num_groups == 0 + sub_in_dim = in_planes // self.num_groups + sub_out_dim = out_planes // self.num_groups + + self.group_point_wise = nn.ModuleList() + for _ in kernel_sizes: + self.group_point_wise.append(nn.Conv2d(sub_in_dim, sub_out_dim, + kernel_size=1, stride=1, padding=0, + groups=1, dilation=1, bias=False)) + + def forward(self, x): + if self.num_groups == 1: + return self.group_point_wise[0](x) + + chunks = torch.chunk(x, chunks=self.num_groups, dim=1) + mix = [self.group_point_wise[stream](chunks[stream]) for stream in range(self.num_groups)] + return torch.cat(mix, dim=1) + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# MDConv: Mixed Depth-wise Convolution for MixDepthBlock +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class MDConv(nn.Module): + def __init__(self, in_planes, kernel_sizes, stride=1, dilate=1): + super(MDConv, self).__init__() + self.num_groups = len(kernel_sizes) + assert in_planes % self.num_groups == 0 + sub_hidden_dim = in_planes // self.num_groups + + assert stride in [1, 2] + dilate = 1 if stride > 1 else dilate + + self.mixed_depth_wise = nn.ModuleList() + for kernel_size in kernel_sizes: + padding = ((kernel_size - 1) // 2) * dilate + self.mixed_depth_wise.append(nn.Conv2d(sub_hidden_dim, sub_hidden_dim, + kernel_size=kernel_size, stride=stride, padding=padding, + groups=sub_hidden_dim, dilation=dilate, bias=False)) + + def forward(self, x): + if self.num_groups == 1: + return self.mixed_depth_wise[0](x) + + chunks = torch.chunk(x, chunks=self.num_groups, dim=1) + mix = [self.mixed_depth_wise[stream](chunks[stream]) for stream in range(self.num_groups)] + return torch.cat(mix, dim=1) + + +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +# MixDepthBlock: MixDepthBlock for MixNet +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # +class MixDepthBlock(nn.Module): + def __init__(self, in_planes, out_planes, + expand_ratio, exp_kernel_sizes, kernel_sizes, poi_kernel_sizes, stride, dilate, + reduction_ratio=4, dropout_rate=0.2, act_type="swish"): + super(MixDepthBlock, self).__init__() + self.dropout_rate = dropout_rate + self.expand_ratio = expand_ratio + self.out_channels = out_planes + + self.groups = len(kernel_sizes) + self.use_se = (reduction_ratio is not None) and (reduction_ratio > 1) + self.use_residual 
= in_planes == out_planes and stride == 1 + + assert stride in [1, 2] + dilate = 1 if stride > 1 else dilate + hidden_dim = in_planes * expand_ratio + + # step 1. Expansion phase/Point-wise convolution + if expand_ratio != 1: + self.expansion = nn.Sequential(OrderedDict([ + ("conv", GPConv(in_planes, hidden_dim, kernel_sizes=exp_kernel_sizes)), + ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), + ("act", ACTIVATION_REGISTRY[act_type]()) + ])) + + # step 2. Depth-wise convolution phase + self.depth_wise = nn.Sequential(OrderedDict([ + ("conv", MDConv(hidden_dim, kernel_sizes=kernel_sizes, stride=stride, dilate=dilate)), + ("norm", nn.BatchNorm2d(hidden_dim, eps=1e-3, momentum=0.01)), + ("act", ACTIVATION_REGISTRY[act_type]()) + ])) + + # step 3. Squeeze and Excitation + if self.use_se: + reduced_dim = max(1, int(in_planes / reduction_ratio)) + self.se_block = SEBlock(input_channels=hidden_dim, squeeze_channels=reduced_dim, activation=ACTIVATION_REGISTRY[act_type]) + + # step 4. Point-wise convolution phase + self.point_wise = nn.Sequential(OrderedDict([ + ("conv", GPConv(hidden_dim, out_planes, kernel_sizes=poi_kernel_sizes)), + ("norm", nn.BatchNorm2d(out_planes, eps=1e-3, momentum=0.01)) + ])) + + def forward(self, x): + res = x + + # step 1. Expansion phase/Point-wise convolution + if self.expand_ratio != 1: + x = self.expansion(x) + + # step 2. Depth-wise convolution phase + x = self.depth_wise(x) + + # step 3. Squeeze and Excitation + if self.use_se: + x = self.se_block(x) + + # step 4. Point-wise convolution phase + x = self.point_wise(x) + + # step 5. Skip connection and drop connect + if self.use_residual: + if self.training and (self.dropout_rate is not None): + x = F.dropout2d(input=x, p=self.dropout_rate, + training=self.training, ) + x = x + res + + return x + + +class MixNet(nn.Module): + def __init__( + self, + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, + ): + super(MixNet, self).__init__() + self.task = task.lower() + self.use_intermediate_features = self.task in ['segmentation', 'detection'] + + stem_planes = params.stem_planes + width_multi = params.width_multi + depth_multi = params.depth_multi + self.dropout_rate = params.dropout_rate + + out_channels = self._round_filters(stem_planes, width_multi) + self.mod1 = ConvLayer(in_channels=3, out_channels=out_channels, kernel_size=3, + stride=2, groups=1, dilation=1, act_type="relu") + + in_channels = out_channels + drop_rate = self.dropout_rate + stages: List[nn.Module] = [] + for stg_idx, stage_info in enumerate(stage_params): + + stage: List[nn.Module] = [] + for block in zip(stage_info.expand_ratio, stage_info.out_channels, stage_info.num_blocks, + stage_info.kernel_sizes, stage_info.exp_kernel_sizes, stage_info.poi_kernel_sizes, + stage_info.stride, stage_info.dilation, stage_info.act_type, stage_info.se_reduction_ratio): + t, c, n, k, ek, pk, s, d, a, se = block + out_channels = self._round_filters(c, width_multi) + repeats = self._round_repeats(n, depth_multi) + + for block_id in range(repeats): + stride = s if block_id == 0 else 1 + dilate = d if stride == 1 else 1 + + stage.append(MixDepthBlock(in_channels, out_channels, + expand_ratio=t, exp_kernel_sizes=ek, + kernel_sizes=k, poi_kernel_sizes=pk, + stride=stride, dilate=dilate, + reduction_ratio=se, + dropout_rate=drop_rate, + act_type=a)) + + in_channels = out_channels + + # add last conv + if stg_idx == len(stage_params) - 1: + self.last_channels = 1536 + stage.append( + ConvLayer(in_channels=in_channels, + 
out_channels=self.last_channels, + kernel_size=1, + stride=1, + groups=1, + dilation=1, + act_type="relu") + ) + + stage = nn.Sequential(*stage) + stages.append(stage) + + self.stages = nn.ModuleList(stages) + self.avgpool = nn.AdaptiveAvgPool2d(1) + + self._feature_dim = self.last_channels + self._intermediate_features_dim = [s[-1].out_channels for s in self.stages[:-1]] + self._intermediate_features_dim += [self.last_channels] + + self._initialize_weights() + + def _initialize_weights(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + fan_out = m.weight.size(0) + init_range = 1.0 / math.sqrt(fan_out) + nn.init.uniform_(m.weight, -init_range, init_range) + if m.bias is not None: + nn.init.zeros_(m.bias) + + @staticmethod + def _make_divisible(value, divisor=8): + new_value = max(divisor, int(value + divisor / 2) // divisor * divisor) + if new_value < 0.9 * value: + new_value += divisor + return new_value + + def _round_filters(self, filters, width_multi): + if width_multi == 1.0: + return filters + return int(self._make_divisible(filters * width_multi)) + + @staticmethod + def _round_repeats(repeats, depth_multi): + if depth_multi == 1.0: + return repeats + return int(math.ceil(depth_multi * repeats)) + + @property + def feature_dim(self): + return self._feature_dim + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim + + def forward(self, x): + x = self.mod1(x) + + all_hidden_states = () if self.use_intermediate_features else None + for stage in self.stages: + x = stage(x) + if self.use_intermediate_features: + all_hidden_states = all_hidden_states + (x, ) + + if self.use_intermediate_features: + return BackboneOutput(intermediate_features=all_hidden_states) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + if self.training and (self.dropout_rate is not None): + x = F.dropout(input=x, p=self.dropout_rate, + training=self.training, ) + + return BackboneOutput(last_feature=x) + + +def mixnet(task, conf_model_backbone) -> MixNet: + return MixNet(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py index d87b9695..f1c1e486 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilenetv3.py @@ -2,8 +2,9 @@ Based on the Torchvision implementation of MobileNetV3. 
https://pytorch.org/vision/main/_modules/torchvision/models/mobilenetv3.html """ -from typing import List +from typing import List, Dict, Optional +from omegaconf import DictConfig import torch import torch.nn as nn from torch import Tensor @@ -11,25 +12,18 @@ from ...op.custom import ConvLayer, InvertedResidual from ...utils import BackboneOutput -__all__ = ['mobilenetv3_small'] +__all__ = ['mobilenetv3'] SUPPORTING_TASK = ['classification', 'segmentation'] -def list_depth(block_info): - if isinstance(block_info[0], list): - return 1 + list_depth(block_info[0]) - else: - return 1 - - class MobileNetV3(nn.Module): def __init__( self, task: str, - block_info, # [in_channels, kernel, expended_channels, out_channels, use_se, activation, stride, dilation] - **kwargs + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super(MobileNetV3, self).__init__() @@ -39,7 +33,7 @@ def __init__( act_type = 'hard_swish' # building first layer - firstconv_output_channels = block_info[0][0][0] + firstconv_output_channels = stage_params[0].in_channels[0] self.conv_first = ConvLayer( in_channels=3, out_channels=firstconv_output_channels, @@ -52,20 +46,16 @@ def __init__( # building inverted residual blocks stages: List[nn.Module] = [] - lastconv_input_channels = block_info[-1][-1][3] + lastconv_input_channels = stage_params[-1].out_channels[-1] lastconv_output_channels = 6 * lastconv_input_channels - for stg_idx, stage_info in enumerate(block_info): + for stg_idx, stage_info in enumerate(stage_params): stage: List[nn.Module] = [] - for block in stage_info: - in_channels = block[0] - kernel_size = block[1] - hidden_channels = block[2] - out_channels = block[3] - use_se = block[4] - act_type_b = block[5].lower() - stride = block[6] - dilation = block[7] + for block in zip(stage_info.in_channels, stage_info.kernel, stage_info.expanded_channels, + stage_info.out_channels, stage_info.use_se, stage_info.activation, + stage_info.stride, stage_info.dilation): + in_channels, kernel_size, hidden_channels, out_channels, use_se, act_type_b, stride, dilation = block + act_type_b = act_type_b.lower() stage.append( InvertedResidual(in_channels=in_channels, hidden_channels=hidden_channels, @@ -79,7 +69,7 @@ def __init__( ) # add last conv - if stg_idx == len(block_info) - 1: + if stg_idx == len(stage_params) - 1: stage.append( ConvLayer(in_channels=lastconv_input_channels, out_channels=lastconv_output_channels, @@ -140,5 +130,5 @@ def task_support(self, task): return task.lower() in SUPPORTING_TASK -def mobilenetv3_small(task, conf_model_backbone) -> MobileNetV3: - return MobileNetV3(task, **conf_model_backbone) +def mobilenetv3(task, conf_model_backbone) -> MobileNetV3: + return MobileNetV3(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py index 1adbd28e..5a8ed0e9 100644 --- a/src/netspresso_trainer/models/backbones/experimental/mobilevit.py +++ b/src/netspresso_trainer/models/backbones/experimental/mobilevit.py @@ -5,8 +5,9 @@ import argparse import math -from typing import Any, Dict, Literal, Optional, Tuple, Union +from typing import Any, Dict, Literal, Optional, Tuple, Union, List +from omegaconf import DictConfig import torch import torch.nn as nn import torch.nn.functional as F @@ -252,27 +253,38 @@ def forward( return out class MobileViTEncoder(MetaFormerEncoder): - def __init__(self, out_channels, block_type, 
num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, patch_size, - num_attention_heads, attention_dropout_prob, hidden_dropout_prob, layer_norm_eps, use_fusion_layer) -> None: + def __init__( + self, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, + ) -> None: super().__init__() stages = [] self.dilation = 1 - self.local_kernel_size = local_kernel_size - self.patch_size = patch_size - self.num_attention_heads = num_attention_heads - self.attention_dropout_prob = attention_dropout_prob - self.hidden_dropout_prob = hidden_dropout_prob - self.layer_norm_eps = layer_norm_eps - self.use_fusion_layer = use_fusion_layer + self.local_kernel_size = params.local_kernel_size + self.patch_size = params.patch_size + self.num_attention_heads = params.num_attention_heads + self.attention_dropout_prob = params.attention_dropout_prob + self.hidden_dropout_prob = params.hidden_dropout_prob + self.layer_norm_eps = params.layer_norm_eps + self.use_fusion_layer = params.use_fusion_layer - in_channels = patch_embedding_out_channels - for idx in range(len(out_channels)): - stages.append(self._make_block(out_channels[idx], block_type[idx], num_blocks[idx], stride[idx], hidden_size[idx], - intermediate_size[idx], num_transformer_blocks[idx], dilate[idx], expand_ratio[idx], + in_channels = params.patch_embedding_out_channels + for stage in stage_params: + out_channels = stage.out_channels + block_type = stage.block_type + num_blocks = stage.num_blocks + stride = stage.stride + hidden_size = stage.hidden_size + intermediate_size = stage.intermediate_size + num_transformer_blocks = stage.num_transformer_blocks + dilate = stage.dilate + expand_ratio = stage.expand_ratio + stages.append(self._make_block(out_channels, block_type, num_blocks, stride, hidden_size, + intermediate_size, num_transformer_blocks, dilate, expand_ratio, in_channels)) - in_channels = out_channels[idx] + in_channels = out_channels self.blocks = nn.Sequential(*stages) def _make_block(self, out_channels, block_type: Literal['mv2', 'mobilevit'], num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, in_channels): @@ -346,26 +358,23 @@ def _make_mobilevit_blocks(self, num_transformer_blocks, in_channels, out_channe class MobileViT(MetaFormer): def __init__( - self, task, - out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, patch_size, - num_attention_heads, attention_dropout_prob, hidden_dropout_prob, - exp_factor, layer_norm_eps=1e-6, use_fusion_layer = True, - **kwargs + self, + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: - exp_channels = min(exp_factor * out_channels[-1], 960) - hidden_sizes = out_channels + [exp_channels] + exp_channels = min(params.exp_factor * stage_params[-1].out_channels, 960) + hidden_sizes = [stage.out_channels for stage in stage_params] + [exp_channels] super().__init__(hidden_sizes) self.task = task self.intermediate_features = self.task in ['segmentation', 'detection'] image_channels = 3 - self.patch_embed = MobileViTEmbeddings(image_channels, patch_embedding_out_channels) - self.encoder = MobileViTEncoder(out_channels, block_type, num_blocks, stride, hidden_size, intermediate_size, num_transformer_blocks, dilate, expand_ratio, - patch_embedding_out_channels, local_kernel_size, 
patch_size, num_attention_heads, attention_dropout_prob, hidden_dropout_prob, layer_norm_eps, use_fusion_layer) + self.patch_embed = MobileViTEmbeddings(image_channels, params.patch_embedding_out_channels) + self.encoder = MobileViTEncoder(params=params, stage_params=stage_params) - self.conv_1x1_exp = ConvLayer(in_channels=out_channels[-1], out_channels=exp_channels, + self.conv_1x1_exp = ConvLayer(in_channels=stage_params[-1].out_channels, out_channels=exp_channels, kernel_size=1, stride=1, use_act=True, use_norm=True, act_type='silu') self.pool = GlobalPool(pool_type="mean", keep_dim=False) @@ -380,4 +389,4 @@ def forward(self, x: FXTensorType): return BackboneOutput(last_feature=feat) def mobilevit(task, conf_model_backbone): - return MobileViT(task, **conf_model_backbone) \ No newline at end of file + return MobileViT(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file diff --git a/src/netspresso_trainer/models/backbones/experimental/resnet.py b/src/netspresso_trainer/models/backbones/experimental/resnet.py index 77baeb5f..1aefbb0b 100644 --- a/src/netspresso_trainer/models/backbones/experimental/resnet.py +++ b/src/netspresso_trainer/models/backbones/experimental/resnet.py @@ -4,6 +4,7 @@ """ from typing import Dict, List, Literal, Optional, Type, Union +from omegaconf import DictConfig import torch import torch.nn as nn from torch import Tensor @@ -11,7 +12,7 @@ from ...op.custom import BasicBlock, Bottleneck, ConvLayer from ...utils import BackboneOutput -__all__ = ['resnet50'] +__all__ = ['resnet'] SUPPORTING_TASK = ['classification', 'segmentation'] @@ -26,18 +27,18 @@ class ResNet(nn.Module): def __init__( self, task: str, - block: Literal['basicblock', 'bottleneck'], - layers: List[int], - zero_init_residual: bool = False, - groups: int = 1, - width_per_group: int = 64, - replace_stride_with_dilation: Optional[List[bool]] = None, - norm_layer: Optional[str] = None, - expansion: Optional[int] = None, - **kwargs + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: super(ResNet, self).__init__() + block: Literal['basicblock', 'bottleneck'] = params.block + zero_init_residual: bool = params.zero_init_residual + groups: int = params.groups + width_per_group: int = params.width_per_group + norm_layer: Optional[str] = params.norm_layer + expansion: Optional[int] = params.expansion + self.task = task.lower() block = BLOCK_FROM_LITERAL[block.lower()] self.use_intermediate_features = self.task in ['segmentation', 'detection'] @@ -48,13 +49,9 @@ def __init__( self.inplanes = 64 self.dilation = 1 - if replace_stride_with_dilation is None: - # each element in the tuple indicates if we should replace - # the 2x2 stride with a dilated convolution instead - replace_stride_with_dilation = [False, False, False] - if len(replace_stride_with_dilation) != 3: - raise ValueError("replace_stride_with_dilation should be None " - "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + for i in range(1, len(stage_params)): + if 'replace_stride_with_dilation' not in stage_params[i]: + stage_params[i]['replace_stride_with_dilation'] = False self.groups = groups self.base_width = width_per_group @@ -64,22 +61,23 @@ def __init__( self.conv1 = ConvLayer(in_channels=3, out_channels=self.inplanes, kernel_size=7, stride=2, padding=3, bias=False, norm_type='batch_norm', act_type='relu') - - planes = [64, 128, 256, 512] self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = 
self._make_layer(block, planes[0], layers[0], expansion=expansion) - self.layer2 = self._make_layer(block, planes[1], layers[1], stride=2, - dilate=replace_stride_with_dilation[0], - expansion=expansion) - self.layer3 = self._make_layer(block, planes[2], layers[2], stride=2, - dilate=replace_stride_with_dilation[1], - expansion=expansion) - self.layer4 = self._make_layer(block, planes[3], layers[3], stride=2, - dilate=replace_stride_with_dilation[2], - expansion=expansion) + + stages: List[nn.Module] = [] + + first_stage = stage_params[0] + layer = self._make_layer(block, first_stage['plane'], first_stage['layers'], expansion=expansion) + stages.append(layer) + for stage in stage_params[1:]: + layer = self._make_layer(block, stage['plane'], stage['layers'], stride=2, + dilate=stage['replace_stride_with_dilation'], + expansion=expansion) + stages.append(layer) + + self.stages = nn.ModuleList(stages) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - hidden_sizes = [h * 4 for h in planes] + hidden_sizes = [stage['plane'] * expansion for stage in stage_params] self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes @@ -134,8 +132,8 @@ def forward(self, x: Tensor): x = self.maxpool(x) all_hidden_states = () if self.use_intermediate_features else None - for layer in [self.layer1, self.layer2, self.layer3, self.layer4]: - x = layer(x) + for stage in self.stages: + x = stage(x) if self.use_intermediate_features: all_hidden_states = all_hidden_states + (x,) @@ -160,8 +158,8 @@ def task_support(self, task): return task.lower() in SUPPORTING_TASK -def resnet50(task, conf_model_backbone) -> ResNet: +def resnet(task, conf_model_backbone) -> ResNet: """ - ResNet-50 model from "Deep Residual Learning for Image Recognition" https://arxiv.org/pdf/1512.03385.pdf. + ResNet model from "Deep Residual Learning for Image Recognition" https://arxiv.org/pdf/1512.03385.pdf. 
""" - return ResNet(task, **conf_model_backbone) + return ResNet(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/backbones/experimental/segformer.py b/src/netspresso_trainer/models/backbones/experimental/segformer.py index 9fb55c87..8dee0d82 100644 --- a/src/netspresso_trainer/models/backbones/experimental/segformer.py +++ b/src/netspresso_trainer/models/backbones/experimental/segformer.py @@ -1,6 +1,7 @@ import math -from typing import Optional +from typing import Optional, List, Dict +from omegaconf import DictConfig import torch import torch.nn as nn @@ -135,42 +136,59 @@ def forward(self, x, height, width): class SegFormer(MetaFormer): - def __init__(self, task, num_modules, num_blocks, embedding_patch_sizes, embedding_strides, hidden_sizes, - num_attention_heads, attention_dropout_prob, sr_ratios, - intermediate_ratio, hidden_dropout_prob, hidden_activation_type, layer_norm_eps, - **kwargs): - super().__init__(hidden_sizes) + def __init__( + self, + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, + ) -> None: + super().__init__([stage.hidden_sizes for stage in stage_params]) self.task = task self.use_intermediate_features = self.task in ['segmentation', 'detection'] - image_channels = 3 + intermediate_ratio = params.intermediate_ratio + hidden_activation_type = params.hidden_activation_type + hidden_dropout_prob = params.hidden_dropout_prob + attention_dropout_prob = params.attention_dropout_prob + layer_norm_eps = params.layer_norm_eps + + in_channels = 3 self.encoder_modules = nn.ModuleList() - for i in range(num_modules): + for blocks in stage_params: + num_blocks = blocks.num_blocks + sr_ratios = blocks.sr_ratios + hidden_sizes = blocks.hidden_sizes + embedding_patch_sizes = blocks.embedding_patch_sizes + embedding_strides = blocks.embedding_strides + num_attention_heads = blocks.num_attention_heads + module = nn.ModuleDict( { 'patch_embed': SegformerOverlapPatchEmbeddings( - embedding_patch_sizes[i], - embedding_strides[i], - image_channels if i == 0 else hidden_sizes[i - 1], - hidden_sizes[i] + embedding_patch_sizes, + embedding_strides, + in_channels, + hidden_sizes ), 'encoder': SegformerEncoder( - num_blocks[i], - hidden_sizes[i], - num_attention_heads[i], + num_blocks, + hidden_sizes, + num_attention_heads, attention_dropout_prob, - sr_ratios[i], + sr_ratios, intermediate_ratio, hidden_dropout_prob, hidden_activation_type, layer_norm_eps ), - 'norm': nn.LayerNorm(hidden_sizes[i]) + 'norm': nn.LayerNorm(hidden_sizes) } ) self.encoder_modules.append(module) + in_channels = hidden_sizes + def forward(self, x): B = x.size(0) all_hidden_states = () if self.use_intermediate_features else None @@ -194,4 +212,4 @@ def forward(self, x): def segformer(task, conf_model_backbone) -> SegformerEncoder: - return SegFormer(task, **conf_model_backbone) + return SegFormer(task, conf_model_backbone.params, conf_model_backbone.stage_params) diff --git a/src/netspresso_trainer/models/backbones/experimental/vit.py b/src/netspresso_trainer/models/backbones/experimental/vit.py index f5344f20..31cedd8a 100644 --- a/src/netspresso_trainer/models/backbones/experimental/vit.py +++ b/src/netspresso_trainer/models/backbones/experimental/vit.py @@ -3,8 +3,9 @@ https://github.com/apple/ml-cvnets/blob/84d992f413e52c0468f86d23196efd9dad885e6f/cvnets/models/classification/vit.py """ import argparse -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union, 
List +from omegaconf import DictConfig import torch import torch.nn as nn @@ -93,19 +94,21 @@ def __init__(self, num_blocks, hidden_size, num_attention_heads, attention_dropo class VisionTransformer(MetaFormer): def __init__( self, - task, - patch_size, - hidden_size, - num_blocks, - num_attention_heads, - attention_dropout_prob, - intermediate_size, - hidden_dropout_prob, - layer_norm_eps=1e-6, - use_cls_token=True, - vocab_size=1000, - **kwargs + task: str, + params: Optional[DictConfig] = None, + stage_params: Optional[List] = None, ) -> None: + patch_size = params.patch_size + hidden_size = params.hidden_size + num_blocks = params.num_blocks + num_attention_heads = params.num_attention_heads + attention_dropout_prob = params.attention_dropout_prob + intermediate_size = params.intermediate_size + hidden_dropout_prob = params.hidden_dropout_prob + layer_norm_eps = params.layer_norm_eps + use_cls_token = params.use_cls_token + vocab_size = params.vocab_size + hidden_sizes = hidden_size if isinstance(hidden_size, list) else [hidden_size] * num_blocks super().__init__(hidden_sizes) self.task = task @@ -119,4 +122,4 @@ def __init__( def vit(task, conf_model_backbone): # ViT tiny - return VisionTransformer(task, **conf_model_backbone) \ No newline at end of file + return VisionTransformer(task, conf_model_backbone.params, conf_model_backbone.stage_params) \ No newline at end of file diff --git a/src/netspresso_trainer/models/base.py b/src/netspresso_trainer/models/base.py index ec341eb2..a063dae5 100644 --- a/src/netspresso_trainer/models/base.py +++ b/src/netspresso_trainer/models/base.py @@ -7,7 +7,7 @@ import torch.nn as nn from omegaconf import OmegaConf -from .registry import MODEL_BACKBONE_DICT, MODEL_HEAD_DICT +from .registry import MODEL_BACKBONE_DICT, MODEL_HEAD_DICT, MODEL_NECK_DICT from .utils import BackboneOutput, DetectionModelOutput, ModelOutput, load_from_checkpoint logger = logging.getLogger("netspresso_trainer") @@ -22,18 +22,24 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode self.head_name = head_name backbone_fn: Callable[..., nn.Module] = MODEL_BACKBONE_DICT[backbone_name] - conf_model_backbone = OmegaConf.to_object(conf_model.architecture.backbone) - self.backbone: nn.Module = backbone_fn(task=self.task, conf_model_backbone=conf_model_backbone) + self.backbone: nn.Module = backbone_fn(task=self.task, conf_model_backbone=conf_model.architecture.backbone) self.backbone = load_from_checkpoint(self.backbone, model_checkpoint) + intermediate_features_dim = self.backbone.intermediate_features_dim + if getattr(conf_model.architecture, 'neck', None): + neck_name = conf_model.architecture.neck.name + neck_fn: Callable[..., nn.Module] = MODEL_NECK_DICT[neck_name] + self.neck = neck_fn(intermediate_features_dim=self.backbone.intermediate_features_dim) + intermediate_features_dim = self.neck.intermediate_features_dim + head_module = MODEL_HEAD_DICT[self.task][head_name] if task == 'classification': self.head = head_module(num_classes=num_classes, feature_dim=self.backbone.feature_dim) elif task in ['segmentation', 'detection']: img_size = img_size if isinstance(img_size, (int, None)) else tuple(img_size) self.head = head_module(num_classes=num_classes, - intermediate_features_dim=self.backbone.intermediate_features_dim, + intermediate_features_dim=intermediate_features_dim, label_size=img_size) if freeze_backbone: @@ -74,6 +80,8 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode def forward(self, x, 
label_size=None, targets=None): features: BackboneOutput = self.backbone(x) + if self.neck: + features: BackboneOutput = self.neck(features['intermediate_features']) out: ModelOutput = self.head(features['intermediate_features']) return out @@ -85,5 +93,7 @@ def __init__(self, conf_model, task, backbone_name, head_name, num_classes, mode def forward(self, x, label_size=None, targets=None): features: BackboneOutput = self.backbone(x) + if self.neck: + features: BackboneOutput = self.neck(features['intermediate_features']) out: DetectionModelOutput = self.head(features['intermediate_features']) return out diff --git a/src/netspresso_trainer/models/builder.py b/src/netspresso_trainer/models/builder.py index 78e9d13f..6c9202a2 100644 --- a/src/netspresso_trainer/models/builder.py +++ b/src/netspresso_trainer/models/builder.py @@ -16,14 +16,14 @@ def load_full_model(conf_model, model_name, num_classes, model_checkpoint): model_fn: Callable[..., nn.Module] = MODEL_FULL_DICT[model_name] - conf_model_full = OmegaConf.to_object(conf_model.architecture.full) - model: nn.Module = model_fn(num_classes=num_classes, conf_model_full=conf_model_full) + model: nn.Module = model_fn(num_classes=num_classes, conf_model_full=conf_model.architecture.full) model = load_from_checkpoint(model, model_checkpoint) return model -def load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone): +def load_backbone_and_head_model( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone): TASK_MODEL_DICT: Dict[str, Type[TaskModel]] = { 'classification': ClassificationModel, 'segmentation': SegmentationModel, @@ -31,9 +31,11 @@ def load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num } if task not in TASK_MODEL_DICT: - raise ValueError(f"No such task(s) named: {task}. This should be included in SUPPORTING_TASK_LIST ({SUPPORTING_TASK_LIST})") + raise ValueError( + f"No such task(s) named: {task}. 
This should be included in SUPPORTING_TASK_LIST ({SUPPORTING_TASK_LIST})") - return TASK_MODEL_DICT[task](conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) + return TASK_MODEL_DICT[task]( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) def build_model(conf_model, task, num_classes, model_checkpoint, img_size) -> nn.Module: @@ -45,4 +47,5 @@ def build_model(conf_model, task, num_classes, model_checkpoint, img_size) -> nn backbone_name = str(conf_model.architecture.backbone.name).lower() head_name = str(conf_model.architecture.head.name).lower() freeze_backbone = conf_model.freeze_backbone - return load_backbone_and_head_model(conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) + return load_backbone_and_head_model( + conf_model, task, backbone_name, head_name, num_classes, model_checkpoint, img_size, freeze_backbone) diff --git a/src/netspresso_trainer/models/full/experimental/pidnet.py b/src/netspresso_trainer/models/full/experimental/pidnet.py index b3b630c9..10d3ac04 100644 --- a/src/netspresso_trainer/models/full/experimental/pidnet.py +++ b/src/netspresso_trainer/models/full/experimental/pidnet.py @@ -3,6 +3,7 @@ # ------------------------------------------------------------------------------ import logging import time +from typing import Optional, List, Dict import torch import torch.nn as nn @@ -17,8 +18,19 @@ class PIDNet(nn.Module): - def __init__(self, num_classes=19, m=2, n=3, planes=64, ppm_planes=96, head_planes=128, is_training=True, **kwargs): + def __init__( + self, + params: Optional[Dict] = None + ) -> None: super(PIDNet, self).__init__() + num_classes = params.num_classes + m = params.m + n = params.n + planes = params.planes + ppm_planes = params.ppm_planes + head_planes = params.head_planes + is_training = params.is_training + self.is_training = is_training # I Branch @@ -195,5 +207,6 @@ def forward(self, x: FXTensorType, label_size=None) -> PIDNetModelOutput: def pidnet(num_classes: int, conf_model_full) -> PIDNet: # PIDNet-S - return PIDNet(num_classes=num_classes, is_training=True, **conf_model_full) - + conf_model_full.num_classes = num_classes + conf_model_full.is_training = True + return PIDNet(params=conf_model_full) \ No newline at end of file diff --git a/src/netspresso_trainer/models/heads/detection/__init__.py b/src/netspresso_trainer/models/heads/detection/__init__.py index 455a00ee..8d362011 100644 --- a/src/netspresso_trainer/models/heads/detection/__init__.py +++ b/src/netspresso_trainer/models/heads/detection/__init__.py @@ -1,2 +1,2 @@ from .experimental.faster_rcnn import faster_rcnn -from .experimental.yolo_head import yolo_head \ No newline at end of file +from .experimental.yolox_head import yolox_head \ No newline at end of file diff --git a/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py b/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py index 54095e8a..70436fcd 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/detection/generalized_rcnn.py @@ -19,19 +19,15 @@ class GeneralizedRCNN(nn.Module): detections / masks from it. 
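Editor's note: with the FPN removed from the two-stage head (see the faster_rcnn.py hunk below), the RoI pooler's feature-map names are now derived from the incoming channel list rather than from a head-owned neck. A tiny sketch of that naming, with an assumed channel list:

```python
# Assumed backbone/neck output dims; only the naming scheme follows the diff below.
intermediate_features_dim = [256, 512, 1024, 2048]
featmap_names = [str(i) for i in range(len(intermediate_features_dim))]
# -> ['0', '1', '2', '3'], consumed by MultiScaleRoIAlign(featmap_names=featmap_names, ...)
```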
""" - def __init__(self, neck:nn.Module, rpn: nn.Module, roi_heads: nn.Module, image_size: Tuple[int, int]) -> None: + def __init__(self, rpn: nn.Module, roi_heads: nn.Module, image_size: Tuple[int, int]) -> None: super().__init__() # _log_api_usage_once(self) - self.neck = neck self.rpn = rpn self.roi_heads = roi_heads self.image_size = image_size def forward(self, features: FXTensorListType) -> DetectionModelOutput: - if self.neck: - features = self.neck(features) - features = {str(k): v for k, v in enumerate(features)} rpn_features = self.rpn(features, self.image_size) roi_features = self.roi_heads(features, rpn_features['boxes'], self.image_size) diff --git a/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py b/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py index 5127d4f2..6b9d46a0 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/faster_rcnn.py @@ -2,7 +2,6 @@ import torch.nn.functional as F from .detection import AnchorGenerator, RPNHead, RegionProposalNetwork, RoIHeads, GeneralizedRCNN, MultiScaleRoIAlign -from .fpn import FPN IMAGE_SIZE = (512, 512) # TODO: Get from configuration @@ -43,8 +42,6 @@ def __init__( ): assert fpn_num_outs == len(anchor_sizes) - neck = FPN(in_channels=intermediate_features_dim, out_channels=intermediate_features_dim[-1], num_outs=fpn_num_outs) - out_channels = intermediate_features_dim[-1] aspect_ratios = (aspect_ratios,) * len(anchor_sizes) @@ -65,7 +62,7 @@ def __init__( score_thresh=rpn_score_thresh, ) - featmap_names = [str(i) for i in range(neck.num_outs)] + featmap_names = [str(i) for i in range(len(intermediate_features_dim))] box_roi_pool = MultiScaleRoIAlign(featmap_names=featmap_names, output_size=roi_output_size, sampling_ratio=roi_sampling_ratio) box_head = TwoMLPHead(out_channels * roi_output_size**2, roi_representation_size) @@ -87,7 +84,7 @@ def __init__( box_detections_per_img, ) - super().__init__(neck, rpn, roi_heads, IMAGE_SIZE) + super().__init__(rpn, roi_heads, IMAGE_SIZE) class TwoMLPHead(nn.Module): diff --git a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py similarity index 95% rename from src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py rename to src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py index 9376a2d5..7135775b 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/yolo_head.py +++ b/src/netspresso_trainer/models/heads/detection/experimental/yolox_head.py @@ -6,7 +6,7 @@ import torch.nn as nn from ....op.custom import ConvLayer -from .fpn import PAFPN +from ....utils import ModelOutput class YOLOXHead(nn.Module): @@ -25,8 +25,6 @@ def __init__( self.num_classes = num_classes - self.neck = PAFPN(in_channels=intermediate_features_dim, act_type=act_type) - self.cls_convs = nn.ModuleList() self.reg_convs = nn.ModuleList() self.cls_preds = nn.ModuleList() @@ -116,7 +114,6 @@ def __init__( def forward(self, xin): outputs = [] - xin = self.neck(xin) for k, (cls_conv, reg_conv, x) in enumerate(zip(self.cls_convs, self.reg_convs, xin)): x = self.stems[k](x) @@ -134,10 +131,10 @@ def forward(self, xin): outputs.append(output) - return outputs + return ModelOutput(pred=outputs) -def yolo_head(num_classes, intermediate_features_dim, **kwargs): +def yolox_head(num_classes, intermediate_features_dim, **kwargs): configuration 
= { 'act_type': 'silu', } diff --git a/src/netspresso_trainer/models/necks/__init__.py b/src/netspresso_trainer/models/necks/__init__.py new file mode 100644 index 00000000..dfbeec68 --- /dev/null +++ b/src/netspresso_trainer/models/necks/__init__.py @@ -0,0 +1,2 @@ +from .experimental.fpn import fpn +from .experimental.pafpn import pafpn diff --git a/src/netspresso_trainer/models/necks/core/.gitkeep b/src/netspresso_trainer/models/necks/core/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/netspresso_trainer/models/necks/core/__init__.py b/src/netspresso_trainer/models/necks/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/netspresso_trainer/models/necks/experimental/__init__.py b/src/netspresso_trainer/models/necks/experimental/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py b/src/netspresso_trainer/models/necks/experimental/fpn.py similarity index 64% rename from src/netspresso_trainer/models/heads/detection/experimental/fpn.py rename to src/netspresso_trainer/models/necks/experimental/fpn.py index 27de479c..16ca0717 100644 --- a/src/netspresso_trainer/models/heads/detection/experimental/fpn.py +++ b/src/netspresso_trainer/models/necks/experimental/fpn.py @@ -1,8 +1,7 @@ -import torch import torch.nn as nn import torch.nn.functional as F -from ....op.custom import ConvLayer, CSPLayer +from ...utils import BackboneOutput class FPN(nn.Module): @@ -92,6 +91,8 @@ def __init__(self, extra_fpn_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1) self.fpn_convs.append(extra_fpn_conv) + self._intermediate_features_dim = [out_channels for _ in range(num_outs)] + def forward(self, inputs): """Forward function.""" assert len(inputs) == len(self.in_channels) @@ -144,118 +145,28 @@ def forward(self, inputs): outs.append(self.fpn_convs[i](F.relu(outs[-1]))) else: outs.append(self.fpn_convs[i](outs[-1])) - return outs - - -class PAFPN(nn.Module): - """ - YOLOv3 model. Darknet 53 is the default backbone of this model. 
- """ - - def __init__( - self, - in_channels, - act_type="silu", - ): - super().__init__() - - self.in_channels = in_channels - Conv = ConvLayer - - # TODO: Get from config - depth = 0.33 - - self.upsample = nn.Upsample(scale_factor=2, mode="nearest") - self.lateral_conv0 = ConvLayer( - in_channels=int(in_channels[2]), - out_channels=int(in_channels[1]), - kernel_size=1, - stride=1, - act_type=act_type - ) - self.C3_p4 = CSPLayer( - in_channels=int(2 * in_channels[1]), - out_channels=int(in_channels[1]), - n=round(3 * depth), - shortcut=False, - act_type=act_type, - ) # cat - - self.reduce_conv1 = ConvLayer( - in_channels=int(in_channels[1]), - out_channels=int(in_channels[0]), - kernel_size=1, - stride=1, - act_type=act_type - ) - self.C3_p3 = CSPLayer( - in_channels=int(2 * in_channels[0]), - out_channels=int(in_channels[0]), - n=round(3 * depth), - shortcut=False, - act_type=act_type, - ) - - # bottom-up conv - self.bu_conv2 = Conv( - in_channels=int(in_channels[0]), - out_channels=int(in_channels[0]), - kernel_size=3, - stride=2, - act_type=act_type - ) - self.C3_n3 = CSPLayer( - in_channels=int(2 * in_channels[0]), - out_channels=int(in_channels[1]), - n=round(3 * depth), - shortcut=False, - act_type=act_type, - ) - - # bottom-up conv - self.bu_conv1 = Conv( - in_channels=int(in_channels[1]), - out_channels=int(in_channels[1]), - kernel_size=3, - stride=2, - act_type=act_type - ) - self.C3_n4 = CSPLayer( - in_channels=int(2 * in_channels[1]), - out_channels=int(in_channels[2]), - n=round(3 * depth), - shortcut=False, - act_type=act_type, - ) - - def forward(self, inputs): - """ - Args: - inputs: input images. - - Returns: - Tuple[Tensor]: FPN feature. - """ - - [x2, x1, x0] = inputs - - fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 - f_out0 = self.upsample(fpn_out0) # 512/16 - f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 - f_out0 = self.C3_p4(f_out0) # 1024->512/16 - - fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 - f_out1 = self.upsample(fpn_out1) # 256/8 - f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 - pan_out2 = self.C3_p3(f_out1) # 512->256/8 - - p_out1 = self.bu_conv2(pan_out2) # 256->256/16 - p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 - pan_out1 = self.C3_n3(p_out1) # 512->512/16 - - p_out0 = self.bu_conv1(pan_out1) # 512->512/32 - p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 - pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 - - outputs = (pan_out2, pan_out1, pan_out0) - return outputs + return BackboneOutput(intermediate_features=outs) + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim + + +def fpn(intermediate_features_dim, **kwargs): + configuration = { + 'num_outs': 4, + 'start_level': 0, + 'end_level': -1, + 'add_extra_convs': False, + 'relu_before_extra_convs': False, + 'no_norm_on_lateral': False, + 'conv_cfg': None, + 'norm_cfg': None, + 'act_cfg': None, + 'upsample_cfg': None, + 'init_cfg': None + } + + return FPN(in_channels=intermediate_features_dim, + out_channels=intermediate_features_dim[-1], + **configuration) diff --git a/src/netspresso_trainer/models/necks/experimental/pafpn.py b/src/netspresso_trainer/models/necks/experimental/pafpn.py new file mode 100644 index 00000000..50de7228 --- /dev/null +++ b/src/netspresso_trainer/models/necks/experimental/pafpn.py @@ -0,0 +1,132 @@ +import torch +import torch.nn as nn + +from ...op.custom import ConvLayer, CSPLayer +from ...utils import BackboneOutput + + +class PAFPN(nn.Module): + """ + YOLOv3 model. 
Darknet 53 is the default backbone of this model. + """ + + def __init__( + self, + in_channels, + act_type="silu", + ): + super().__init__() + + self.in_channels = in_channels + Conv = ConvLayer + + # TODO: Get from config + depth = 0.33 + + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + self.lateral_conv0 = ConvLayer( + in_channels=int(in_channels[2]), + out_channels=int(in_channels[1]), + kernel_size=1, + stride=1, + act_type=act_type + ) + self.C3_p4 = CSPLayer( + in_channels=int(2 * in_channels[1]), + out_channels=int(in_channels[1]), + n=round(3 * depth), + shortcut=False, + act_type=act_type, + ) # cat + + self.reduce_conv1 = ConvLayer( + in_channels=int(in_channels[1]), + out_channels=int(in_channels[0]), + kernel_size=1, + stride=1, + act_type=act_type + ) + self.C3_p3 = CSPLayer( + in_channels=int(2 * in_channels[0]), + out_channels=int(in_channels[0]), + n=round(3 * depth), + shortcut=False, + act_type=act_type, + ) + + # bottom-up conv + self.bu_conv2 = Conv( + in_channels=int(in_channels[0]), + out_channels=int(in_channels[0]), + kernel_size=3, + stride=2, + act_type=act_type + ) + self.C3_n3 = CSPLayer( + in_channels=int(2 * in_channels[0]), + out_channels=int(in_channels[1]), + n=round(3 * depth), + shortcut=False, + act_type=act_type, + ) + + # bottom-up conv + self.bu_conv1 = Conv( + in_channels=int(in_channels[1]), + out_channels=int(in_channels[1]), + kernel_size=3, + stride=2, + act_type=act_type + ) + self.C3_n4 = CSPLayer( + in_channels=int(2 * in_channels[1]), + out_channels=int(in_channels[2]), + n=round(3 * depth), + shortcut=False, + act_type=act_type, + ) + + self._intermediate_features_dim = in_channels + + def forward(self, inputs): + """ + Args: + inputs: input images. + + Returns: + Tuple[Tensor]: FPN feature. 
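Editor's note: FPN and PAFPN are now standalone neck modules that return a `BackboneOutput` and report their output channels through `intermediate_features_dim`, so the task model can wire them between backbone and head. A condensed sketch of that selection logic, mirroring the base.py change earlier in this diff; the helper name `build_neck` and its argument list are illustrative, not part of the codebase.

```python
# Sketch of the backbone -> neck -> head channel hand-off added in this PR.
from typing import Callable, Dict, List, Optional, Tuple

import torch.nn as nn


def build_neck(conf_architecture,
               backbone: nn.Module,
               neck_registry: Dict[str, Callable[..., nn.Module]]
               ) -> Tuple[Optional[nn.Module], List[int]]:
    feature_dims = backbone.intermediate_features_dim
    neck = None
    if getattr(conf_architecture, 'neck', None):              # neck is optional in the config
        neck_fn = neck_registry[conf_architecture.neck.name]  # e.g. 'fpn' or 'pafpn'
        neck = neck_fn(intermediate_features_dim=feature_dims)
        feature_dims = neck.intermediate_features_dim         # head is built against these dims
    return neck, feature_dims
```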
+ """ + + [x2, x1, x0] = inputs + + fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 + f_out0 = self.upsample(fpn_out0) # 512/16 + f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 + f_out0 = self.C3_p4(f_out0) # 1024->512/16 + + fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 + f_out1 = self.upsample(fpn_out1) # 256/8 + f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 + pan_out2 = self.C3_p3(f_out1) # 512->256/8 + + p_out1 = self.bu_conv2(pan_out2) # 256->256/16 + p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 + pan_out1 = self.C3_n3(p_out1) # 512->512/16 + + p_out0 = self.bu_conv1(pan_out1) # 512->512/32 + p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 + pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 + + outputs = (pan_out2, pan_out1, pan_out0) + return BackboneOutput(intermediate_features=outputs) + + @property + def intermediate_features_dim(self): + return self._intermediate_features_dim + +def pafpn(intermediate_features_dim, **kwargs): + configuration = { + 'act_type': 'silu', + } + + return PAFPN(in_channels=intermediate_features_dim, **configuration) diff --git a/src/netspresso_trainer/models/op/base_metaformer.py b/src/netspresso_trainer/models/op/base_metaformer.py index 5a23d5cb..65237e74 100644 --- a/src/netspresso_trainer/models/op/base_metaformer.py +++ b/src/netspresso_trainer/models/op/base_metaformer.py @@ -53,7 +53,7 @@ def __init__( attention_bias_resolution = 16, ) -> None: super().__init__() - + attention_hidden_size = attention_hidden_size if attention_hidden_size is not None else hidden_size value_hidden_size = value_hidden_size if value_hidden_size is not None else attention_hidden_size @@ -62,17 +62,17 @@ def __init__( f"The hidden size {attention_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + if value_hidden_size % num_attention_heads != 0: raise ValueError( f"The hidden size {value_hidden_size,} is not a multiple of the number of attention " f"heads {num_attention_heads}." ) - + self.num_attention_heads = num_attention_heads self.attention_head_size = int(attention_hidden_size / num_attention_heads) self.value_attention_head_size = int(value_hidden_size / num_attention_heads) - + self.head_size = self.num_attention_heads * self.attention_head_size self.value_head_size = self.num_attention_heads * self.value_attention_head_size self.attention_scale = attention_scale if attention_scale is not None \ @@ -82,7 +82,7 @@ def __init__( self.query = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.key = nn.Linear(hidden_size, self.head_size, bias=use_qkv_bias) # ... x C -> ... x C_qk self.value = nn.Linear(hidden_size, self.value_head_size, bias=use_qkv_bias) # ... x C -> ... x C_v - + self.linear = nn.Linear(self.value_head_size, hidden_size) # ... x C_v -> ... 
x C self.dropout = nn.Dropout(attention_dropout_prob) @@ -118,14 +118,14 @@ def __init__( # torch.zeros(self.num_attention_heads, len(attention_offsets))) # self.register_buffer('attention_bias_idxs_seg', # torch.LongTensor(idxs).view(N, N)) - + self.use_cross_attention = use_cross_attention def transpose_for_scores(self, x: Tensor, attention_head_size: int) -> Tensor: new_x_shape = x.size()[:-1] + (self.num_attention_heads, attention_head_size) x = x.view(new_x_shape) return x.permute(0, 2, 1, 3) - + def sequence_reduce(self, x: Tensor, height: int, width: int) -> Tensor: """SegFormer """ @@ -167,7 +167,7 @@ def forward( """ mixed_query_layer = self.query(query_states) # B x S_s x C_qk - + if not self.use_cross_attention: # Self-attention key_value_states = query_states # B x S_t(=S_s) x C_qk if self.use_sequence_reduction: @@ -180,7 +180,7 @@ def forward( attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) # B x {head} x S_s x S_t attention_scores = attention_scores / self.attention_scale # B x {head} x S_s x S_t - + if self.use_attention_bias: bias = self.attention_biases[:, self.attention_bias_idxs] bias = nn.functional.interpolate(bias.unsqueeze(0), size=(attention_scores.size(-2), attention_scores.size(-1)), mode='bicubic') @@ -199,15 +199,15 @@ def forward( context_layer = context_layer.permute(0, 2, 1, 3).contiguous() # B x S_s x {head} x C_vsplit new_context_layer_shape = context_layer.size()[:-2] + (self.value_head_size,) context_layer = context_layer.view(new_context_layer_shape) # B x S_s x C_v - + context_layer = self.linear(context_layer) # B x S_s x C context_layer = self.dropout(context_layer) # B x S_s x C if self.output_with_attentions: return (context_layer, attention_probs) - + return context_layer # B x S_s x C - + class ChannelMLP(nn.Module): def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_activation_type='silu'): super().__init__() @@ -218,7 +218,7 @@ def __init__(self, hidden_size, intermediate_size, hidden_dropout_prob, hidden_a self.ffn.add_module('dense2', nn.Linear(in_features=intermediate_size, out_features=hidden_size, bias=True)) self.dropout = nn.Dropout(p=hidden_dropout_prob) - + def forward(self, x): x = self.ffn(x) x = self.dropout(x) @@ -231,20 +231,20 @@ def __init__(self, hidden_size, layer_norm_eps) -> None: self.layernorm_after = nn.LayerNorm(hidden_size) self.token_mixer = nn.Identity() # MultiHeadAttention() self.channel_mlp = nn.Identity() # ChannelMLP() - + def forward(self, x): out_token_mixer = self.layernorm_before(x) out_token_mixer = self.token_mixer(out_token_mixer) - + out_token_mixer = out_token_mixer + x - + out_final = self.layernorm_after(out_token_mixer) out_final = self.channel_mlp(out_final) - + out_final = out_final + out_token_mixer - + return out_final - + class MetaFormerEncoder(nn.Module): def __init__(self) -> None: super().__init__() @@ -252,7 +252,7 @@ def __init__(self) -> None: # self.blocks = nn.Sequential( # *[MetaFormerBlock(hidden_size, layer_norm_eps) for _ in range(num_layers)] # ) - + def forward(self, x): x = self.blocks(x) return x @@ -262,7 +262,7 @@ def __init__(self, hidden_sizes) -> None: super().__init__() self._feature_dim = hidden_sizes[-1] self._intermediate_features_dim = hidden_sizes - + self.patch_embed = nn.Identity() self.encoder = MetaFormerEncoder() self.norm = nn.Identity() @@ -270,14 +270,14 @@ def __init__(self, hidden_sizes) -> None: @property def feature_dim(self): return self._feature_dim - + @property def intermediate_features_dim(self): 
return self._intermediate_features_dim - + def forward(self, x: FXTensorType): x = self.patch_embed(x) x = self.encoder(x) x = self.norm(x) feat = torch.mean(x, dim=1) - return BackboneOutput(last_feature=feat) \ No newline at end of file + return BackboneOutput(last_feature=feat) diff --git a/src/netspresso_trainer/models/op/custom.py b/src/netspresso_trainer/models/op/custom.py index 78715625..7dd1752c 100644 --- a/src/netspresso_trainer/models/op/custom.py +++ b/src/netspresso_trainer/models/op/custom.py @@ -296,9 +296,9 @@ def __init__( # project layers.append( ConvLayer( - in_channels=hidden_channels, - out_channels=out_channels, - kernel_size=1, + in_channels=hidden_channels, + out_channels=out_channels, + kernel_size=1, norm_type=norm_type, use_act=False ) @@ -365,7 +365,7 @@ def __init__( self.patch_dim = patch_dim self.register_buffer("pe", pos_encoding) - + def forward_patch_last( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: @@ -385,8 +385,8 @@ def forward_others( self, x, indices: Optional[Tensor] = None, *args, **kwargs ) -> Tensor: # seq_length should be the second last dim - - # @deepkyu: [fx tracing] Always `indices` is None + + # @deepkyu: [fx tracing] Always `indices` is None # if indices is None: # x = x + self.pe[..., : x.shape[-2], :] # else: @@ -396,10 +396,10 @@ def forward_others( # pe = self.pe.expand(repeat_size) # selected_pe = torch.gather(pe, index=indices, dim=-2) # x = x + selected_pe - + # x = x + self.pe[..., :seq_index, :] x = x + tensor_slice(self.pe, dim=1, index=x.shape[-2]) - + return x def forward(self, x, indices: Optional[Tensor] = None, *args, **kwargs) -> Tensor: @@ -480,7 +480,7 @@ def forward(self, x: Tensor) -> Tensor: # dims = [-3, -2, -1] # else: # raise NotImplementedError("Currently 2D and 3D global pooling supported") - + return self._global_pool(x, dims=(-2, -1)) # def profile_module(self, input: Tensor) -> Tuple[Tensor, float, float]: @@ -497,9 +497,9 @@ class Focus(nn.Module): def __init__(self, in_channels, out_channels, ksize=1, stride=1, act_type="silu"): super().__init__() self.conv = ConvLayer(in_channels=in_channels * 4, - out_channels=out_channels, - kernel_size=ksize, - stride=stride, + out_channels=out_channels, + kernel_size=ksize, + stride=stride, act_type=act_type) def forward(self, x): @@ -542,25 +542,25 @@ def __init__( # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() hidden_channels = int(out_channels * expansion) # hidden channels - self.conv1 = ConvLayer(in_channels=in_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, - kernel_size=1, + kernel_size=1, stride=1, act_type=act_type) self.conv2 = ConvLayer(in_channels=in_channels, - out_channels=hidden_channels, - kernel_size=1, + out_channels=hidden_channels, + kernel_size=1, stride=1, act_type=act_type) - self.conv3 = ConvLayer(in_channels=2 * hidden_channels, - out_channels=out_channels, - kernel_size=1, + self.conv3 = ConvLayer(in_channels=2 * hidden_channels, + out_channels=out_channels, + kernel_size=1, stride=1, act_type=act_type) - + block = DarknetBlock module_list = [ block( - in_channels=hidden_channels, - out_channels=hidden_channels, + in_channels=hidden_channels, + out_channels=hidden_channels, shortcut=shortcut, expansion=1.0, act_type=act_type @@ -585,7 +585,7 @@ def __init__( ): super().__init__() hidden_channels = in_channels // 2 - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, 
out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) self.m = nn.ModuleList( [ @@ -594,7 +594,7 @@ def __init__( ] ) conv2_channels = hidden_channels * (len(kernel_sizes) + 1) - self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=conv2_channels, out_channels=out_channels, kernel_size=1, stride=1, act_type=act_type) def forward(self, x): @@ -618,9 +618,9 @@ def __init__( ): super().__init__() hidden_channels = int(out_channels * expansion) - self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, + self.conv1 = ConvLayer(in_channels=in_channels, out_channels=hidden_channels, kernel_size=1, stride=1, act_type=act_type) - self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, + self.conv2 = ConvLayer(in_channels=hidden_channels, out_channels=out_channels, kernel_size=3, stride=1, act_type=act_type) self.use_add = shortcut and in_channels == out_channels diff --git a/src/netspresso_trainer/models/op/depth.py b/src/netspresso_trainer/models/op/depth.py index 9c626bb7..276527d3 100644 --- a/src/netspresso_trainer/models/op/depth.py +++ b/src/netspresso_trainer/models/op/depth.py @@ -31,4 +31,4 @@ def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): self.scale_by_keep = scale_by_keep def forward(self, x): - return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) \ No newline at end of file + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) diff --git a/src/netspresso_trainer/models/op/registry.py b/src/netspresso_trainer/models/op/registry.py index 4666e9f5..10c6f8cb 100644 --- a/src/netspresso_trainer/models/op/registry.py +++ b/src/netspresso_trainer/models/op/registry.py @@ -16,4 +16,4 @@ 'silu': nn.SiLU, 'swish': nn.SiLU, 'hard_swish': nn.Hardswish, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/models/registry.py b/src/netspresso_trainer/models/registry.py index 59ee6cda..999ed517 100644 --- a/src/netspresso_trainer/models/registry.py +++ b/src/netspresso_trainer/models/registry.py @@ -3,20 +3,27 @@ import torch.nn as nn -from .backbones import cspdarknet, efficientformer, mobilenetv3_small, mobilevit, resnet50, segformer, vit +from .backbones import cspdarknet, efficientformer, mixnet, mobilenetv3, mobilevit, resnet, segformer, vit from .full import pidnet from .heads.classification import fc -from .heads.detection import faster_rcnn, yolo_head +from .heads.detection import faster_rcnn, yolox_head from .heads.segmentation import all_mlp_decoder +from .necks import fpn, pafpn MODEL_BACKBONE_DICT: Dict[str, Callable[..., nn.Module]] = { - 'resnet50': resnet50, - 'mobilenetv3_small': mobilenetv3_small, + 'resnet': resnet, + 'mobilenetv3': mobilenetv3, 'segformer': segformer, 'mobilevit': mobilevit, 'vit': vit, 'efficientformer': efficientformer, - 'cspdarknet': cspdarknet + 'cspdarknet': cspdarknet, + 'mixnet': mixnet, +} + +MODEL_NECK_DICT: Dict[str, Callable[..., nn.Module]] = { + 'fpn': fpn, + 'pafpn': pafpn, } MODEL_HEAD_DICT: Dict[str, Callable[..., nn.Module]] = { @@ -28,7 +35,7 @@ }, 'detection': { 'faster_rcnn': faster_rcnn, - 'yolo_head': yolo_head + 'yolox_head': yolox_head }, } diff --git a/src/netspresso_trainer/models/utils.py b/src/netspresso_trainer/models/utils.py index 0307eb6a..aa34c6a5 100644 --- a/src/netspresso_trainer/models/utils.py +++ b/src/netspresso_trainer/models/utils.py @@ -13,6 +13,18 @@ FXTensorType = Union[Tensor, Proxy] FXTensorListType = 
Union[List[Tensor], List[Proxy]] +MODEL_CHECKPOINT_URL_DICT = { + 'resnet50': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/resnet/resnet50.pth", + 'mobilenet_v3_small': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilenetv3/mobilenet_v3_small.pth", + 'segformer': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/segformer/segformer.pth", + 'mobilevit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mobilevit/mobilevit_s.pth", + 'vit': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/vit/vit-tiny.pth", + 'efficientformer': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/efficientformer/efficientformer_l1_1000d.pth", + 'mixnet_s': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_s.pth", + 'mixnet_m': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_m.pth", + 'mixnet_l': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/mixnet/mixnet_l.pth", +} + class BackboneOutput(TypedDict): intermediate_features: Optional[FXTensorListType] @@ -43,8 +55,29 @@ class PIDNetModelOutput(ModelOutput): extra_d: Optional[FXTensorType] -def load_from_checkpoint(model: nn.Module, model_checkpoint: Optional[Union[str, Path]]) -> nn.Module: +def download_model_checkpoint(model_checkpoint: Union[str, Path], model_name: str) -> Path: + checkpoint_url = MODEL_CHECKPOINT_URL_DICT[model_name] + model_checkpoint = Path(model_checkpoint) + model_checkpoint.parent.mkdir(parents=True, exist_ok=True) + # Safer switch: only extension, user can use the custom name for checkpoint file + model_checkpoint = model_checkpoint.with_suffix(Path(checkpoint_url).suffix) + if not model_checkpoint.exists(): + torch.hub.download_url_to_file(checkpoint_url, model_checkpoint) + + return model_checkpoint + + +def load_from_checkpoint( + model: nn.Module, + model_checkpoint: Optional[Union[str, Path]] +) -> nn.Module: if model_checkpoint is not None: + if not Path(model_checkpoint).exists(): + model_name = Path(model_checkpoint).stem + assert model_name in MODEL_CHECKPOINT_URL_DICT, \ + f"model_name {model_name} in path {model_checkpoint} is not valid name!" 
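Editor's note: the checkpoint auto-download added above resolves a missing local path by using the file stem as the key into `MODEL_CHECKPOINT_URL_DICT` and fetching the file once. A condensed, self-contained sketch of that behaviour follows; the URL is copied from the table above, and the helper name `resolve_checkpoint` is illustrative.

```python
# Minimal sketch of the new checkpoint auto-download behaviour (models/utils.py).
from pathlib import Path

import torch

MODEL_CHECKPOINT_URL_DICT = {
    'resnet50': "https://netspresso-trainer-public.s3.ap-northeast-2.amazonaws.com/checkpoint/resnet/resnet50.pth",
}


def resolve_checkpoint(model_checkpoint: str) -> Path:
    path = Path(model_checkpoint)
    if path.exists():
        return path
    url = MODEL_CHECKPOINT_URL_DICT[path.stem]    # e.g. "resnet50"
    path.parent.mkdir(parents=True, exist_ok=True)
    path = path.with_suffix(Path(url).suffix)     # keep the custom name, swap only the extension
    if not path.exists():
        torch.hub.download_url_to_file(url, str(path))  # fetch once; reused on later runs
    return path

# resolve_checkpoint("./checkpoints/resnet50.pth") downloads the weights on first use.
```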
+ model_checkpoint = download_model_checkpoint(model_checkpoint, model_name) + model_state_dict = torch.load(model_checkpoint, map_location='cpu') missing_keys, unexpected_keys = model.load_state_dict(model_state_dict, strict=False) diff --git a/src/netspresso_trainer/optimizers/__init__.py b/src/netspresso_trainer/optimizers/__init__.py index b1b64e76..871ebc6e 100644 --- a/src/netspresso_trainer/optimizers/__init__.py +++ b/src/netspresso_trainer/optimizers/__init__.py @@ -1 +1 @@ -from .builder import build_optimizer \ No newline at end of file +from .builder import build_optimizer diff --git a/src/netspresso_trainer/optimizers/builder.py b/src/netspresso_trainer/optimizers/builder.py index 3d2d2bf6..460af3fe 100644 --- a/src/netspresso_trainer/optimizers/builder.py +++ b/src/netspresso_trainer/optimizers/builder.py @@ -21,7 +21,7 @@ def build_optimizer( 'adam', 'adamw', 'adamax', 'adadelta', 'adagrad', 'rmsprop'] = opt.lower() assert opt_name in OPTIMIZER_DICT - + conf_optim = {'weight_decay': wd, 'lr': lr} if opt_name in ['sgd', 'nesterov', 'momentum', 'rmsprop']: @@ -32,7 +32,7 @@ def build_optimizer( conf_optim.update({'nesterov': True}) if opt_name in ['momentum']: conf_optim.update({'nesterov': False}) - + optimizer = OPTIMIZER_DICT[opt_name](parameters, **conf_optim) return optimizer diff --git a/src/netspresso_trainer/optimizers/registry.py b/src/netspresso_trainer/optimizers/registry.py index f4c3fc22..4f1ff759 100644 --- a/src/netspresso_trainer/optimizers/registry.py +++ b/src/netspresso_trainer/optimizers/registry.py @@ -14,4 +14,4 @@ 'sgd': optim.SGD, 'nesterov': optim.SGD, 'momentum': optim.SGD, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/pipelines/base.py b/src/netspresso_trainer/pipelines/base.py index f43fb2dd..58dd7aa6 100644 --- a/src/netspresso_trainer/pipelines/base.py +++ b/src/netspresso_trainer/pipelines/base.py @@ -15,6 +15,7 @@ from ..losses import build_losses from ..metrics import build_metrics from ..optimizers import build_optimizer +from ..postprocessors import build_postprocessor from ..schedulers import build_scheduler from ..utils.fx import save_graphmodule from ..utils.logger import yaml_for_logging @@ -87,6 +88,7 @@ def set_train(self): self.scheduler, _ = build_scheduler(self.optimizer, self.conf.training) self.loss_factory = build_losses(self.conf.model, ignore_index=self.ignore_index) self.metric_factory = build_metrics(self.task, self.conf.model, ignore_index=self.ignore_index, num_classes=self.num_classes) + self.postprocessor = build_postprocessor(self.task, self.conf.model) resume_optimizer_checkpoint = self.conf.model.resume_optimizer_checkpoint if resume_optimizer_checkpoint is not None: resume_optimizer_checkpoint = Path(resume_optimizer_checkpoint) diff --git a/src/netspresso_trainer/pipelines/builder.py b/src/netspresso_trainer/pipelines/builder.py index 4773261c..18698855 100644 --- a/src/netspresso_trainer/pipelines/builder.py +++ b/src/netspresso_trainer/pipelines/builder.py @@ -9,9 +9,9 @@ def build_pipeline(conf, task, model_name, model, devices, train_dataloader, eva task_ = "detection-two-stage" if conf.model.architecture.head.name in ["faster_rcnn"] else "detection-one-stage" task_pipeline = TASK_PIPELINE[task_] - + trainer = task_pipeline(conf, task, model_name, model, devices, train_dataloader, eval_dataloader, class_map, is_graphmodule_training=is_graphmodule_training, profile=profile) - return trainer \ No newline at end of file + return trainer diff --git 
a/src/netspresso_trainer/pipelines/classification.py b/src/netspresso_trainer/pipelines/classification.py index 709da58b..53c48fc5 100644 --- a/src/netspresso_trainer/pipelines/classification.py +++ b/src/netspresso_trainer/pipelines/classification.py @@ -28,7 +28,10 @@ def train_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='train') - self.metric_factory.calc(out['pred'], target, phase='train') + if target.dim() > 1: # Soft label to label number + target = torch.argmax(target, dim=-1) + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='train') self.loss_factory.backward() self.optimizer.step() @@ -44,7 +47,10 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, target, phase='valid') - self.metric_factory.calc(out['pred'], target, phase='valid') + if target.dim() > 1: # Soft label to label number + target = torch.argmax(target, dim=-1) + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='valid') if self.conf.distributed: torch.distributed.barrier() @@ -55,7 +61,7 @@ def test_step(self, batch): images = images.to(self.devices) out = self.model(images.unsqueeze(0)) - _, pred = out['pred'].topk(1, 1, True, True) + pred = self.postprocessor(out, k=1) if self.conf.distributed: torch.distributed.barrier() diff --git a/src/netspresso_trainer/pipelines/detection.py b/src/netspresso_trainer/pipelines/detection.py index 3d7e1ab5..b3897fc8 100644 --- a/src/netspresso_trainer/pipelines/detection.py +++ b/src/netspresso_trainer/pipelines/detection.py @@ -33,6 +33,15 @@ def __init__(self, conf, task, model_name, model, devices, train_dataloader, eva model = model.to(device=devices) self.model = model + if conf.distributed: + self.backbone_to_train = model.module.backbone + self.neck_to_train = model.module.neck + self.head_to_train = model.module.head + else: + self.backbone_to_train = model.backbone + self.neck = model.neck + self.head_to_train = model.head + def train_step(self, batch): self.model.train() images, labels, bboxes = batch['pixel_values'], batch['label'], batch['bbox'] @@ -43,12 +52,12 @@ def train_step(self, batch): self.optimizer.zero_grad() # forward to rpn - backbone = self.model.backbone - head = self.model.head + backbone = self.backbone_to_train + neck = self.neck_to_train + head = self.head_to_train features = backbone(images)['intermediate_features'] - if head.neck: - features = head.neck(features) + features = neck(features)['intermediate_features'] features = {str(k): v for k, v in enumerate(features)} rpn_features = head.rpn(features, head.image_size) @@ -86,7 +95,7 @@ def valid_step(self, batch): out = self.model(images) # Compute loss - head = self.model.head + head = self.head_to_train matched_idxs, roi_head_labels = head.roi_heads.assign_targets_to_proposals(out['boxes'], bboxes, labels) matched_gt_boxes = [bbox[idx] for idx, bbox in zip(matched_idxs, bboxes)] regression_targets = head.roi_heads.box_coder.encode(matched_gt_boxes, out['boxes']) @@ -139,7 +148,7 @@ def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']) pred_on_image['post_labels'] = class_idx pred.append(pred_on_image) self.metric_factory.calc(pred, target=targets, phase=phase) - + def save_checkpoint(self, epoch: int): # Check whether the valid loss is minimum at this epoch @@ -211,9 +220,9 @@ def train_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices),} for box, label in 
zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -224,9 +233,7 @@ def train_step(self, batch): self.loss_factory.backward() self.optimizer.step() - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -235,7 +242,7 @@ def train_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } @@ -247,9 +254,9 @@ def valid_step(self, batch): images = images.to(self.devices) targets = [{"boxes": box.to(self.devices), "labels": label.to(self.devices)} for box, label in zip(bboxes, labels)] - - targets = {'gt': targets, - 'img_size': images.size(-1), + + targets = {'gt': targets, + 'img_size': images.size(-1), 'num_classes': self.num_classes,} self.optimizer.zero_grad() @@ -257,9 +264,7 @@ def valid_step(self, batch): out = self.model(images) self.loss_factory.calc(out, targets, phase='valid') - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) if self.conf.distributed: torch.distributed.barrier() @@ -269,7 +274,7 @@ def valid_step(self, batch): 'target': [(bbox.detach().cpu().numpy(), label.detach().cpu().numpy()) for bbox, label in zip(bboxes, labels)], 'pred': [(torch.cat([p[:, :4], p[:, 5:6]], dim=-1).detach().cpu().numpy(), - p[:, 6].to(torch.int).detach().cpu().numpy()) + p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) for p in pred] } @@ -282,9 +287,7 @@ def test_step(self, batch): out = self.model(images.unsqueeze(0)) - # TODO: This step will be moved to postprocessor module - pred = self.decode_outputs(out, dtype=out[0].type(), stage_strides=[images.shape[-1] // o.shape[-1] for o in out]) - pred = self.postprocess(pred, self.num_classes) + pred = self.postprocessor(out, original_shape=images[0].shape, num_classes=self.num_classes) results = [(p[:, :4].detach().cpu().numpy(), p[:, 6].to(torch.int).detach().cpu().numpy()) if p is not None else (np.array([[]]), np.array([])) @@ -309,73 +312,3 @@ def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']) pred_on_image['post_labels'] = class_idx pred.append(pred_on_image) self.metric_factory.calc(pred, target=targets, phase=phase) - - # TODO: Temporary defined in pipeline, it will be moved to postprocessor module. 
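Editor's note: the YOLOX decode and NMS steps that previously lived in this pipeline (removed just below) are now reached through the postprocessor module added later in this diff. A minimal sketch of that hand-off, assuming the illustrative detection config shown here; `build_postprocessor` and the call signature follow the postprocessors/ hunks further down.

```python
# Illustrative config only; the pipeline itself no longer decodes raw head outputs.
from omegaconf import OmegaConf

from netspresso_trainer.postprocessors import build_postprocessor  # added in this PR

conf_model = OmegaConf.create({
    "single_task_model": False,
    "architecture": {"head": {"name": "yolox_head"}},
})
postprocessor = build_postprocessor("detection", conf_model)

# Inside the one-stage pipeline, the old decode_outputs/postprocess pair becomes:
# pred = postprocessor(out, original_shape=images[0].shape, num_classes=num_classes)
```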
- def decode_outputs(self, outputs, dtype, stage_strides): - hw = [x.shape[-2:] for x in outputs] - # [batch, n_anchors_all, num_classes + 5] - outputs = torch.cat([x.flatten(start_dim=2) for x in outputs], dim=2).permute(0, 2, 1) - outputs[..., 4:] = outputs[..., 4:].sigmoid() - - grids = [] - strides = [] - for (hsize, wsize), stride in zip(hw, stage_strides): - yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') - grid = torch.stack((xv, yv), 2).view(1, -1, 2) - grids.append(grid) - shape = grid.shape[:2] - strides.append(torch.full((*shape, 1), stride)) - - grids = torch.cat(grids, dim=1).type(dtype) - strides = torch.cat(strides, dim=1).type(dtype) - - outputs = torch.cat([ - (outputs[..., 0:2] + grids) * strides, - torch.exp(outputs[..., 2:4]) * strides, - outputs[..., 4:] - ], dim=-1) - return outputs - - # TODO: Temporary defined in pipeline, it will be moved to postprocessor module. - def postprocess(self, prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): - box_corner = prediction.new(prediction.shape) - box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 - box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 - box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 - box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 - prediction[:, :, :4] = box_corner[:, :, :4] - - output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] - for i, image_pred in enumerate(prediction): - - # If none are remaining => process next image - if not image_pred.size(0): - continue - # Get score and class with highest confidence - class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) - - conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() - # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) - detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) - detections = detections[conf_mask] - if not detections.size(0): - continue - - if class_agnostic: - nms_out_index = torchvision.ops.nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - nms_thre, - ) - else: - nms_out_index = torchvision.ops.batched_nms( - detections[:, :4], - detections[:, 4] * detections[:, 5], - detections[:, 6], - nms_thre, - ) - - detections = detections[nms_out_index] - output[i] = torch.cat((output[i], detections)) - - return output diff --git a/src/netspresso_trainer/pipelines/registry.py b/src/netspresso_trainer/pipelines/registry.py index 61fb0dd2..b0110bee 100644 --- a/src/netspresso_trainer/pipelines/registry.py +++ b/src/netspresso_trainer/pipelines/registry.py @@ -13,4 +13,4 @@ 'segmentation': SegmentationPipeline, 'detection-two-stage': TwoStageDetectionPipeline, 'detection-one-stage': OneStageDetectionPipeline, -} \ No newline at end of file +} diff --git a/src/netspresso_trainer/pipelines/segmentation.py b/src/netspresso_trainer/pipelines/segmentation.py index 1da5af67..782d4952 100644 --- a/src/netspresso_trainer/pipelines/segmentation.py +++ b/src/netspresso_trainer/pipelines/segmentation.py @@ -41,7 +41,8 @@ def train_step(self, batch): self.optimizer.step() out = {k: v.detach() for k, v in out.items()} - self.metric_factory.calc(out['pred'], target, phase='train') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='train') if self.conf.distributed: torch.distributed.barrier() @@ -62,7 +63,8 @@ def valid_step(self, batch): else: 
self.loss_factory.calc(out, target, phase='valid') - self.metric_factory.calc(out['pred'], target, phase='valid') + pred = self.postprocessor(out) + self.metric_factory.calc(pred, target, phase='valid') if self.conf.distributed: torch.distributed.barrier() @@ -87,9 +89,9 @@ def test_step(self, batch): out = self.model(images.unsqueeze(0)) - output_seg = torch.max(out['pred'], dim=1)[1] # argmax + pred = self.postprocessor(out) - return output_seg + return pred def get_metric_with_all_outputs(self, outputs, phase: Literal['train', 'valid']): pass diff --git a/src/netspresso_trainer/postprocessors/__init__.py b/src/netspresso_trainer/postprocessors/__init__.py new file mode 100644 index 00000000..5fddd44e --- /dev/null +++ b/src/netspresso_trainer/postprocessors/__init__.py @@ -0,0 +1 @@ +from .builder import build_postprocessor diff --git a/src/netspresso_trainer/postprocessors/builder.py b/src/netspresso_trainer/postprocessors/builder.py new file mode 100644 index 00000000..abb93ffe --- /dev/null +++ b/src/netspresso_trainer/postprocessors/builder.py @@ -0,0 +1,8 @@ +from .register import POSTPROCESSOR_DICT + + +def build_postprocessor(task: str, conf_model): + head_name = conf_model.architecture.full.name if conf_model.single_task_model else conf_model.architecture.head.name + if head_name not in POSTPROCESSOR_DICT: + return None + return POSTPROCESSOR_DICT[head_name](conf_model) diff --git a/src/netspresso_trainer/postprocessors/classification.py b/src/netspresso_trainer/postprocessors/classification.py new file mode 100644 index 00000000..37969325 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/classification.py @@ -0,0 +1,18 @@ +from typing import Optional + +from ..models.utils import ModelOutput + +TOPK_MAX = 20 + + +class ClassificationPostprocessor(): + def __init__(self, conf_model): + pass + + def __call__(self, outputs: ModelOutput, k: Optional[int]=None): + pred = outputs['pred'] + maxk = min(TOPK_MAX, pred.size()[1]) + if k: + maxk = min(k, maxk) + _, pred = pred.topk(maxk, 1, True, True) + return pred diff --git a/src/netspresso_trainer/postprocessors/detection.py b/src/netspresso_trainer/postprocessors/detection.py new file mode 100644 index 00000000..8e710ac7 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/detection.py @@ -0,0 +1,96 @@ +import torch +import torchvision + +from ..models.utils import ModelOutput + + +def yolox_decode_outputs(pred, original_shape): + dtype = pred[0].type() + stage_strides= [original_shape[-1] // o.shape[-1] for o in pred] + + hw = [x.shape[-2:] for x in pred] + # [batch, n_anchors_all, num_classes + 5] + pred = torch.cat([x.flatten(start_dim=2) for x in pred], dim=2).permute(0, 2, 1) + pred[..., 4:] = pred[..., 4:].sigmoid() + + grids = [] + strides = [] + for (hsize, wsize), stride in zip(hw, stage_strides): + yv, xv = torch.meshgrid(torch.arange(hsize), torch.arange(wsize), indexing='ij') + grid = torch.stack((xv, yv), 2).view(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + strides.append(torch.full((*shape, 1), stride)) + + grids = torch.cat(grids, dim=1).type(dtype) + strides = torch.cat(strides, dim=1).type(dtype) + + pred = torch.cat([ + (pred[..., 0:2] + grids) * strides, + torch.clamp(torch.exp(pred[..., 2:4]) * strides, min=torch.iinfo(torch.int32).min, max=torch.iinfo(torch.int32).max), + pred[..., 4:] + ], dim=-1) + + box_corner = pred.new(pred.shape) + box_corner[:, :, 0] = pred[:, :, 0] - pred[:, :, 2] / 2 + box_corner[:, :, 1] = pred[:, :, 1] - pred[:, :, 3] / 2 + box_corner[:, :, 2] = pred[:, :, 0] 
+ pred[:, :, 2] / 2 + box_corner[:, :, 3] = pred[:, :, 1] + pred[:, :, 3] / 2 + pred[:, :, :4] = box_corner[:, :, :4] + return pred + + +def nms(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): + output = [torch.zeros(0, 7).to(prediction.device) for i in range(len(prediction))] + for i, image_pred in enumerate(prediction): + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get score and class with highest confidence + class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + detections = detections[conf_mask] + if not detections.size(0): + continue + + if class_agnostic: + nms_out_index = torchvision.ops.nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + nms_thre, + ) + else: + nms_out_index = torchvision.ops.batched_nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + detections[:, 6], + nms_thre, + ) + + detections = detections[nms_out_index] + output[i] = torch.cat((output[i], detections)) + + return output + + +class DetectionPostprocessor: + def __init__(self, conf_model): + HEAD_POSTPROCESS_MAPPING = { + 'yolox_head': [yolox_decode_outputs, nms] + } + + head_name = conf_model.architecture.head.name + self.decode_outputs, self.postprocess = HEAD_POSTPROCESS_MAPPING[head_name] + + def __call__(self, outputs: ModelOutput, original_shape, num_classes, conf_thresh=0.7, nms_thre=0.45, class_agnostic=False): + pred = outputs['pred'] + + if self.decode_outputs: + pred = self.decode_outputs(pred, original_shape) + if self.postprocess: + pred = self.postprocess(pred, num_classes=num_classes, conf_thre=conf_thresh, nms_thre=nms_thre, class_agnostic=class_agnostic) + return pred diff --git a/src/netspresso_trainer/postprocessors/register.py b/src/netspresso_trainer/postprocessors/register.py new file mode 100644 index 00000000..fc137c79 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/register.py @@ -0,0 +1,12 @@ +from typing import Dict, Type + +from .classification import ClassificationPostprocessor +from .detection import DetectionPostprocessor +from .segmentation import SegmentationPostprocessor + +POSTPROCESSOR_DICT = { + 'fc': ClassificationPostprocessor, + 'all_mlp_decoder': SegmentationPostprocessor, + 'yolox_head': DetectionPostprocessor, + 'pidnet': SegmentationPostprocessor, +} diff --git a/src/netspresso_trainer/postprocessors/segmentation.py b/src/netspresso_trainer/postprocessors/segmentation.py new file mode 100644 index 00000000..f40334c1 --- /dev/null +++ b/src/netspresso_trainer/postprocessors/segmentation.py @@ -0,0 +1,15 @@ +from typing import Any, Optional + +import torch + +from ..models.utils import ModelOutput + + +class SegmentationPostprocessor: + def __init__(self, conf_model): + pass + + def __call__(self, outputs: ModelOutput): + pred = outputs['pred'] + pred = torch.max(pred, dim=1)[1] # argmax + return pred diff --git a/src/netspresso_trainer/schedulers/builder.py b/src/netspresso_trainer/schedulers/builder.py index ce566554..df4b49f3 100644 --- a/src/netspresso_trainer/schedulers/builder.py +++ b/src/netspresso_trainer/schedulers/builder.py @@ -15,8 +15,8 @@ def build_scheduler(optimizer, conf_training): 'total_iters': num_epochs, 'iters_per_phase': conf_training.iters_per_phase, # TODO: 
config for StepLR
     })
-
+
     assert scheduler_name in SCHEDULER_DICT, f"{scheduler_name} not in scheduler dict!"
     lr_scheduler = SCHEDULER_DICT[scheduler_name](optimizer, **conf_sched)
-
+
     return lr_scheduler, num_epochs
diff --git a/src/netspresso_trainer/schedulers/cosine_lr.py b/src/netspresso_trainer/schedulers/cosine_lr.py
index aac4d301..e24286a4 100644
--- a/src/netspresso_trainer/schedulers/cosine_lr.py
+++ b/src/netspresso_trainer/schedulers/cosine_lr.py
@@ -31,10 +31,10 @@ def get_lr(self):
         if not self._get_lr_called_within_step:
             warnings.warn("To get the last learning rate computed by the scheduler, "
                           "please use `get_last_lr()`.", UserWarning, stacklevel=2)
-
+
         if self.last_epoch > self.T_max:
             return [group['lr'] for group in self.optimizer.param_groups]
-
+
         if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters:
             return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr)
                     for base_lr in self.base_lrs]
@@ -63,4 +63,4 @@ def _get_closed_form_lr(self):
                 )
             )
             for base_lr in self.base_lrs
-        ]
\ No newline at end of file
+        ]
diff --git a/src/netspresso_trainer/schedulers/cosine_warm_restart.py b/src/netspresso_trainer/schedulers/cosine_warm_restart.py
index 4e114142..ba0f501f 100644
--- a/src/netspresso_trainer/schedulers/cosine_warm_restart.py
+++ b/src/netspresso_trainer/schedulers/cosine_warm_restart.py
@@ -91,7 +91,7 @@ def get_reassigned_t_i(current_t_i, next_t_i, remain_epochs):
                 return remain_epochs, remain_epochs
             return current_t_i, remain_epochs
-
+

     def _step_without_given_epoch(self) -> int:
         if self.last_epoch < 0:
             epoch = 0
@@ -105,7 +105,7 @@ def _step_without_given_epoch(self) -> int:
                 self.T_i = self.T_i * self.T_mult
                 self.T_i, self.remain_iters = self.get_reassigned_t_i(self.T_i, self.T_i * self.T_mult, self.remain_iters)
         return epoch
-
+

     def step(self, epoch=None):
         """Step could be called after every batch update
@@ -139,7 +139,7 @@ def step(self, epoch=None):
         else:
             if epoch < 0:
                 raise ValueError("Expected non-negative epoch, but got {}".format(epoch))
-
+
             if epoch >= self.T_0:
                 if self.T_mult == 1:
                     self.T_cur = epoch % self.T_0
diff --git a/src/netspresso_trainer/schedulers/poly_lr.py b/src/netspresso_trainer/schedulers/poly_lr.py
index 9c9b21c2..d7f62461 100644
--- a/src/netspresso_trainer/schedulers/poly_lr.py
+++ b/src/netspresso_trainer/schedulers/poly_lr.py
@@ -34,7 +34,7 @@ def get_lr(self):
         if self.last_epoch > self.total_iters:
             return [group["lr"] for group in self.optimizer.param_groups]
-
+
         if self.last_epoch >= 0 and self.last_epoch < self.warmup_iters:
             return [self.warmup_bias_lr + (float(self.last_epoch + 1) / float(max(1, self.warmup_iters))) * (base_lr - self.warmup_bias_lr)
                     for base_lr in self.base_lrs]
@@ -45,7 +45,7 @@ def get_lr(self):
             return [self.min_lr + (group["lr"] - self.min_lr) * decay_factor for group in self.optimizer.param_groups]

     def _get_closed_form_lr(self):
-        decay_steps = self.total_iters - self.warmup_iters 
+        decay_steps = self.total_iters - self.warmup_iters
         return [
             (
                 min(
@@ -54,4 +54,4 @@ def _get_closed_form_lr(self):
                 )
             )
             for base_lr in self.base_lrs
-        ]
\ No newline at end of file
+        ]
diff --git a/src/netspresso_trainer/schedulers/registry.py b/src/netspresso_trainer/schedulers/registry.py
index d389ae0d..992a9d3c 100644
--- a/src/netspresso_trainer/schedulers/registry.py
+++ b/src/netspresso_trainer/schedulers/registry.py
@@ -13,4 +13,4 @@
     'cosine_no_sgdr': CosineAnnealingLRWithCustomWarmUp,
     'poly': PolynomialLRWithWarmUp,
     'step': StepLR
-}
\ No newline at end of file
+}
diff --git a/src/netspresso_trainer/schedulers/step_lr.py b/src/netspresso_trainer/schedulers/step_lr.py
index 26776e7c..ab97636f 100644
--- a/src/netspresso_trainer/schedulers/step_lr.py
+++ b/src/netspresso_trainer/schedulers/step_lr.py
@@ -52,4 +52,4 @@ def get_lr(self):

     def _get_closed_form_lr(self):
         return [base_lr * self.gamma ** (self.last_epoch // self.step_size)
-                for base_lr in self.base_lrs]
\ No newline at end of file
+                for base_lr in self.base_lrs]
diff --git a/src/netspresso_trainer/trainer_cli.py b/src/netspresso_trainer/trainer_cli.py
index 56cde7b8..e0fa0c64 100644
--- a/src/netspresso_trainer/trainer_cli.py
+++ b/src/netspresso_trainer/trainer_cli.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import Union

+import torch
 from omegaconf import DictConfig, OmegaConf

 from netspresso_trainer.trainer_common import train_common
@@ -12,17 +13,17 @@
 def run_distributed_training_script(gpu_ids, data, augmentation, model, training, logging, environment, log_level):
-
+
     command = [
         "--data", data,
-        "--augmentation", augmentation, 
+        "--augmentation", augmentation,
         "--model", model,
         "--training", training,
         "--logging", logging,
         "--environment", environment,
         "--log_level", log_level,
     ]
-
+
     # Distributed training script
     command = [
         'python', '-m', 'torch.distributed.launch',
@@ -45,10 +46,10 @@ def parse_gpu_ids(gpu_arg: str):
     """Parse comma-separated GPU IDs and return as a list of integers."""
     try:
         gpu_ids = [int(id) for id in gpu_arg.split(',')]
-
+
         if len(gpu_ids) == 1:  # Single GPU
             return gpu_ids[0]
-
+
         gpu_ids = sorted(gpu_ids)
         return gpu_ids
     except ValueError as e:
@@ -60,7 +61,7 @@ def parse_args_netspresso(with_gpus=False):
     parser = argparse.ArgumentParser(description="Parser for NetsPresso configuration")

     # -------- User arguments ----------------------------------------
-
+
     if with_gpus:
         parser.add_argument(
             '--gpus', type=parse_gpu_ids, default=0,
@@ -110,7 +111,7 @@ def parse_args_netspresso(with_gpus=False):
 def set_arguments(data: Union[Path, str], augmentation: Union[Path, str],
                   model: Union[Path, str], training: Union[Path, str],
                   logging: Union[Path, str], environment: Union[Path, str]) -> DictConfig:
-
+
     conf_data = OmegaConf.load(data)
     conf_augmentation = OmegaConf.load(augmentation)
     conf_model = OmegaConf.load(model)
@@ -125,18 +126,19 @@ def set_arguments(data: Union[Path, str], augmentation: Union[Path, str],
     conf.merge_with(conf_training)
     conf.merge_with(conf_logging)
     conf.merge_with(conf_environment)
-
+
     return conf


 def train_with_yaml_impl(gpus: Union[list, int], data: Union[Path, str], augmentation: Union[Path, str],
                          model: Union[Path, str], training: Union[Path, str],
                          logging: Union[Path, str], environment: Union[Path, str], log_level: str = LOG_LEVEL):
-
+
     assert isinstance(gpus, (list, int))
     gpu_ids_str = ','.join(map(str, gpus)) if isinstance(gpus, list) else str(gpus)
     os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids_str
-
+    torch.cuda.empty_cache() # Reinitialize CUDA to apply the change
+
     if isinstance(gpus, int):
         conf = set_arguments(data, augmentation, model, training, logging, environment)
         train_common(conf, log_level=log_level)
@@ -146,7 +148,7 @@ def train_cli() -> None:
     args_parsed = parse_args_netspresso(with_gpus=True)
-
+
     train_with_yaml_impl(
         gpus=args_parsed.gpus,
         data=args_parsed.data,
@@ -161,7 +163,7 @@ def train_cli() -> None:

 def train_cli_without_additional_gpu_check() -> None:
     args_parsed = parse_args_netspresso(with_gpus=False)
-
+
     conf = set_arguments(
         data=args_parsed.data,
         augmentation=args_parsed.augmentation,
@@ -175,6 +177,6 @@ def train_cli_without_additional_gpu_check() -> None:

 if __name__ == "__main__":
-
+
     # Execute by `run_distributed_training_script`
-    train_cli_without_additional_gpu_check()
\ No newline at end of file
+    train_cli_without_additional_gpu_check()
diff --git a/src/netspresso_trainer/trainer_common.py b/src/netspresso_trainer/trainer_common.py
index daa45300..6d76b36a 100644
--- a/src/netspresso_trainer/trainer_common.py
+++ b/src/netspresso_trainer/trainer_common.py
@@ -29,10 +29,9 @@ def train_common(conf: DictConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING'
     # TODO: Get model name from checkpoint
     single_task_model = is_single_task_model(conf.model)
-    conf_model_sub = conf.model.architecture.full if single_task_model else conf.model.architecture.backbone
     conf.model.single_task_model = single_task_model

-    model_name = str(conf_model_sub.name).lower()
+    model_name = str(conf.model.name).lower()

     if is_graphmodule_training:
         model_name += "_graphmodule"
diff --git a/src/netspresso_trainer/trainer_inline.py b/src/netspresso_trainer/trainer_inline.py
index 556194df..d291bce4 100644
--- a/src/netspresso_trainer/trainer_inline.py
+++ b/src/netspresso_trainer/trainer_inline.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 from typing import List, Literal, Union

+import torch
 from omegaconf import DictConfig, OmegaConf

 from netspresso_trainer.cfg import TrainerConfig
@@ -12,7 +13,7 @@
 def set_struct_recursive(conf: DictConfig, value: bool) -> None:
     OmegaConf.set_struct(conf, value)
-
+
     for _, conf_value in conf.items():
         if isinstance(conf_value, DictConfig):
             set_struct_recursive(conf_value, value)
@@ -23,18 +24,26 @@ def export_config_as_yaml(config: TrainerConfig) -> str:
     return OmegaConf.to_yaml(conf)


-def train_with_config(config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO') -> None:
+def train_with_config(gpus: str, config: TrainerConfig, log_level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = 'INFO') -> None:
+
+    gpus: Union[List, int] = parse_gpu_ids(gpus)
+    assert isinstance(gpus, int), f"Currently, only single-GPU training is supported in this API. Your gpu(s): {gpus}"
+
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpus)
+    torch.cuda.empty_cache() # Reinitialize CUDA to apply the change
+
     conf: DictConfig = OmegaConf.create(config)
     set_struct_recursive(conf, False)
+
     train_common(conf, log_level=log_level)


 def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path, str], model: Union[Path, str],
                     training: Union[Path, str], logging: Union[Path, str], environment: Union[Path, str],
                     log_level: str = LOG_LEVEL):
-
+
     gpus: Union[List, int] = parse_gpu_ids(gpus)
-
+
     train_with_yaml_impl(
         gpus=gpus,
         data=data,
@@ -44,4 +53,4 @@ def train_with_yaml(gpus: str, data: Union[Path, str], augmentation: Union[Path,
         logging=logging,
         environment=environment,
         log_level=log_level
-    )
\ No newline at end of file
+    )
diff --git a/src/netspresso_trainer/utils/logger.py b/src/netspresso_trainer/utils/logger.py
index bf296de1..6b320a09 100644
--- a/src/netspresso_trainer/utils/logger.py
+++ b/src/netspresso_trainer/utils/logger.py
@@ -24,7 +24,7 @@ def _custom_logger(name: str, level: str, distributed: bool):
     else:
         fmt = '%(asctime)s | %(levelname)s\t\t| %(funcName)s:<%(filename)s>:%(lineno)s >>> %(message)s'
     logger = logging.getLogger(name)
-
+

     if not logger.hasHandlers():
         handler = logging.StreamHandler()
@@ -46,7 +46,7 @@ def set_logger(logger_name="netspresso_trainer", level: str = 'INFO', distribute
         print("Skipping timezone setting.")
     _level: Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] = level.upper()
     _custom_logger(logger_name, _level, distributed)
-
+
     logger = logging.getLogger(logger_name)
     if _level == 'DEBUG':
         logger.setLevel(logging.DEBUG)
diff --git a/src/netspresso_trainer/utils/stats.py b/src/netspresso_trainer/utils/stats.py
index aacc385d..326721d2 100644
--- a/src/netspresso_trainer/utils/stats.py
+++ b/src/netspresso_trainer/utils/stats.py
@@ -12,7 +12,7 @@ def get_params_and_macs(model: nn.Module, sample_input: torch.Tensor):
     sample_input = sample_input.to(get_device(model))

     # From v0.0.9
     macs, params = _params_and_macs_fvcore(model, sample_input)
-
+
     # # Before v0.0.9
     # macs, params = _params_and_macs_thop(model, sample_input)
@@ -25,4 +25,4 @@ def _params_and_macs_fvcore(model: nn.Module, sample_input: torch.Tensor):

 def _params_and_macs_thop(model: nn.Module, sample_input: torch.Tensor):
     macs, params = thop.profile(model, inputs=(sample_input,), verbose=False)
-    return macs, params
\ No newline at end of file
+    return macs, params
diff --git a/tools/config_test.py b/tools/config_test.py
index ea1c451d..81f7bc5d 100644
--- a/tools/config_test.py
+++ b/tools/config_test.py
@@ -8,13 +8,16 @@ if __name__ == "__main__":
     from netspresso_trainer.cfg import (
+        AugmentationConfig,
         ClassificationAugmentationConfig,
         ClassificationResNetModelConfig,
         ColorJitter,
+        RandomResizedCrop,
+        RandomHorizontalFlip,
         ExampleBeansDataset,
     )

-    augmentation_config = ClassificationAugmentationConfig(color_jitter=ColorJitter(colorjitter_p=0.9))
+    augmentation_config = ClassificationAugmentationConfig()
     example_dataset = ExampleBeansDataset
     example_model = ClassificationResNetModelConfig()
     cfg = TrainerConfig(
@@ -32,12 +35,12 @@
     # OK: update value of subclass in the main dataclass
     cfg_new: TrainerConfig = deepcopy(cfg)
-    cfg_new.augmentation.color_jitter.saturation = 0.0
+    cfg_new.augmentation.transforms[-1].saturation = 0.0
     # print(OmegaConf.to_yaml(OmegaConf.structured(cfg_new)))

     # OK: update value from OmegaConf Config
     config_new: TrainerConfig = deepcopy(config)
-    config_new.augmentation.color_jitter.hue = 0.5
+    cfg_new.augmentation.transforms[-1].hue = 0.5
     # print(OmegaConf.to_yaml(config_new))
diff --git a/train.py b/train.py
index e2b937d0..7c286a26 100644
--- a/train.py
+++ b/train.py
@@ -1,7 +1,7 @@
 from netspresso_trainer import train_cli

 def train_with_inline_cfg():
-    from netspresso_trainer import TrainerConfig, train, export_config_as_yaml
+    from netspresso_trainer import TrainerConfig, train_with_config, export_config_as_yaml
     from netspresso_trainer.cfg import ClassificationResNetModelConfig, ExampleBeansDataset

     """
@@ -38,7 +38,9 @@ def train_with_inline_cfg():
     print(export_config_as_yaml(cfg))

-    train(cfg, log_level='INFO')
+    train_with_config(gpus="1",
+                      config=cfg,
+                      log_level='INFO')


 def train_with_inline_yaml():
     from netspresso_trainer import train_with_yaml
@@ -52,10 +54,10 @@ def train_with_inline_yaml():

 if __name__ == '__main__':
-    train_cli()
+    # train_cli()

     # With inline yaml
     # train_with_inline_yaml()

     # With inline pythonic config
-    # train_with_inline_cfg()
\ No newline at end of file
+    train_with_inline_cfg()
\ No newline at end of file
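
For reference, the call pattern exercised by the patched `train.py` reduces to the sketch below. The `TrainerConfig` field names (`data`, `model`) are assumed from the surrounding `tools/config_test.py` hunk rather than a documented API, and `train_with_config` accepts only a single GPU id in this release; the GPU id `"0"` is an arbitrary example.

```python
# Minimal sketch of the new single-GPU inline-config entry point (v0.0.10).
# Assumption: TrainerConfig exposes `data` and `model` fields as suggested by tools/config_test.py.
from netspresso_trainer import TrainerConfig, train_with_config, export_config_as_yaml
from netspresso_trainer.cfg import ClassificationResNetModelConfig, ExampleBeansDataset

cfg = TrainerConfig(
    data=ExampleBeansDataset,                 # example HuggingFace beans dataset config
    model=ClassificationResNetModelConfig(),  # ResNet classification model config
)

# Inspect the merged configuration as YAML before launching, as train.py does.
print(export_config_as_yaml(cfg))

# `gpus` is parsed by `parse_gpu_ids`; a single GPU id string such as "0" is required here.
train_with_config(gpus="0", config=cfg, log_level='INFO')
```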