PaddlePaddle · AshburnLee · Jun 9, 2021
diff --git a/PaddleCV/image_classification/scripts/train/Resnet50_bf16.sh b/PaddleCV/image_classification/scripts/train/Resnet50_bf16.sh
@@ -0,0 +1,52 @@
+#!/bin/bash -ex
+# Runs train.py for ResNet50 with the bf16 mixed-precision switches set below.
+export FLAGS_conv_workspace_size_limit=4000 #MB
+export FLAGS_cudnn_exhaustive_search=1
+export FLAGS_cudnn_batchnorm_spatial_persistent=1
+# Replace DATA_DIR with the real dataset path before running.
+DATA_DIR="Your image dataset path, e.g. /work/datasets/ILSVRC2012/"
+DATA_FORMAT="NHWC"
+# bf16 switches, forwarded as --use_amp_bf16 / --use_pure_bf16.
+USE_AMP_BF16=true
+USE_PURE_BF16=false
+# Optional features, forwarded as --use_dali / --enable_addto.
+USE_DALI=false
+USE_ADDTO=true
+# Compare against the literal "true" instead of executing the variable's value.
+if [ "${USE_ADDTO}" = true ]; then
+    export FLAGS_max_inplace_grad_add=8
+fi
+
+if [ "${USE_DALI}" = true ]; then
+    export FLAGS_fraction_of_gpu_memory_to_use=0.8
+fi
+# DATA_DIR is quoted so a path containing spaces stays a single --data_dir argument.
+python3.7 train.py \
+       --model=ResNet50 \
+       --num_epochs=20 \
+       --data_dir="${DATA_DIR}" \
+       --batch_size=256 \
+       --total_images=1281167 \
+       --image_shape 4 224 224 \
+       --class_dim=1000 \
+       --print_step=10 \
+       --model_save_dir=output/ \
+       --lr_strategy=piecewise_decay \
+       --scale_loss=128.0 \
+       --use_dynamic_loss_scaling=true \
+       --data_format=${DATA_FORMAT} \
+       --fuse_elewise_add_act_ops=true \
+       --fuse_bn_act_ops=true \
+       --fuse_bn_add_act_ops=true \
+       --enable_addto=${USE_ADDTO} \
+       --validate=true \
+       --is_profiler=false \
+       --profiler_path=profile/ \
+       --reader_thread=10 \
+       --reader_buf_size=4000 \
+       --use_dali=${USE_DALI} \
+       --lr=0.1  \
+       --use_amp_bf16=${USE_AMP_BF16} \
+       --use_pure_bf16=${USE_PURE_BF16}
+
+
diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py
@@ -98,6 +98,14 @@ def build_program(is_train, main_prog, startup_prog, args):
                         use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                         use_pure_fp16=args.use_pure_fp16,
                         use_fp16_guard=True)
+                elif args.use_amp_bf16:
+                    optimizer = paddle.static.amp.bf16.decorate_bf16(
+                        optimizer,
+                        amp_lists=paddle.static.amp.bf16.
+                        AutoMixedPrecisionListsBF16(
+                            custom_bf16_list={"conv2d"}),
+                        use_bf16_guard=None,
+                        use_pure_bf16=args.use_pure_bf16)
 
                 optimizer.minimize(avg_cost)
                 if args.use_ema:
@@ -220,10 +228,11 @@ def train(args):
     #init model by checkpoint or pretrianed model.
     init_model(exe, args, train_prog)
 
-    if args.use_amp:
-        optimizer.amp_init(place,
-                scope=paddle.static.global_scope(),
-                test_program=test_prog if args.validate else None)
+    if args.use_amp or args.use_amp_bf16:
+        optimizer.amp_init(
+            place,
+            scope=paddle.static.global_scope(),
+            test_program=test_prog if args.validate else None)
 
     num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
     if args.use_dali:

diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py
@@ -148,7 +148,9 @@ def parse_args():
     add_arg('fuse_bn_act_ops',          bool,   False,                  "Whether to use batch_norm and act fusion.")
     add_arg('fuse_bn_add_act_ops',      bool,   True,                   "Whether to use batch_norm, elementwise_add and act fusion. This is only used for AMP training.")
     add_arg('enable_addto',             bool,   False,                  "Whether to enable the addto strategy for gradient accumulation or not. This is only used for AMP training.")
-
+    add_arg('use_amp_bf16',             bool,   False,                  "Whether to enable mixed precision training with bf16." )
+    add_arg('use_pure_bf16',            bool,   False,                  "Whether to use the pure bf16 training." )
+
     add_arg('use_label_smoothing',      bool,   False,                  "Whether to use label_smoothing")
     add_arg('label_smoothing_epsilon',  float,  0.1,                    "The value of label_smoothing_epsilon parameter")
     #NOTE: (2019/08/08) temporary disable use_distill
@@ -538,7 +540,7 @@ def best_strategy_compiled(args,
                 "PaddlePaddle version 1.7.0 or higher is "
                 "required when you want to fuse batch_norm and activation_op.")
         build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops
-        
+
         try:
             build_strategy.fuse_bn_add_act_ops = args.fuse_bn_add_act_ops
         except Exception as e:
@@ -548,9 +550,8 @@ def best_strategy_compiled(args,
         try:
             build_strategy.enable_addto = args.enable_addto
         except Exception as e:
-            logger.info(
-                "PaddlePaddle 2.0-rc or higher is "
-                "required when you want to enable addto strategy.")
+            logger.info("PaddlePaddle 2.0-rc or higher is "
+                        "required when you want to enable addto strategy.")
 
         exec_strategy = fluid.ExecutionStrategy()