PaddlePaddle · mmglove · Dec 22, 2020 · Jan 10, 2021 · Jan 18, 2021 · Jan 18, 2021
diff --git a/ce/python3.7_linux_gpu_train_slim/PaddleDetection_repo_slim/.run_ce.sh b/ce/python3.7_linux_gpu_train_slim/PaddleDetection_repo_slim/.run_ce.sh
@@ -140,7 +140,7 @@ for i in $(seq 0 2); do
     -o weights=./output/${quan_models[$i]}/best_model >${log_path}/${model}_${quan_models[$i]} 2>&1
 print_info $? ${model}_${quan_models[$i]}
 mkdir dete_quan_${quan_models[$i]}_combined
-cp ./quan_export/dete_quan_${quan_models[$i]}/float/* ./dete_quan_${quan_models[$i]}_combined/
+cp ./quan_export/dete_quan_${quan_models[$i]}/float/dete_quan_${quan_models[$i]}/* ./dete_quan_${quan_models[$i]}_combined/
 # for lite
 copy_for_lite dete_quan_${quan_models[$i]}_combined ${models_from_train}
 done
@@ -200,7 +200,7 @@ for i in $(seq 0 1); do
     --output_dir ./quan_export/dete_quan_${quan_ssd_models[$i]} >${log_path}/${model}_${quan_ssd_models[$i]} 2>&1
 print_info $? ${model}_${quan_ssd_models[$i]}
 mkdir dete_quan_${quan_ssd_models[$i]}_combined
-cp ./quan_export/dete_quan_${quan_ssd_models[$i]}/float/* ./dete_quan_${quan_ssd_models[$i]}_combined/
+cp ./quan_export/dete_quan_${quan_ssd_models[$i]}/float/dete_quan_${quan_ssd_models[$i]}/* ./dete_quan_${quan_ssd_models[$i]}_combined/
 # for lite
 copy_for_lite dete_quan_${quan_ssd_models[$i]}_combined ${models_from_train}
 done

diff --git a/ce/python3.7_linux_gpu_train_slim/PaddleSlim_repo/.run_ce.sh b/ce/python3.7_linux_gpu_train_slim/PaddleSlim_repo/.run_ce.sh
@@ -251,6 +251,43 @@ fi
 #fi
 
 
+dy_quant(){  # Runs demo/dygraph/quant/train.py four times (mobilenet_v1 and PACT, each on 1 GPU and on 4 GPUs), 1 epoch each.
+cd ${slim_dir}/demo/dygraph/quant
+CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \
+--pretrained_model '../../pretrain/MobileNetV1_pretrained' \
+--num_epochs 1 \
+--batch_size 128 \
+> ${log_path}/dy_quant_v1_gpu1 2>&1
+print_info $? dy_quant_v1_gpu1
+CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus="4,5,6,7" \
+train.py --model='mobilenet_v1' \
+--pretrained_model '../../pretrain/MobileNetV1_pretrained' \
+--num_epochs 1 \
+--batch_size 128 \
+> ${log_path}/dy_quant_v1_gpu4 2>&1
+print_info $? dy_quant_v1_gpu4
+# PACT run (use_pact=True) with the MobileNetV3_large_x1_0_ssld pretrained weights, single GPU
+CUDA_VISIBLE_DEVICES=${cudaid1}  python train.py  --lr=0.001 \
+--batch_size 128 \
+--use_pact=True --l2_decay=2e-5 --ls_epsilon=0.1 \
+--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+--num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1
+print_info $? dy_pact_quant_v3_gpu1
+# Multi-GPU PACT run; passes --gpus="4,5,6,7" (NOTE(review): the darts launches use --gpus=0,1,2,3 with the same ${cudaid4} -- confirm which is intended)
+CUDA_VISIBLE_DEVICES=${cudaid4}  python -m paddle.distributed.launch \
+--gpus="4,5,6,7" \
+train.py  --lr=0.001 \
+--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+--use_pact=True --num_epochs=1 \
+--l2_decay=2e-5 \
+--ls_epsilon=0.1 \
+--batch_size=128 \
+--model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1
+print_info $? dy_pact_quant_v3_gpu4
+}
+
+dy_quant
+
 #3.1 prune MobileNetV1
 cd ${current_dir}/demo/prune
 CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model "MobileNet" \
@@ -260,18 +297,18 @@ cd ${current_dir}
 cat ${log_path}/prune_v1_T_1card |grep Final |awk -F ' ' 'END{print "kpis\tprune_v1_acc_top1_gpu1\t"$8"\nkpis\tprune_v1_acc_top5_gpu1\t"$10}' |tr -d ";" | python _ce.py
 cd ${current_dir}/demo/prune
 CUDA_VISIBLE_DEVICES=${cudaid8} python train.py --model "MobileNet" \
---pruned_ratio 0.31 --data "imagenet" --pretrained_model ./MobileNetV1_pretrained/ \
+--pruned_ratio 0.31 --data "imagenet" --pretrained_model ../pretrain/MobileNetV1_pretrained/ \
 --num_epochs 1  --save_inference True >${log_path}/prune_v1_T_8card 2>&1
 # for lite uncombined
 mkdir slim_prune_MobileNetv1_uncombined
-cp ./models/infer_models/0/* ./slim_prune_MobileNetv1_uncombined/
+cp ./models/infer_models/0.* ./slim_prune_MobileNetv1_uncombined/
 copy_for_lite slim_prune_MobileNetv1_uncombined ${models_from_train}
 cd ${current_dir}
 cat ${log_path}/prune_v1_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_v1_acc_top1_gpu8\t"$8"\nkpis\tprune_v1_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
 # 3.2 prune eval
 cd ${current_dir}/demo/prune
 model=slim_prune_eval
-python eval.py --model "MobileNet" --data "imagenet" --model_path "./models/0"  >${log_path}/${model} 2>&1
+python eval.py --model "MobileNet" --data "imagenet" --model_path "./models/infer_models/0"  >${log_path}/${model} 2>&1
 print_info $? ${model}
 if [ -d "models" ];then
     mv  models MobileNet_models
@@ -302,7 +339,7 @@ cd ${current_dir}/demo/prune
 CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v1 >${log_path}/slim_prune_fpgm_v1_f50_T_8card 2>&1
 # for lite uncombined
 mkdir slim_prune_fpgm_v1_f50_uncombined
-cp ./fpgm_mobilenetv1_models/infer_models/0/* ./slim_prune_fpgm_v1_f50_uncombined/
+cp ./fpgm_mobilenetv1_models/infer_models/0.* ./slim_prune_fpgm_v1_f50_uncombined/
 copy_for_lite slim_prune_fpgm_v1_f50_uncombined ${models_from_train}
 cd ${current_dir}
 cat ${log_path}/slim_prune_fpgm_v1_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_v1_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_v1_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
@@ -337,7 +374,7 @@ cd ${current_dir}/demo/prune
 CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v2 >${log_path}/slim_prune_fpgm_v2_f50_T_8card 2>&1
 # for lite uncombined
 mkdir slim_prune_fpgm_v2_f50_uncombined
-cp ./fpgm_mobilenetv2_models/infer_models/0/* ./slim_prune_fpgm_v2_f50_uncombined/
+cp ./fpgm_mobilenetv2_models/infer_models/0.* ./slim_prune_fpgm_v2_f50_uncombined/
 copy_for_lite slim_prune_fpgm_v2_f50_uncombined ${models_from_train}
 cd ${current_dir}
 cat ${log_path}/slim_prune_fpgm_v2_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_v2_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_v2_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
@@ -373,7 +410,7 @@ cd ${current_dir}/demo/prune
 CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v1 >${log_path}/slim_prune_fpgm_resnet34_f50_T_8card 2>&1
 # for lite uncombined
 mkdir slim_prune_fpgm_resnet34_f50_uncombined
-cp ./fpgm_resnet34_models/infer_models/0/* ./slim_prune_fpgm_resnet34_f50_uncombined/
+cp ./fpgm_resnet34_models/infer_models/0.* ./slim_prune_fpgm_resnet34_f50_uncombined/
 copy_for_lite slim_prune_fpgm_resnet34_f50_uncombined ${models_from_train}
 cd ${current_dir}
 cat ${log_path}/slim_prune_fpgm_resnet34_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_resnet34_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_resnet34_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
@@ -393,11 +430,12 @@ python train.py \
 --data "imagenet" \
 --save_inference True \
 --pretrained_model ../pretrain/$1_pretrained/ \
---num_epochs 1
+--num_epochs 1 \
+--batch_size 128
 }
 
 eval_prune(){
-    python eval.py --model $1 --data "imagenet" --model_path "./models/0"
+    python eval.py --model $1 --data "imagenet" --model_path "./models/infer_models/0"  # $1 = model name; path is under the same infer_models/ directory the lite copy step globs (0.*)
 }
 for i in $(seq 0 0); do
     CUDA_VISIBLE_DEVICES=${cudaid1} train_prune ${prune_models[$i]} >${log_path}/prune_${prune_models[$i]}_gpu1 2>&1
@@ -410,14 +448,76 @@ for i in $(seq 0 0); do
     #move models for lite uncombined
     cd ${current_dir}/demo/prune
     mkdir slim_prune_${prune_models[$i]}_uncombined
-    cp ./models/infer_models/0/* slim_prune_${prune_models[$i]}_uncombined/
+    cp ./models/infer_models/0.* slim_prune_${prune_models[$i]}_uncombined/
     copy_for_lite slim_prune_${prune_models[$i]}_uncombined ${models_from_train}
     eval_prune ${prune_models[$i]} >${log_path}/prune_${prune_models[$i]}_eval 2>&1
     print_info $? prune_${prune_models[$i]}_eval
     if [ -d "models" ];then
 	    mv  models ${prune_models[$i]}_models
     fi
 done
+
+# 3.4 dygraph pruning: FPGM-prune resnet34 at ratio 0.25 -- train on 1 GPU and 4 GPUs, resume from a checkpoint, eval, export
+dy_prune_ResNet50_f42(){  # NOTE(review): named ResNet50 but every step passes --model="resnet34"; name kept because the log names use it
+cd ${slim_dir}/demo/dygraph/pruning
+ln -s ${dataset_path}/slim/data data
+CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
+    --use_gpu=True \
+    --model="resnet34" \
+    --data="imagenet" \
+    --pruned_ratio=0.25 \
+    --num_epochs=1 \
+    --batch_size=128 \
+    --lr_strategy="cosine_decay" \
+    --criterion="fpgm" \
+    --model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet50_f42_gpu1 2>&1
+print_info $? dy_prune_ResNet50_f42_gpu1
+CUDA_VISIBLE_DEVICES=${cudaid4}  python -m paddle.distributed.launch \
+--gpus="4,5,6,7" \
+--log_dir="fpgm_resnet34_f-42_train_log" \
+train.py \
+    --use_gpu=True \
+    --model="resnet34" \
+    --data="imagenet" \
+    --pruned_ratio=0.25 \
+    --num_epochs=1 \
+    --batch_size=128 \
+    --lr_strategy="cosine_decay" \
+    --criterion="fpgm" \
+    --model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet50_f42_gpu4 2>&1
+print_info $? dy_prune_ResNet50_f42_gpu4  # launch runs in the foreground so $? is its real exit status and the resume step starts only after it ends
+#2.3 Resume training: continue from a saved checkpoint via the --checkpoint option
+CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
+    --use_gpu=True \
+    --model="resnet34" \
+    --data="imagenet" \
+    --pruned_ratio=0.25 \
+    --num_epochs=2 \
+    --batch_size=128 \
+    --lr_strategy="cosine_decay" \
+    --criterion="fpgm" \
+    --model_path="./fpgm_resnet34_025_120_models" \
+    --checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet50_f42_gpu1_load 2>&1
+print_info $? dy_prune_ResNet50_f42_gpu1_load
+
+#2.4 Evaluation: run eval.py to measure the accuracy of the pruned and retrained model on the test data
+CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \
+--checkpoint=./fpgm_resnet34_025_120_models/1 \
+--model="resnet34" \
+--pruned_ratio=0.25 \
+--batch_size=128 >${log_path}/dy_prune_ResNet50_f42_gpu1_eval 2>&1
+print_info $? dy_prune_ResNet50_f42_gpu1_eval
+
+#2.5 Export model: run the following command to export the model used for inference
+CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \
+--checkpoint=./fpgm_resnet34_025_120_models/final \
+--model="resnet34" \
+--pruned_ratio=0.25 \
+--output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet50_f42_gpu1_export 2>&1
+print_info $? dy_prune_ResNet50_f42_gpu1_export
+}
+dy_prune_ResNet50_f42
+
 #4 nas
 # 4.1 sa_nas_mobilenetv2
 cd ${current_dir}/demo/nas
@@ -445,39 +545,43 @@ CUDA_VISIBLE_DEVICES=${cudaid8} python rl_nas_mobilenetv2.py --search_steps 1 --
 print_info $? ${model}
 
 # 4.4 parl_nas
+parl_nas(){  # wraps the 1-card and 8-card parl_nas_mobilenetv2.py runs; the call right after the function is commented out
 model=parl_nas_v2_T_1card
 CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py --search_steps 1 --port 8887 >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=parl_nas_v2_T_8card
 CUDA_VISIBLE_DEVICES=${cudaid8} python parl_nas_mobilenetv2.py --search_steps 1 --port 8889 >${log_path}/${model} 2>&1
 print_info $? ${model}
-
+}
+#parl_nas
 # 5 darts
 # search 1card # DARTS一阶近似搜索方法
 cd ${current_dir}/demo/darts
 model=darts1_search_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=darts1_search_8card
-CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
 print_info $? ${model}
 # # DARTS 二阶近似搜索方法
 model=darts2_search_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --unrolled=True --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=darts2_search_8card
-CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --unrolled=True --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
 print_info $? ${model}
 # PC-DARTS
 model=pcdarts_search_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=256 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=128 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=pcdarts_search_8card
-CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=256 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=128 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
 print_info $? ${model}
 # 分布式 search
 model=darts1_search_distributed
-CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_search  search.py --use_data_parallel 1 --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 \
+--log_dir ./mylog_search  search.py --use_data_parallel 1 \
+--epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=darts2_search_distributed
 CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_search  search.py --use_data_parallel 1 --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
@@ -487,20 +591,26 @@ CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2
 print_info $? ${model}
 #train
 model=pcdarts_train_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' \
+--epochs 1 --use_multiprocess False --batch_size 64 >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=pcdarts_train_imagenet_8card
-CUDA_VISIBLE_DEVICES=${cudaid8} python train_imagenet.py --arch='PC_DARTS' --epochs 1 --use_multiprocess False --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid8} python train_imagenet.py --arch='PC_DARTS' \
+--epochs 1 --use_multiprocess False --data_dir ../data/ILSVRC2012 --batch_size 64 >${log_path}/${model} 2>&1
 print_info $? ${model}
 # 分布式 train
 model=dartsv2_train_distributed
-CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_train train.py --use_data_parallel 1 --arch='DARTS_V2' >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch \
+--gpus=0,1,2,3 --log_dir ./mylog_train train.py --use_data_parallel 1 \
+--arch='DARTS_V2' >${log_path}/${model} 2>&1
 print_info $? ${model}
 model=dartsv2_train_imagenet_distributed
-CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_train_imagenet train_imagenet.py --use_data_parallel 1 --arch='DARTS_V2' --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
+CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch \
+--gpus=0,1,2,3 --log_dir ./mylog_train_imagenet train_imagenet.py \
+--use_data_parallel 1 --arch='DARTS_V2' --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
 print_info $? ${model}
 # 可视化
-yum -y install graphviz
+pip install graphviz  # NOTE(review): installs the Python graphviz package instead of the yum system package; if visualize.py renders via the `dot` executable it must already be on the host -- confirm
 model=slim_darts_visualize_pcdarts
 python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
 print_info $? ${model}
@@ -530,4 +640,4 @@ CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
 --test_data_dir=./lfw/ >${log_path}/${model} 2>&1
 print_info $? ${model}
 }
-slimfacenet
+#slimfacenet