Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

add slim dygraph ce #402

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ for i in $(seq 0 2); do
-o weights=./output/${quan_models[$i]}/best_model >${log_path}/${model}_${quan_models[$i]} 2>&1
print_info $? ${model}_${quan_models[$i]}
mkdir dete_quan_${quan_models[$i]}_combined
cp ./quan_export/dete_quan_${quan_models[$i]}/float/* ./dete_quan_${quan_models[$i]}_combined/
cp ./quan_export/dete_quan_${quan_models[$i]}/float/dete_quan_${quan_models[$i]}/* ./dete_quan_${quan_models[$i]}_combined/
# for lite
copy_for_lite dete_quan_${quan_models[$i]}_combined ${models_from_train}
done
Expand Down Expand Up @@ -200,7 +200,7 @@ for i in $(seq 0 1); do
--output_dir ./quan_export/dete_quan_${quan_ssd_models[$i]} >${log_path}/${model}_${quan_ssd_models[$i]} 2>&1
print_info $? ${model}_${quan_ssd_models[$i]}
mkdir dete_quan_${quan_ssd_models[$i]}_combined
cp ./quan_export/dete_quan_${quan_ssd_models[$i]}/float/* ./dete_quan_${quan_ssd_models[$i]}_combined/
cp ./quan_export/dete_quan_${quan_ssd_models[$i]}/float/dete_quan_${quan_ssd_models[$i]}/* ./dete_quan_${quan_ssd_models[$i]}_combined/
# for lite
copy_for_lite dete_quan_${quan_ssd_models[$i]}_combined ${models_from_train}
done
Expand Down
156 changes: 133 additions & 23 deletions ce/python3.7_linux_gpu_train_slim/PaddleSlim_repo/.run_ce.sh
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,43 @@ fi
#fi


dy_quant() {
  # Smoke-test the dygraph quantization demo (demo/dygraph/quant/train.py)
  # for one epoch in four configurations:
  #   1. mobilenet_v1, single card
  #   2. mobilenet_v1, paddle.distributed.launch on four cards
  #   3. PACT (--use_pact=True) with the MobileNetV3 pretrained weights,
  #      single card
  #   4. same as 3, paddle.distributed.launch on four cards
  # Globals read: slim_dir, log_path, cudaid1, cudaid4.
  # Each run writes stdout+stderr to ${log_path}/<case name>; the run's exit
  # status and the case name are then passed to print_info.
  # Side effect: leaves the shell's working directory in the quant demo dir.
  cd "${slim_dir}/demo/dygraph/quant"

  CUDA_VISIBLE_DEVICES="${cudaid1}" python train.py --model='mobilenet_v1' \
    --pretrained_model '../../pretrain/MobileNetV1_pretrained' \
    --num_epochs 1 \
    --batch_size 128 \
    > "${log_path}/dy_quant_v1_gpu1" 2>&1
  print_info $? dy_quant_v1_gpu1

  # NOTE(review): --gpus is hard-coded to 4,5,6,7 — assumes ${cudaid4}
  # exposes exactly those device ids; confirm against where cudaid4 is set.
  CUDA_VISIBLE_DEVICES="${cudaid4}" python -m paddle.distributed.launch --gpus="4,5,6,7" \
    train.py --model='mobilenet_v1' \
    --pretrained_model '../../pretrain/MobileNetV1_pretrained' \
    --num_epochs 1 \
    --batch_size 128 \
    > "${log_path}/dy_quant_v1_gpu4" 2>&1
  print_info $? dy_quant_v1_gpu4

  # dy_pact_v3
  # No --model is passed in the PACT runs, so train.py's default is used —
  # presumably a MobileNetV3 to match the pretrained weights; verify.
  # --num_epochs is given once here (it used to be passed twice with the
  # same value).
  CUDA_VISIBLE_DEVICES="${cudaid1}" python train.py --lr=0.001 \
    --batch_size 128 \
    --use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \
    --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
    > "${log_path}/dy_pact_quant_v3_gpu1" 2>&1
  print_info $? dy_pact_quant_v3_gpu1

  # Multi-card PACT training on the cards listed in --gpus (4-7 here).
  CUDA_VISIBLE_DEVICES="${cudaid4}" python -m paddle.distributed.launch \
    --gpus="4,5,6,7" \
    train.py --lr=0.001 \
    --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
    --use_pact=True --num_epochs=1 \
    --l2_decay=2e-5 \
    --ls_epsilon=0.1 \
    --batch_size=128 \
    --model_save_dir output > "${log_path}/dy_pact_quant_v3_gpu4" 2>&1
  print_info $? dy_pact_quant_v3_gpu4
}

dy_quant

#3.1 prune MobileNetV1
cd ${current_dir}/demo/prune
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model "MobileNet" \
Expand All @@ -260,18 +297,18 @@ cd ${current_dir}
cat ${log_path}/prune_v1_T_1card |grep Final |awk -F ' ' 'END{print "kpis\tprune_v1_acc_top1_gpu1\t"$8"\nkpis\tprune_v1_acc_top5_gpu1\t"$10}' |tr -d ";" | python _ce.py
cd ${current_dir}/demo/prune
CUDA_VISIBLE_DEVICES=${cudaid8} python train.py --model "MobileNet" \
--pruned_ratio 0.31 --data "imagenet" --pretrained_model ./MobileNetV1_pretrained/ \
--pruned_ratio 0.31 --data "imagenet" --pretrained_model ../pretrain/MobileNetV1_pretrained/ \
--num_epochs 1 --save_inference True >${log_path}/prune_v1_T_8card 2>&1
# for lite uncombined
mkdir slim_prune_MobileNetv1_uncombined
cp ./models/infer_models/0/* ./slim_prune_MobileNetv1_uncombined/
cp ./models/infer_models/0.* ./slim_prune_MobileNetv1_uncombined/
copy_for_lite slim_prune_MobileNetv1_uncombined ${models_from_train}
cd ${current_dir}
cat ${log_path}/prune_v1_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_v1_acc_top1_gpu8\t"$8"\nkpis\tprune_v1_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
# 3.2 prune eval
cd ${current_dir}/demo/prune
model=slim_prune_eval
python eval.py --model "MobileNet" --data "imagenet" --model_path "./models/0" >${log_path}/${model} 2>&1
python eval.py --model "MobileNet" --data "imagenet" --model_path "./models/infer_models/0" >${log_path}/${model} 2>&1
print_info $? ${model}
if [ -d "models" ];then
mv models MobileNet_models
Expand Down Expand Up @@ -302,7 +339,7 @@ cd ${current_dir}/demo/prune
CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v1 >${log_path}/slim_prune_fpgm_v1_f50_T_8card 2>&1
# for lite uncombined
mkdir slim_prune_fpgm_v1_f50_uncombined
cp ./fpgm_mobilenetv1_models/infer_models/0/* ./slim_prune_fpgm_v1_f50_uncombined/
cp ./fpgm_mobilenetv1_models/infer_models/0.* ./slim_prune_fpgm_v1_f50_uncombined/
copy_for_lite slim_prune_fpgm_v1_f50_uncombined ${models_from_train}
cd ${current_dir}
cat ${log_path}/slim_prune_fpgm_v1_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_v1_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_v1_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
Expand Down Expand Up @@ -337,7 +374,7 @@ cd ${current_dir}/demo/prune
CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v2 >${log_path}/slim_prune_fpgm_v2_f50_T_8card 2>&1
# for lite uncombined
mkdir slim_prune_fpgm_v2_f50_uncombined
cp ./fpgm_mobilenetv2_models/infer_models/0/* ./slim_prune_fpgm_v2_f50_uncombined/
cp ./fpgm_mobilenetv2_models/infer_models/0.* ./slim_prune_fpgm_v2_f50_uncombined/
copy_for_lite slim_prune_fpgm_v2_f50_uncombined ${models_from_train}
cd ${current_dir}
cat ${log_path}/slim_prune_fpgm_v2_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_v2_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_v2_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
Expand Down Expand Up @@ -373,7 +410,7 @@ cd ${current_dir}/demo/prune
CUDA_VISIBLE_DEVICES=${cudaid8} slim_prune_fpgm_v1 >${log_path}/slim_prune_fpgm_resnet34_f50_T_8card 2>&1
# for lite uncombined
mkdir slim_prune_fpgm_resnet34_f50_uncombined
cp ./fpgm_resnet34_models/infer_models/0/* ./slim_prune_fpgm_resnet34_f50_uncombined/
cp ./fpgm_resnet34_models/infer_models/0.* ./slim_prune_fpgm_resnet34_f50_uncombined/
copy_for_lite slim_prune_fpgm_resnet34_f50_uncombined ${models_from_train}
cd ${current_dir}
cat ${log_path}/slim_prune_fpgm_resnet34_f50_T_8card |grep Final |awk -F ' ' 'END{print "kpis\tprune_fpgm_resnet34_f50_acc_top1_gpu8\t"$8"\nkpis\tprune_fpgm_resnet34_f50_acc_top5_gpu8\t"$10}' |tr -d ";" | python _ce.py
Expand All @@ -393,11 +430,12 @@ python train.py \
--data "imagenet" \
--save_inference True \
--pretrained_model ../pretrain/$1_pretrained/ \
--num_epochs 1
--num_epochs 1 \
--batch_size 128
}

eval_prune(){
python eval.py --model $1 --data "imagenet" --model_path "./models/0"
python eval.py --model $1 --data "imagenet" --model_path "./models/infer_models/0"
}
for i in $(seq 0 0); do
CUDA_VISIBLE_DEVICES=${cudaid1} train_prune ${prune_models[$i]} >${log_path}/prune_${prune_models[$i]}_gpu1 2>&1
Expand All @@ -410,14 +448,76 @@ for i in $(seq 0 0); do
#move models for lite uncombined
cd ${current_dir}/demo/prune
mkdir slim_prune_${prune_models[$i]}_uncombined
cp ./models/infer_models/0/* slim_prune_${prune_models[$i]}_uncombined/
cp ./models/infer_models/0.* slim_prune_${prune_models[$i]}_uncombined/
copy_for_lite slim_prune_${prune_models[$i]}_uncombined ${models_from_train}
eval_prune ${prune_models[$i]} >${log_path}/prune_${prune_models[$i]}_eval 2>&1
print_info $? prune_${prune_models[$i]}_eval
if [ -d "models" ];then
mv models ${prune_models[$i]}_models
fi
done

# 3.4 dygraph
dy_prune_ResNet50_f42() {
  # Smoke-test the dygraph pruning demo (demo/dygraph/pruning) end to end:
  # single-card train, four-card distributed train, resume from a
  # checkpoint, eval, and inference-model export.
  # Globals read: slim_dir, dataset_path, log_path, cudaid1, cudaid4.
  # Each step writes stdout+stderr to ${log_path}/<case name>; the step's
  # exit status and the case name are then passed to print_info.
  # NOTE(review): the function and log names say ResNet50, but every step
  # passes --model="resnet34"; names are kept so existing log consumers
  # still match.
  cd "${slim_dir}/demo/dygraph/pruning"

  # Link the dataset only when nothing named "data" is present, so a re-run
  # neither fails nor drops a nested link inside an existing data directory.
  [ -e data ] || [ -L data ] || ln -s "${dataset_path}/slim/data" data

  CUDA_VISIBLE_DEVICES="${cudaid1}" python train.py \
    --use_gpu=True \
    --model="resnet34" \
    --data="imagenet" \
    --pruned_ratio=0.25 \
    --num_epochs=1 \
    --batch_size=128 \
    --lr_strategy="cosine_decay" \
    --criterion="fpgm" \
    --model_path="./fpgm_resnet34_025_120_models" > "${log_path}/dy_prune_ResNet50_f42_gpu1" 2>&1
  print_info $? dy_prune_ResNet50_f42_gpu1

  # Runs in the foreground with stderr captured. A trailing '&' here would
  # make $? always 0 and let the resume step below start while this job is
  # still writing to the same --model_path.
  # NOTE(review): --gpus is hard-coded to 4,5,6,7 — assumes ${cudaid4}
  # exposes exactly those device ids; confirm against where cudaid4 is set.
  CUDA_VISIBLE_DEVICES="${cudaid4}" python -m paddle.distributed.launch \
    --gpus="4,5,6,7" \
    --log_dir="fpgm_resnet34_f-42_train_log" \
    train.py \
    --use_gpu=True \
    --model="resnet34" \
    --data="imagenet" \
    --pruned_ratio=0.25 \
    --num_epochs=1 \
    --batch_size=128 \
    --lr_strategy="cosine_decay" \
    --criterion="fpgm" \
    --model_path="./fpgm_resnet34_025_120_models" > "${log_path}/dy_prune_ResNet50_f42_gpu4" 2>&1
  print_info $? dy_prune_ResNet50_f42_gpu4

  # 2.3 Resume training by passing --checkpoint (here with --num_epochs=2).
  # Presumably "<model_path>/0" is the checkpoint saved after the first
  # epoch — verify against train.py.
  CUDA_VISIBLE_DEVICES="${cudaid1}" python train.py \
    --use_gpu=True \
    --model="resnet34" \
    --data="imagenet" \
    --pruned_ratio=0.25 \
    --num_epochs=2 \
    --batch_size=128 \
    --lr_strategy="cosine_decay" \
    --criterion="fpgm" \
    --model_path="./fpgm_resnet34_025_120_models" \
    --checkpoint="./fpgm_resnet34_025_120_models/0" > "${log_path}/dy_prune_ResNet50_f42_gpu1_load" 2>&1
  print_info $? dy_prune_ResNet50_f42_gpu1_load

  # 2.4 Evaluation: run eval.py on the pruned and retrained checkpoint.
  CUDA_VISIBLE_DEVICES="${cudaid1}" python eval.py \
    --checkpoint=./fpgm_resnet34_025_120_models/1 \
    --model="resnet34" \
    --pruned_ratio=0.25 \
    --batch_size=128 > "${log_path}/dy_prune_ResNet50_f42_gpu1_eval" 2>&1
  print_info $? dy_prune_ResNet50_f42_gpu1_eval

  # 2.5 Export: write the model used for inference to ./infer_final/resnet.
  CUDA_VISIBLE_DEVICES="${cudaid1}" python export_model.py \
    --checkpoint=./fpgm_resnet34_025_120_models/final \
    --model="resnet34" \
    --pruned_ratio=0.25 \
    --output_path=./infer_final/resnet > "${log_path}/dy_prune_ResNet50_f42_gpu1_export" 2>&1
  print_info $? dy_prune_ResNet50_f42_gpu1_export
}
dy_prune_ResNet50_f42

#4 nas
# 4.1 sa_nas_mobilenetv2
cd ${current_dir}/demo/nas
Expand Down Expand Up @@ -445,39 +545,43 @@ CUDA_VISIBLE_DEVICES=${cudaid8} python rl_nas_mobilenetv2.py --search_steps 1 --
print_info $? ${model}

# 4.4 parl_nas
parl_nas() {
  # Run parl_nas_mobilenetv2.py for a single search step twice: once with
  # the ${cudaid1} device set on port 8887, then with ${cudaid8} on port
  # 8889. The global `model` is set to the case name before each run;
  # output goes to ${log_path}/${model} and the exit status is passed to
  # print_info along with that name.
  local -a tags=(1card 8card)
  local -a devices=("${cudaid1}" "${cudaid8}")
  local -a ports=(8887 8889)
  local idx
  for idx in 0 1; do
    model="parl_nas_v2_T_${tags[idx]}"
    CUDA_VISIBLE_DEVICES=${devices[idx]} python parl_nas_mobilenetv2.py \
      --search_steps 1 --port "${ports[idx]}" >${log_path}/${model} 2>&1
    print_info $? ${model}
  done
}
#parl_nas
# 5 darts
# search 1card # DARTS一阶近似搜索方法
cd ${current_dir}/demo/darts
model=darts1_search_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
model=darts1_search_8card
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
# # DARTS 二阶近似搜索方法
model=darts2_search_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --unrolled=True --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
model=darts2_search_8card
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --unrolled=True --use_multiprocess False --batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
# PC-DARTS
model=pcdarts_search_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=256 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=128 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
print_info $? ${model}
model=pcdarts_search_8card
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=256 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid8} python search.py --epochs 1 --method='PC-DARTS' --use_multiprocess False --batch_size=128 --learning_rate=0.1 --arch_learning_rate=6e-4 --epochs_no_archopt=15 >${log_path}/${model} 2>&1
print_info $? ${model}
# 分布式 search
model=darts1_search_distributed
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_search search.py --use_data_parallel 1 --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 \
--log_dir ./mylog_search search.py --use_data_parallel 1 \
--epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
print_info $? ${model}
model=darts2_search_distributed
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_search search.py --use_data_parallel 1 --epochs 1 --unrolled=True --use_multiprocess False >${log_path}/${model} 2>&1
Expand All @@ -487,20 +591,26 @@ CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2
print_info $? ${model}
#train
model=pcdarts_train_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' --epochs 1 --use_multiprocess False >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' \
--epochs 1 --use_multiprocess False --batch_size 64 >${log_path}/${model} 2>&1
print_info $? ${model}
model=pcdarts_train_imagenet_8card
CUDA_VISIBLE_DEVICES=${cudaid8} python train_imagenet.py --arch='PC_DARTS' --epochs 1 --use_multiprocess False --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid8} python train_imagenet.py --arch='PC_DARTS' \
--epochs 1 --use_multiprocess False --data_dir ../data/ILSVRC2012 --batch_size 64 >${log_path}/${model} 2>&1
print_info $? ${model}
# 分布式 train
model=dartsv2_train_distributed
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_train train.py --use_data_parallel 1 --arch='DARTS_V2' >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch \
--gpus=0,1,2,3 --log_dir ./mylog_train train.py --use_data_parallel 1 \
--arch='DARTS_V2' >${log_path}/${model} 2>&1
print_info $? ${model}
model=dartsv2_train_imagenet_distributed
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch --gpus=0,1,2,3 --log_dir ./mylog_train_imagenet train_imagenet.py --use_data_parallel 1 --arch='DARTS_V2' --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
CUDA_VISIBLE_DEVICES=${cudaid4} python -m paddle.distributed.launch \
--gpus=0,1,2,3 --log_dir ./mylog_train_imagenet train_imagenet.py \
--use_data_parallel 1 --arch='DARTS_V2' --data_dir ../data/ILSVRC2012 >${log_path}/${model} 2>&1
print_info $? ${model}
# 可视化
yum -y install graphviz
pip install graphviz
model=slim_darts_visualize_pcdarts
python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
print_info $? ${model}
Expand Down Expand Up @@ -530,4 +640,4 @@ CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
--test_data_dir=./lfw/ >${log_path}/${model} 2>&1
print_info $? ${model}
}
slimfacenet
#slimfacenet