From c1e595d4ccedc026a51f393e6184a6d91e0bdd70 Mon Sep 17 00:00:00 2001 From: borisgin Date: Thu, 2 Nov 2017 12:40:48 -0700 Subject: [PATCH] tuned bvlc_googlenet/lars* --- common_plot.py | 2 +- models/bvlc_googlenet/solver_lars.prototxt | 19 ++- models/bvlc_googlenet/train_googlenet_lars.sh | 2 +- models/bvlc_googlenet/train_val_lars.prototxt | 136 +++++++++--------- 4 files changed, 82 insertions(+), 77 deletions(-) diff --git a/common_plot.py b/common_plot.py index 8891cda0910..28479e258f3 100644 --- a/common_plot.py +++ b/common_plot.py @@ -41,7 +41,7 @@ def get_train_loss(log): def get_epochs(log): gpus = re.findall(r' GPU (\d*):', log) num_gpus = len(gpus) - print num_gpus + #print num_gpus max_iter = re.findall(r'max_iter: (\d*)', log) iter_size = re.findall(r'iter_size: (\d*)', log) batch_size = re.findall(r'batch_size: (\d*)',log) diff --git a/models/bvlc_googlenet/solver_lars.prototxt b/models/bvlc_googlenet/solver_lars.prototxt index 112c557566a..efdde406ff2 100644 --- a/models/bvlc_googlenet/solver_lars.prototxt +++ b/models/bvlc_googlenet/solver_lars.prototxt @@ -1,25 +1,30 @@ net: "models/bvlc_googlenet/train_val_lars.prototxt" test_iter: 1562 # 50000/32 -test_interval: 5000 +test_interval: 150 test_initialization: false -display: 500 +display: 50 -max_iter: 500000 # 100 epoch -#iter_size: 4 +max_iter: 15600 # 100 epoch +iter_size: 8 -base_lr: 4.00 # B=256 +base_lr: 6.00 # B=8K local_lr_auto: true local_gw_ratio: 0.001 -#rampup_lr: 1.0 -#rampup_interval: 10000 +rampup_lr: 0.2 +rampup_interval: 2000 lr_policy: "poly" power: 2.0 + momentum: 0.9 +#momentum: 0.8 +#momentum_policy: "poly" +#max_momentum: 0.92 + weight_decay: 1e-4 snapshot: 1000000 diff --git a/models/bvlc_googlenet/train_googlenet_lars.sh b/models/bvlc_googlenet/train_googlenet_lars.sh index 4a35ab72c63..4e06235b612 100755 --- a/models/bvlc_googlenet/train_googlenet_lars.sh +++ b/models/bvlc_googlenet/train_googlenet_lars.sh @@ -2,4 +2,4 @@ ./build/tools/caffe train \ 
--solver=models/bvlc_googlenet/solver_lars.prototxt -gpu=all \ - 2>&1 | tee models/bvlc_googlenet/logs/googlenet_lars_fp16_b256_lr4.log + 2>&1 | tee models/bvlc_googlenet/logs/googlenet_lars_fp16_b8K_lr6.log diff --git a/models/bvlc_googlenet/train_val_lars.prototxt b/models/bvlc_googlenet/train_val_lars.prototxt index cf97e2eebf0..41b95862c53 100644 --- a/models/bvlc_googlenet/train_val_lars.prototxt +++ b/models/bvlc_googlenet/train_val_lars.prototxt @@ -2,10 +2,10 @@ name: "GoogleNet_fp16" default_forward_type: FLOAT16 default_backward_type: FLOAT16 -#default_forward_math: FLOAT -#default_backward_math: FLOAT +default_forward_math: FLOAT16 +default_backward_math: FLOAT16 -global_grad_scale: 100. +global_grad_scale: 1000. layer { name: "data" @@ -17,7 +17,7 @@ layer { # source: "/data/imagenet/train-lmdb-256x256" source: "/data/imagenet/train-c2lmdb-480" backend: LMDB - batch_size: 256 + batch_size: 1024 cache: true shuffle: true } @@ -74,7 +74,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -119,7 +119,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -143,7 +143,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -188,7 +188,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -211,7 +211,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -235,7 +235,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -258,7 +258,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -282,7 +282,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -317,7 +317,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -349,7 +349,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -372,7 +372,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ 
-396,7 +396,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -419,7 +419,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -443,7 +443,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -478,7 +478,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -521,7 +521,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -544,7 +544,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -568,7 +568,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -591,7 +591,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -615,7 +615,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -650,7 +650,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -693,7 +693,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -715,7 +715,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -746,7 +746,7 @@ layer { } bias_filler { type: "constant" - value: 0 + value: 0.001 } } } @@ -788,7 +788,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -811,7 +811,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -835,7 +835,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -858,7 +858,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -882,7 +882,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -917,7 +917,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -949,7 +949,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -972,7 +972,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -996,7 
+996,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1019,7 +1019,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1043,7 +1043,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1078,7 +1078,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1110,7 +1110,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1133,7 +1133,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1157,7 +1157,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1180,7 +1180,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1204,7 +1204,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1239,7 +1239,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1282,7 +1282,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1304,7 +1304,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1335,7 +1335,7 @@ layer { } bias_filler { type: "constant" - value: 0 + value: 0.001 } } } @@ -1377,7 +1377,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1400,7 +1400,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1424,7 +1424,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1447,7 +1447,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1471,7 +1471,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1506,7 +1506,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1549,7 +1549,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1572,7 +1572,7 @@ layer { } bias_filler { type: "constant" - value: 
0.2 + value: 0.001 } } } @@ -1596,7 +1596,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1619,7 +1619,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1643,7 +1643,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1678,7 +1678,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1710,7 +1710,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1733,7 +1733,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1757,7 +1757,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1780,7 +1780,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1804,7 +1804,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1839,7 +1839,7 @@ layer { } bias_filler { type: "constant" - value: 0.2 + value: 0.001 } } } @@ -1890,7 +1890,7 @@ layer { } bias_filler { type: "constant" - value: 0 + value: 0.001 } } }