Commit

improve the format

MingSun-Tse committed May 11, 2019
1 parent 2f04946 commit 0aabce1
Showing 3 changed files with 46 additions and 37 deletions.
29 changes: 24 additions & 5 deletions include/caffe/adaptive_probabilistic_pruning.hpp
@@ -97,7 +97,17 @@ class APP {
static string show_layer;
static int show_num_layer;
static int show_num_weight;

// Some constants used to control the pruning process in solver.cpp
static Dtype MUL_LR_DECAY; // the multiplier for lr decay
static int MAX_CNT_LR_DECAY; // the max number of lr decays
static Dtype ACCURACY_GAP_THRESHOLD;
static Dtype INCRE_PR_BOTTOMLINE;
static int CNT_AFTER_MAX_ACC;
static Dtype COEEF_ACC_2_PR; // multiplier mapping the accuracy margin to incre_pr
static Dtype TR_MUL_BOTTOM; // the lower bound of the target_reg multiplier
static Dtype STANDARD_INCRE_PR;
};

template<typename Dtype> string APP<Dtype>::prune_method = "None"; /// initialized for caffe test, which has no solver, but this info is still needed in the layers.
template<typename Dtype> string APP<Dtype>::prune_unit = "None";
@@ -180,10 +190,19 @@ class APP {
template<typename Dtype> vector<Dtype> APP<Dtype>::num_param;

// 3. Logging
template<typename Dtype> int APP<Dtype>::show_interval = 10; // the interval to print pruning progress log
template<typename Dtype> string APP<Dtype>::show_layer = "0111"; // '1' means to print the weights of the layer with the index
template<typename Dtype> int APP<Dtype>::show_num_layer = 100; // work with show_interval, how many layers get printed
template<typename Dtype> int APP<Dtype>::show_num_weight = 20; // work with show_layer, how many weights get printed

template<typename Dtype> Dtype APP<Dtype>::MUL_LR_DECAY = 0.1;
template<typename Dtype> int APP<Dtype>::MAX_CNT_LR_DECAY = 4;
template<typename Dtype> Dtype APP<Dtype>::ACCURACY_GAP_THRESHOLD = 5e-4;
template<typename Dtype> Dtype APP<Dtype>::INCRE_PR_BOTTOMLINE = 0.01;
template<typename Dtype> int APP<Dtype>::CNT_AFTER_MAX_ACC = 4;
template<typename Dtype> Dtype APP<Dtype>::COEEF_ACC_2_PR = 10;
template<typename Dtype> Dtype APP<Dtype>::TR_MUL_BOTTOM = 0.25;
template<typename Dtype> Dtype APP<Dtype>::STANDARD_INCRE_PR = 0.05;
}

#endif
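
Moving these knobs from #define macros in solver.cpp to typed static members of APP makes them visible to every translation unit under the proper Dtype and keeps each default next to the rest of the pruning state. As a quick illustration of the schedule the two lr constants encode, here is a minimal standalone sketch (the loop is a stand-in for "decay lr each time accuracy plateaus", not the solver's actual control flow):

// Minimal sketch: how MUL_LR_DECAY and MAX_CNT_LR_DECAY bound the retrain
// schedule. The stopping rule mirrors the checks in CheckMaxAcc below.
#include <cstdio>

int main() {
  const float MUL_LR_DECAY = 0.1f;  // multiplier applied on each decay
  const int MAX_CNT_LR_DECAY = 4;   // stop after this many decays
  float lr = 0.001f;
  int cnt_decay_lr = 0;
  while (cnt_decay_lr < MAX_CNT_LR_DECAY + 1 && lr >= 1e-6f) {
    std::printf("lr period %d: lr = %.7f\n", cnt_decay_lr, lr);
    lr *= MUL_LR_DECAY;
    ++cnt_decay_lr;
  }
  std::printf("retrain finished after %d lr periods\n", cnt_decay_lr);
  return 0;
}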
42 changes: 16 additions & 26 deletions src/caffe/solver.cpp
@@ -15,16 +15,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <numeric>

#include "boost/algorithm/string.hpp"
#define MUL_LR_DECAY 0.1 // the multiplier of lr decay
#define MAX_CNT_LR_DECAY 4 // the max number of lr decay
#define ACCURACY_GAP_THRESHOLD 0.0005
#define INCRE_PR_BOTTOMLINE 0.01
#define CNT_AFTER_MAX_ACC 4
#define COEEF_ACC_2_PR 10 // multiplier of acc margin to incre_pr
#define TR_MUL_BOTTOM 0.25 // the bottomline of target_reg multiplier
#define STANDARD_INCRE_PR 0.05

namespace caffe {

@@ -434,7 +425,6 @@ void Solver<Dtype>::Step(int iters) {
}
}

// Check acc based on loss
if (APP<Dtype>::prune_state == "losseval" && iter_ - APP<Dtype>::stage_iter_prune_finished == APP<Dtype>::losseval_interval) {
cout << "[app] 'losseval' done, retrain to check accuracy before starting a new pruning stage. iter: " << iter_ << time_buffer_ << endl;
SetPruneState("retrain");
@@ -445,9 +435,9 @@
&& APP<Dtype>::retrain_test_interval
&& iter_ % APP<Dtype>::retrain_test_interval == 0) {
if (APP<Dtype>::acc_borderline <= 0) {
CheckMaxAcc("retrain", CNT_AFTER_MAX_ACC + 2);
CheckMaxAcc("retrain", APP<Dtype>::CNT_AFTER_MAX_ACC + 2);
} else {
CheckMaxAcc("retrain", CNT_AFTER_MAX_ACC);
CheckMaxAcc("retrain", APP<Dtype>::CNT_AFTER_MAX_ACC);
}
}

@@ -456,7 +446,7 @@
&& APP<Dtype>::retrain_test_interval
&& iter_ % APP<Dtype>::retrain_test_interval == 0
&& state_begin_iter_ != iter_) { // do not test on the first 'final_retrain' iter, because it's unnecessary and harmful
CheckMaxAcc("final_retrain", CNT_AFTER_MAX_ACC + 4);
CheckMaxAcc("final_retrain", APP<Dtype>::CNT_AFTER_MAX_ACC + 4);
}

// Print speedup & compression ratio each iter
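
All three retrain paths above funnel into CheckMaxAcc with a different patience budget: CNT_AFTER_MAX_ACC for an ordinary 'retrain' stage, +2 when no acc_borderline is set, +4 for 'final_retrain'. A hedged sketch of the underlying convergence test, assuming the semantics the call sites suggest (the helper below is illustrative, not the member function itself, which also handles lr decay and snapshotting):

// Illustrative helper: the current lr period counts as converged once
// cnt_after_max_acc accuracy tests have passed without a new maximum.
#include <cstdio>
#include <vector>

bool lr_period_converged(const std::vector<float>& accs, int cnt_after_max_acc) {
  int best = 0;
  for (int i = 1; i < (int)accs.size(); ++i)
    if (accs[i] > accs[best]) best = i;
  return (int)accs.size() - 1 - best >= cnt_after_max_acc;
}

int main() {
  const std::vector<float> accs = {0.710f, 0.729f, 0.731f, 0.730f,
                                   0.729f, 0.728f, 0.727f};
  // The max sits at index 2 and four tests have passed since, so with
  // cnt_after_max_acc = 4 the period counts as converged.
  std::printf("converged: %d\n", lr_period_converged(accs, 4));
  return 0;
}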
@@ -542,7 +532,7 @@ void Solver<Dtype>::CheckMaxAcc(const string& prune_state, const int& cnt_after_
}

// Decay lr
APP<Dtype>::learning_rate *= MUL_LR_DECAY; // When current learning rate has reached its ceiling accuracy, decay it.
APP<Dtype>::learning_rate *= APP<Dtype>::MUL_LR_DECAY; // When current learning rate has reached its ceiling accuracy, decay it.
++ cnt_decay_lr_;
sprintf(logstr, "[app] '%s' of current lr period finished, final acc = %f, iter = %d, decay lr (new: %.7f)",
prune_state.c_str(), current_max_acc_, current_max_acc_iter_, APP<Dtype>::learning_rate);
@@ -569,13 +559,13 @@
sprintf(logstr, "[app] All prune done. Output the best caffemodel, iter = %d, acc = %f", final_output_iter, final_output_acc);
cout << logstr << endl;
PrintFinalPrunedRatio();
// RemoveUselessSnapshot("", snapshot_iters_.back());
RemoveUselessSnapshot("", snapshot_iters_.back());
exit(0);
}

// Check if retraining can be stopped in "retrain" state
if (cnt_decay_lr_ >= MAX_CNT_LR_DECAY + 1 || current_max_acc_ < max_acc_ || APP<Dtype>::learning_rate < 1e-6) {
APP<Dtype>::learning_rate /= MUL_LR_DECAY; // restore to last lr, because this lr is not used actually.
if (cnt_decay_lr_ >= APP<Dtype>::MAX_CNT_LR_DECAY + 1 || current_max_acc_ < max_acc_ || APP<Dtype>::learning_rate < 1e-6) {
APP<Dtype>::learning_rate /= APP<Dtype>::MUL_LR_DECAY; // restore the previous lr, because this one is never actually used.
sprintf(logstr, "[app] All '%s' done: lr has decayed enough OR max acc of this lr period is not better than the previous one.", prune_state.c_str());
cout << logstr << " Output the best caffemodel, iter = " << max_acc_iter_ << ", acc = " << max_acc_
<< ". Resuming from iter = " << first_retrain_finished_iter_ << endl;
@@ -674,7 +664,7 @@ void Solver<Dtype>::SetPruneState(const string& prune_state) {

template <typename Dtype>
void Solver<Dtype>::CheckPruneStage(const Dtype& acc, const int& last_max_acc_iter, const Dtype& last_max_acc) {
if (APP<Dtype>::acc_borderline - acc > ACCURACY_GAP_THRESHOLD) { // accuracy bad
if (APP<Dtype>::acc_borderline - acc > APP<Dtype>::ACCURACY_GAP_THRESHOLD) { // accuracy bad
for (int L = 0; L < APP<Dtype>::layer_index.size(); ++L) {
if (APP<Dtype>::prune_ratio[L] == 0) { continue; }
APP<Dtype>::last_infeasible_prune_ratio[L] = APP<Dtype>::pruned_ratio_for_comparison[L];
@@ -687,8 +677,8 @@
Restore(resume_file.c_str(), false); // Note to restore after SetNewCurrentPruneRatio, because restore will change the state of network, like num_pruned_col
SetPruneState("prune");
// Check if incre_pr is large enough
if (incre_pr < INCRE_PR_BOTTOMLINE) {
cout << "[app]\n[app] Stop: incre_pr is too small (<" << INCRE_PR_BOTTOMLINE << "), so another pruning stage is meaningless. Go to 'final_retrain'." << endl;
if (incre_pr < APP<Dtype>::INCRE_PR_BOTTOMLINE) {
cout << "[app]\n[app] Stop: incre_pr is too small (<" << APP<Dtype>::INCRE_PR_BOTTOMLINE << "), so another pruning stage is meaningless. Go to 'final_retrain'." << endl;
const string resume_file = param_.snapshot_prefix() + lastretrain_prefix_ + "_iter_" + caffe::format_int(APP<Dtype>::last_feasible_prune_iter2) + ".solverstate";
Restore(resume_file.c_str(), false);
cout << "[app] ===== resuming from: " << resume_file << endl;
@@ -747,7 +737,7 @@ const Dtype Solver<Dtype>::SetNewCurrentPruneRatio(const bool& IF_roll_back, con
incre_pr = APP<Dtype>::last_prune_ratio_incre / (APP<Dtype>::last_feasible_acc - val_acc)
* (APP<Dtype>::last_feasible_acc - APP<Dtype>::acc_borderline);
} else {
incre_pr = min(max((Dtype)INCRE_PR_BOTTOMLINE, (val_acc - APP<Dtype>::acc_borderline) * COEEF_ACC_2_PR), (Dtype)0.2); // range: [INCRE_PR_BOTTOMLINE, 0.2]
incre_pr = min(max((Dtype)APP<Dtype>::INCRE_PR_BOTTOMLINE, (val_acc - APP<Dtype>::acc_borderline) * APP<Dtype>::COEEF_ACC_2_PR), (Dtype)0.2); // range: [APP<Dtype>::INCRE_PR_BOTTOMLINE, 0.2]
}
// Check incre_pr
APP<Dtype>::last_prune_ratio_incre = incre_pr;
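
The two incre_pr branches admit a quick numeric check: without a roll-back, the accuracy margin over the borderline is scaled by COEEF_ACC_2_PR and clamped to [INCRE_PR_BOTTOMLINE, 0.2]; after a roll-back, the last increment is rescaled by how much of the accuracy drop the borderline still allows. A sketch with made-up accuracies (the constants match the header defaults; everything else is illustrative):

// Worked example of the incre_pr update above.
#include <algorithm>
#include <cstdio>

int main() {
  const float INCRE_PR_BOTTOMLINE = 0.01f, COEEF_ACC_2_PR = 10.0f;
  const float acc_borderline = 0.730f;

  // No roll-back: a 0.008 accuracy margin maps to a 0.08 ratio increment,
  // clamped to [INCRE_PR_BOTTOMLINE, 0.2].
  float val_acc = 0.738f;
  float incre_pr = std::min(std::max(INCRE_PR_BOTTOMLINE,
      (val_acc - acc_borderline) * COEEF_ACC_2_PR), 0.2f);
  std::printf("margin branch: incre_pr = %.3f\n", incre_pr);  // 0.080

  // Roll-back: linearly rescale the last increment by the fraction of the
  // accuracy drop that the borderline still allows.
  const float last_prune_ratio_incre = 0.05f, last_feasible_acc = 0.740f;
  val_acc = 0.725f;  // overshot below the borderline
  incre_pr = last_prune_ratio_incre / (last_feasible_acc - val_acc)
             * (last_feasible_acc - acc_borderline);
  std::printf("roll-back branch: incre_pr = %.4f\n", incre_pr);  // ~0.0333
  return 0;
}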
@@ -950,13 +940,13 @@ const Dtype Solver<Dtype>::IncrePR_2_TRMul(const Dtype& incre_pr) {
y1 = 3 * (y0 - 0.5) + 1, which constrains the range to (-0.5, 2.5)
s.t.
x = INCRE_PR_BOTTOMLINE -> y1 = 0.2
x = STANDARD_INCRE_PR -> y1 = 1
x = APP<Dtype>::INCRE_PR_BOTTOMLINE -> y1 = TR_MUL_BOTTOM
x = APP<Dtype>::STANDARD_INCRE_PR -> y1 = 1
*/
const Dtype y0 = (TR_MUL_BOTTOM - 1) / 3 + 0.5;
const Dtype k = log(1/y0 - 1) / (STANDARD_INCRE_PR - INCRE_PR_BOTTOMLINE);
const Dtype y0 = (APP<Dtype>::TR_MUL_BOTTOM - 1) / 3 + 0.5;
const Dtype k = log(1/y0 - 1) / (APP<Dtype>::STANDARD_INCRE_PR - APP<Dtype>::INCRE_PR_BOTTOMLINE);

const Dtype y0_ = 1 / (1 + exp(-k * (incre_pr - STANDARD_INCRE_PR)));
const Dtype y0_ = 1 / (1 + exp(-k * (incre_pr - APP<Dtype>::STANDARD_INCRE_PR)));
const Dtype y1_ = 3 * (y0_ - 0.5) + 1;
return y1_;
}
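
Numerically, the mapping above is a logistic curve rescaled so that an increment of INCRE_PR_BOTTOMLINE yields the multiplier TR_MUL_BOTTOM and an increment of STANDARD_INCRE_PR yields exactly 1 (with the current default TR_MUL_BOTTOM = 0.25, the lower endpoint evaluates to 0.25). A self-contained check using the header defaults:

// Numeric check of the IncrePR_2_TRMul mapping with the default constants.
#include <cmath>
#include <cstdio>

float incre_pr_to_tr_mul(float incre_pr) {
  const float TR_MUL_BOTTOM = 0.25f, STANDARD_INCRE_PR = 0.05f,
              INCRE_PR_BOTTOMLINE = 0.01f;
  const float y0 = (TR_MUL_BOTTOM - 1) / 3 + 0.5f;  // 0.25
  const float k = std::log(1 / y0 - 1) / (STANDARD_INCRE_PR - INCRE_PR_BOTTOMLINE);
  const float y0_ = 1 / (1 + std::exp(-k * (incre_pr - STANDARD_INCRE_PR)));
  return 3 * (y0_ - 0.5f) + 1;  // rescale the logistic output into (-0.5, 2.5)
}

int main() {
  // Endpoints: the bottomline increment maps to TR_MUL_BOTTOM, the standard
  // increment to 1; larger increments saturate toward 2.5.
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.01f));  // 0.250
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.05f));  // 1.000
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.20f));  // ~2.45
  return 0;
}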
12 changes: 6 additions & 6 deletions src/caffe/solvers/sgd_solver.cpp
@@ -227,7 +227,7 @@ void SGDSolver<Dtype>::Regularize(int param_id) {
temp_[param_id]->gpu_data(),
net_params[param_id]->mutable_gpu_diff());

} else if (regularization_type == "SelectiveReg") {
} else if (regularization_type == "Reg_Col") {
// add weight decay; weight decay is still used here
caffe_gpu_axpy(net_params[param_id]->count(),
local_decay,
@@ -261,6 +261,7 @@
if (APP<Dtype>::step_ % APP<Dtype>::prune_interval == 0) {
if (APP<Dtype>::prune_coremthd == "Reg-rank" || APP<Dtype>::prune_coremthd == "Reg") {
// print ave-magnitude
/*
cout << "ave-magnitude_col " << this->iter_ << " " << layer_name << ":";
for (int j = 0; j < num_col; ++j) {
Dtype sum = 0;
Expand All @@ -270,7 +271,7 @@ void SGDSolver<Dtype>::Regularize(int param_id) {
cout << " " << sum/num_row;
}
cout << endl;

*/

// Sort 01: sort by L1-norm
typedef std::pair<Dtype, int> mypair;
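
Sort 01 ranks columns by ascending L1-norm using the same pair<score, index> idiom as above. A minimal sketch with hypothetical 2x3 weights (the repo's version also folds history scores into a second sort, Sort 02):

// Sketch of "Sort 01: sort by L1-norm" over columns of a row-major matrix.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  typedef std::pair<float, int> mypair;  // (L1-norm, column index)
  const int num_row = 2, num_col = 3;
  const float weight[num_row * num_col] = {0.5f, -0.1f, 0.9f,
                                           -0.4f, 0.2f, -0.8f};
  std::vector<mypair> col_score(num_col);
  for (int j = 0; j < num_col; ++j) {
    float sum = 0;
    for (int i = 0; i < num_row; ++i)
      sum += std::fabs(weight[i * num_col + j]);
    col_score[j] = std::make_pair(sum, j);
  }
  // Ascending by L1-norm: the smallest-magnitude columns rank first and are
  // the first candidates for pruning.
  std::sort(col_score.begin(), col_score.end());
  for (int r = 0; r < num_col; ++r)
    std::printf("rank %d: col %d (L1 = %.2f)\n",
                r, col_score[r].second, col_score[r].first);
  return 0;
}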
@@ -315,7 +316,6 @@
const Dtype alpha21 = (num_col_to_prune_ == 1) ? 0 : log(1/kk2) / (num_col_to_prune_-1);
const Dtype alpha22 = (num_col_to_prune_ == num_col_-1) ? 0 : log(1/kk2) / (num_col_-1 - num_col_to_prune_);

APP<Dtype>::IF_scheme1_when_Reg_rank = false; // scheme 2 is the default.
for (int j = 0; j < num_col_; ++j) { // j: rank
const int col_of_rank_j = col_hrank[j + num_pruned_col].second; // Note the real rank is j + num_pruned_col
const Dtype Delta = APP<Dtype>::IF_scheme1_when_Reg_rank
@@ -336,7 +336,7 @@
mumasks[i * num_col + col_of_rank_j] = 0;
muweight[i* num_col + col_of_rank_j] = 0;
}
muhistory_score[col_of_rank_j] = APP<Dtype>::step_ - 1000000 - (muhistory_punish[col_of_rank_j] - APP<Dtype>::target_reg); // This is to
muhistory_score[col_of_rank_j] = APP<Dtype>::step_ - 1000000 - (muhistory_punish[col_of_rank_j] - APP<Dtype>::target_reg);
// keep the pruned weight groups sorted to the left in Sort 01 and 02 above; the earlier a group is pruned, the further left it sorts

// Check whether the corresponding row in the last layer could be pruned
@@ -474,7 +474,7 @@
net_params[param_id]->gpu_diff(),
net_params[param_id]->mutable_gpu_diff());

} else if (regularization_type == "Auto-balanced") {
} else if (regularization_type == "AFP_Col") {
const vector<int>& shape = this->net_->learnable_params()[param_id]->shape();
const string& layer_name = this->net_->layer_names()[this->net_->param_layer_indices()[param_id].first];
if (shape.size() != 4 || APP<Dtype>::layer_index.count(layer_name) == 0) { // not the Conv weights
@@ -573,7 +573,7 @@
}
APP<Dtype>::pruned_ratio[L] = 0.2; // just set a positive value to pass the ClearHistory check
}
} else if (regularization_type == "Auto-balanced_Row") {
} else if (regularization_type == "AFP_Row") {
const vector<int>& shape = this->net_->learnable_params()[param_id]->shape();
const string& layer_name = this->net_->layer_names()[this->net_->param_layer_indices()[param_id].first];
if (shape.size() != 4 || APP<Dtype>::layer_index.count(layer_name) == 0) { // not the Conv weights
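
Both AFP branches start from the same guard: a parameter blob is treated as prunable Conv weights only if it is 4-D (output channels, input channels, kernel height, kernel width) and its layer is registered in layer_index. A hedged sketch of that check (standalone stand-ins, not the repo's types):

// Sketch of the Conv-weights guard used by the AFP_Col / AFP_Row branches.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

bool is_prunable_conv(const std::vector<int>& shape, const std::string& name,
                      const std::map<std::string, int>& layer_index) {
  return shape.size() == 4 && layer_index.count(name) != 0;
}

int main() {
  std::map<std::string, int> layer_index;
  layer_index["conv1"] = 0;
  std::printf("%d\n", is_prunable_conv({64, 3, 3, 3}, "conv1", layer_index));  // 1
  std::printf("%d\n", is_prunable_conv({64}, "conv1_bias", layer_index));      // 0
  return 0;
}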
