Commit

improve the format

MingSun-Tse committed May 11, 2019
1 parent 2f04946 commit 0aabce1
Showing 3 changed files with 46 additions and 37 deletions.
29 changes: 24 additions & 5 deletions include/caffe/adaptive_probabilistic_pruning.hpp
@@ -97,7 +97,17 @@ class APP {
static string show_layer;
static int show_num_layer;
static int show_num_weight;

// Some constants used to control the pruning process in solver.cpp
static Dtype MUL_LR_DECAY; // the multiplier for lr decay
static int MAX_CNT_LR_DECAY; // the max number of lr decays
static Dtype ACCURACY_GAP_THRESHOLD;
static Dtype INCRE_PR_BOTTOMLINE;
static int CNT_AFTER_MAX_ACC;
static Dtype COEEF_ACC_2_PR; // multiplier mapping the accuracy margin to incre_pr
static Dtype TR_MUL_BOTTOM; // the lower bound of the target_reg multiplier
static Dtype STANDARD_INCRE_PR;
};

template<typename Dtype> string APP<Dtype>::prune_method = "None"; /// initialized for caffe test, which has no solver, but this info is still needed in the layers.
template<typename Dtype> string APP<Dtype>::prune_unit = "None";
@@ -180,10 +190,19 @@ class APP {
template<typename Dtype> vector<Dtype> APP<Dtype>::num_param;

// 3. Logging
template<typename Dtype> int APP<Dtype>::show_interval = 10; // the interval to print pruning progress log
template<typename Dtype> string APP<Dtype>::show_layer = "0111"; // '1' means to print the weights of the layer with the index
template<typename Dtype> int APP<Dtype>::show_num_layer = 100; // work with show_interval, how many layers get printed
template<typename Dtype> int APP<Dtype>::show_num_weight = 20; // work with show_layer, how many weights get printed

template<typename Dtype> Dtype APP<Dtype>::MUL_LR_DECAY = 0.1;
template<typename Dtype> int APP<Dtype>::MAX_CNT_LR_DECAY = 4;
template<typename Dtype> Dtype APP<Dtype>::ACCURACY_GAP_THRESHOLD = 5e-4;
template<typename Dtype> Dtype APP<Dtype>::INCRE_PR_BOTTOMLINE = 0.01;
template<typename Dtype> int APP<Dtype>::CNT_AFTER_MAX_ACC = 4;
template<typename Dtype> Dtype APP<Dtype>::COEEF_ACC_2_PR = 10;
template<typename Dtype> Dtype APP<Dtype>::TR_MUL_BOTTOM = 0.25;
template<typename Dtype> Dtype APP<Dtype>::STANDARD_INCRE_PR = 0.05;
}

#endif
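
Moving these knobs from #define macros in solver.cpp to typed static members of APP makes them visible to every translation unit under the proper Dtype and keeps each default next to the rest of the pruning state. As a quick illustration of the schedule the two lr constants encode, here is a minimal standalone sketch (the loop is a stand-in for "decay lr each time accuracy plateaus", not the solver's actual control flow):

// Minimal sketch: how MUL_LR_DECAY and MAX_CNT_LR_DECAY bound the retrain
// schedule. The stopping rule mirrors the checks in CheckMaxAcc below.
#include <cstdio>

int main() {
  const float MUL_LR_DECAY = 0.1f;  // multiplier applied on each decay
  const int MAX_CNT_LR_DECAY = 4;   // stop after this many decays
  float lr = 0.001f;
  int cnt_decay_lr = 0;
  while (cnt_decay_lr < MAX_CNT_LR_DECAY + 1 && lr >= 1e-6f) {
    std::printf("lr period %d: lr = %.7f\n", cnt_decay_lr, lr);
    lr *= MUL_LR_DECAY;
    ++cnt_decay_lr;
  }
  std::printf("retrain finished after %d lr periods\n", cnt_decay_lr);
  return 0;
}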
42 changes: 16 additions & 26 deletions src/caffe/solver.cpp
@@ -15,16 +15,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <numeric>

#include "boost/algorithm/string.hpp"
#define MUL_LR_DECAY 0.1 // the multiplier of lr decay
#define MAX_CNT_LR_DECAY 4 // the max number of lr decay
#define ACCURACY_GAP_THRESHOLD 0.0005
#define INCRE_PR_BOTTOMLINE 0.01
#define CNT_AFTER_MAX_ACC 4
#define COEEF_ACC_2_PR 10 // multiplier of acc margin to incre_pr
#define TR_MUL_BOTTOM 0.25 // the bottomline of target_reg multiplier
#define STANDARD_INCRE_PR 0.05

namespace caffe {

@@ -434,7 +425,6 @@ void Solver<Dtype>::Step(int iters) {
}
}

// Check acc based on loss
if (APP<Dtype>::prune_state == "losseval" && iter_ - APP<Dtype>::stage_iter_prune_finished == APP<Dtype>::losseval_interval) {
cout << "[app] 'losseval' done, retrain to check accuracy before starting a new pruning stage. iter: " << iter_ << time_buffer_ << endl;
SetPruneState("retrain");
@@ -445,9 +435,9 @@
&& APP<Dtype>::retrain_test_interval
&& iter_ % APP<Dtype>::retrain_test_interval == 0) {
if (APP<Dtype>::acc_borderline <= 0) {
CheckMaxAcc("retrain", CNT_AFTER_MAX_ACC + 2);
CheckMaxAcc("retrain", APP<Dtype>::CNT_AFTER_MAX_ACC + 2);
} else {
CheckMaxAcc("retrain", CNT_AFTER_MAX_ACC);
CheckMaxAcc("retrain", APP<Dtype>::CNT_AFTER_MAX_ACC);
}
}

@@ -456,7 +446,7 @@
&& APP<Dtype>::retrain_test_interval
&& iter_ % APP<Dtype>::retrain_test_interval == 0
&& state_begin_iter_ != iter_) { // do not test on the first 'final_retrain' iter, because it's unnecessary and harmful
CheckMaxAcc("final_retrain", CNT_AFTER_MAX_ACC + 4);
CheckMaxAcc("final_retrain", APP<Dtype>::CNT_AFTER_MAX_ACC + 4);
}

// Print speedup & compression ratio each iter
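
All three retrain paths above funnel into CheckMaxAcc with a different patience budget: CNT_AFTER_MAX_ACC for an ordinary 'retrain' stage, +2 when no acc_borderline is set, +4 for 'final_retrain'. A hedged sketch of the underlying convergence test, assuming the semantics the call sites suggest (the helper below is illustrative, not the member function itself, which also handles lr decay and snapshotting):

// Illustrative helper: the current lr period counts as converged once
// cnt_after_max_acc accuracy tests have passed without a new maximum.
#include <cstdio>
#include <vector>

bool lr_period_converged(const std::vector<float>& accs, int cnt_after_max_acc) {
  int best = 0;
  for (int i = 1; i < (int)accs.size(); ++i)
    if (accs[i] > accs[best]) best = i;
  return (int)accs.size() - 1 - best >= cnt_after_max_acc;
}

int main() {
  const std::vector<float> accs = {0.710f, 0.729f, 0.731f, 0.730f,
                                   0.729f, 0.728f, 0.727f};
  // The max sits at index 2 and four tests have passed since, so with
  // cnt_after_max_acc = 4 the period counts as converged.
  std::printf("converged: %d\n", lr_period_converged(accs, 4));
  return 0;
}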
@@ -542,7 +532,7 @@ void Solver<Dtype>::CheckMaxAcc(const string& prune_state, const int& cnt_after_
}

// Decay lr
APP<Dtype>::learning_rate *= MUL_LR_DECAY; // When current learning rate has reached its ceiling accuracy, decay it.
APP<Dtype>::learning_rate *= APP<Dtype>::MUL_LR_DECAY; // When current learning rate has reached its ceiling accuracy, decay it.
++ cnt_decay_lr_;
sprintf(logstr, "[app] '%s' of current lr period finished, final acc = %f, iter = %d, decay lr (new: %.7f)",
prune_state.c_str(), current_max_acc_, current_max_acc_iter_, APP<Dtype>::learning_rate);
@@ -569,13 +559,13 @@
sprintf(logstr, "[app] All prune done. Output the best caffemodel, iter = %d, acc = %f", final_output_iter, final_output_acc);
cout << logstr << endl;
PrintFinalPrunedRatio();
// RemoveUselessSnapshot("", snapshot_iters_.back());
RemoveUselessSnapshot("", snapshot_iters_.back());
exit(0);
}

// Check if retraining can be stopped in "retrain" state
if (cnt_decay_lr_ >= MAX_CNT_LR_DECAY + 1 || current_max_acc_ < max_acc_ || APP<Dtype>::learning_rate < 1e-6) {
APP<Dtype>::learning_rate /= MUL_LR_DECAY; // restore to last lr, because this lr is not used actually.
if (cnt_decay_lr_ >= APP<Dtype>::MAX_CNT_LR_DECAY + 1 || current_max_acc_ < max_acc_ || APP<Dtype>::learning_rate < 1e-6) {
APP<Dtype>::learning_rate /= APP<Dtype>::MUL_LR_DECAY; // restore the previous lr, because this one is never actually used.
sprintf(logstr, "[app] All '%s' done: lr has decayed enough OR max acc of this lr period is not better than the previous one.", prune_state.c_str());
cout << logstr << " Output the best caffemodel, iter = " << max_acc_iter_ << ", acc = " << max_acc_
<< ". Resuming from iter = " << first_retrain_finished_iter_ << endl;
@@ -674,7 +664,7 @@ void Solver<Dtype>::SetPruneState(const string& prune_state) {

template <typename Dtype>
void Solver<Dtype>::CheckPruneStage(const Dtype& acc, const int& last_max_acc_iter, const Dtype& last_max_acc) {
if (APP<Dtype>::acc_borderline - acc > ACCURACY_GAP_THRESHOLD) { // accuracy bad
if (APP<Dtype>::acc_borderline - acc > APP<Dtype>::ACCURACY_GAP_THRESHOLD) { // accuracy bad
for (int L = 0; L < APP<Dtype>::layer_index.size(); ++L) {
if (APP<Dtype>::prune_ratio[L] == 0) { continue; }
APP<Dtype>::last_infeasible_prune_ratio[L] = APP<Dtype>::pruned_ratio_for_comparison[L];
@@ -687,8 +677,8 @@
Restore(resume_file.c_str(), false); // Note to restore after SetNewCurrentPruneRatio, because restore will change the state of network, like num_pruned_col
SetPruneState("prune");
// Check if incre_pr is large enough
if (incre_pr < INCRE_PR_BOTTOMLINE) {
cout << "[app]\n[app] Stop: incre_pr is too small (<" << INCRE_PR_BOTTOMLINE << "), so another pruning stage is meaningless. Go to 'final_retrain'." << endl;
if (incre_pr < APP<Dtype>::INCRE_PR_BOTTOMLINE) {
cout << "[app]\n[app] Stop: incre_pr is too small (<" << APP<Dtype>::INCRE_PR_BOTTOMLINE << "), so another pruning stage is meaningless. Go to 'final_retrain'." << endl;
const string resume_file = param_.snapshot_prefix() + lastretrain_prefix_ + "_iter_" + caffe::format_int(APP<Dtype>::last_feasible_prune_iter2) + ".solverstate";
Restore(resume_file.c_str(), false);
cout << "[app] ===== resuming from: " << resume_file << endl;
@@ -747,7 +737,7 @@ const Dtype Solver<Dtype>::SetNewCurrentPruneRatio(const bool& IF_roll_back, con
incre_pr = APP<Dtype>::last_prune_ratio_incre / (APP<Dtype>::last_feasible_acc - val_acc)
* (APP<Dtype>::last_feasible_acc - APP<Dtype>::acc_borderline);
} else {
incre_pr = min(max((Dtype)INCRE_PR_BOTTOMLINE, (val_acc - APP<Dtype>::acc_borderline) * COEEF_ACC_2_PR), (Dtype)0.2); // range: [INCRE_PR_BOTTOMLINE, 0.2]
incre_pr = min(max((Dtype)APP<Dtype>::INCRE_PR_BOTTOMLINE, (val_acc - APP<Dtype>::acc_borderline) * APP<Dtype>::COEEF_ACC_2_PR), (Dtype)0.2); // range: [APP<Dtype>::INCRE_PR_BOTTOMLINE, 0.2]
}
// Check incre_pr
APP<Dtype>::last_prune_ratio_incre = incre_pr;
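
The two incre_pr branches admit a quick numeric check: without a roll-back, the accuracy margin over the borderline is scaled by COEEF_ACC_2_PR and clamped to [INCRE_PR_BOTTOMLINE, 0.2]; after a roll-back, the last increment is rescaled by how much of the accuracy drop the borderline still allows. A sketch with made-up accuracies (the constants match the header defaults; everything else is illustrative):

// Worked example of the incre_pr update above.
#include <algorithm>
#include <cstdio>

int main() {
  const float INCRE_PR_BOTTOMLINE = 0.01f, COEEF_ACC_2_PR = 10.0f;
  const float acc_borderline = 0.730f;

  // No roll-back: a 0.008 accuracy margin maps to a 0.08 ratio increment,
  // clamped to [INCRE_PR_BOTTOMLINE, 0.2].
  float val_acc = 0.738f;
  float incre_pr = std::min(std::max(INCRE_PR_BOTTOMLINE,
      (val_acc - acc_borderline) * COEEF_ACC_2_PR), 0.2f);
  std::printf("margin branch: incre_pr = %.3f\n", incre_pr);  // 0.080

  // Roll-back: linearly rescale the last increment by the fraction of the
  // accuracy drop that the borderline still allows.
  const float last_prune_ratio_incre = 0.05f, last_feasible_acc = 0.740f;
  val_acc = 0.725f;  // overshot below the borderline
  incre_pr = last_prune_ratio_incre / (last_feasible_acc - val_acc)
             * (last_feasible_acc - acc_borderline);
  std::printf("roll-back branch: incre_pr = %.4f\n", incre_pr);  // ~0.0333
  return 0;
}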
@@ -950,13 +940,13 @@ const Dtype Solver<Dtype>::IncrePR_2_TRMul(const Dtype& incre_pr) {
y1 = 3 * (y0 - 0.5) + 1, which constrains the range to (-0.5, 2.5)
s.t.
x = INCRE_PR_BOTTOMLINE -> y1 = 0.2
x = STANDARD_INCRE_PR -> y1 = 1
x = APP<Dtype>::INCRE_PR_BOTTOMLINE -> y1 = TR_MUL_BOTTOM
x = APP<Dtype>::STANDARD_INCRE_PR -> y1 = 1
*/
const Dtype y0 = (TR_MUL_BOTTOM - 1) / 3 + 0.5;
const Dtype k = log(1/y0 - 1) / (STANDARD_INCRE_PR - INCRE_PR_BOTTOMLINE);
const Dtype y0 = (APP<Dtype>::TR_MUL_BOTTOM - 1) / 3 + 0.5;
const Dtype k = log(1/y0 - 1) / (APP<Dtype>::STANDARD_INCRE_PR - APP<Dtype>::INCRE_PR_BOTTOMLINE);

const Dtype y0_ = 1 / (1 + exp(-k * (incre_pr - STANDARD_INCRE_PR)));
const Dtype y0_ = 1 / (1 + exp(-k * (incre_pr - APP<Dtype>::STANDARD_INCRE_PR)));
const Dtype y1_ = 3 * (y0_ - 0.5) + 1;
return y1_;
}
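
Numerically, the mapping above is a logistic curve rescaled so that an increment of INCRE_PR_BOTTOMLINE yields the multiplier TR_MUL_BOTTOM and an increment of STANDARD_INCRE_PR yields exactly 1 (with the current default TR_MUL_BOTTOM = 0.25, the lower endpoint evaluates to 0.25). A self-contained check using the header defaults:

// Numeric check of the IncrePR_2_TRMul mapping with the default constants.
#include <cmath>
#include <cstdio>

float incre_pr_to_tr_mul(float incre_pr) {
  const float TR_MUL_BOTTOM = 0.25f, STANDARD_INCRE_PR = 0.05f,
              INCRE_PR_BOTTOMLINE = 0.01f;
  const float y0 = (TR_MUL_BOTTOM - 1) / 3 + 0.5f;  // 0.25
  const float k = std::log(1 / y0 - 1) / (STANDARD_INCRE_PR - INCRE_PR_BOTTOMLINE);
  const float y0_ = 1 / (1 + std::exp(-k * (incre_pr - STANDARD_INCRE_PR)));
  return 3 * (y0_ - 0.5f) + 1;  // rescale the logistic output into (-0.5, 2.5)
}

int main() {
  // Endpoints: the bottomline increment maps to TR_MUL_BOTTOM, the standard
  // increment to 1; larger increments saturate toward 2.5.
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.01f));  // 0.250
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.05f));  // 1.000
  std::printf("%.3f\n", incre_pr_to_tr_mul(0.20f));  // ~2.45
  return 0;
}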
12 changes: 6 additions & 6 deletions src/caffe/solvers/sgd_solver.cpp
@@ -227,7 +227,7 @@ void SGDSolver<Dtype>::Regularize(int param_id) {
temp_[param_id]->gpu_data(),
net_params[param_id]->mutable_gpu_diff());

} else if (regularization_type == "SelectiveReg") {
} else if (regularization_type == "Reg_Col") {
// add weight decay; weight decay is still used here
caffe_gpu_axpy(net_params[param_id]->count(),
local_decay,
@@ -261,6 +261,7 @@
if (APP<Dtype>::step_ % APP<Dtype>::prune_interval == 0) {
if (APP<Dtype>::prune_coremthd == "Reg-rank" || APP<Dtype>::prune_coremthd == "Reg") {
// print ave-magnitude
/*
cout << "ave-magnitude_col " << this->iter_ << " " << layer_name << ":";
for (int j = 0; j < num_col; ++j) {
Dtype sum = 0;
Expand All @@ -270,7 +271,7 @@ void SGDSolver<Dtype>::Regularize(int param_id) {
cout << " " << sum/num_row;
}
cout << endl;

*/

// Sort 01: sort by L1-norm
typedef std::pair<Dtype, int> mypair;
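
Sort 01 ranks columns by ascending L1-norm using the same pair<score, index> idiom as above. A minimal sketch with hypothetical 2x3 weights (the repo's version also folds history scores into a second sort, Sort 02):

// Sketch of "Sort 01: sort by L1-norm" over columns of a row-major matrix.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  typedef std::pair<float, int> mypair;  // (L1-norm, column index)
  const int num_row = 2, num_col = 3;
  const float weight[num_row * num_col] = {0.5f, -0.1f, 0.9f,
                                           -0.4f, 0.2f, -0.8f};
  std::vector<mypair> col_score(num_col);
  for (int j = 0; j < num_col; ++j) {
    float sum = 0;
    for (int i = 0; i < num_row; ++i)
      sum += std::fabs(weight[i * num_col + j]);
    col_score[j] = std::make_pair(sum, j);
  }
  // Ascending by L1-norm: the smallest-magnitude columns rank first and are
  // the first candidates for pruning.
  std::sort(col_score.begin(), col_score.end());
  for (int r = 0; r < num_col; ++r)
    std::printf("rank %d: col %d (L1 = %.2f)\n",
                r, col_score[r].second, col_score[r].first);
  return 0;
}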
@@ -315,7 +316,6 @@
const Dtype alpha21 = (num_col_to_prune_ == 1) ? 0 : log(1/kk2) / (num_col_to_prune_-1);
const Dtype alpha22 = (num_col_to_prune_ == num_col_-1) ? 0 : log(1/kk2) / (num_col_-1 - num_col_to_prune_);

APP<Dtype>::IF_scheme1_when_Reg_rank = false; // scheme 2 is the default.
for (int j = 0; j < num_col_; ++j) { // j: rank
const int col_of_rank_j = col_hrank[j + num_pruned_col].second; // Note the real rank is j + num_pruned_col
const Dtype Delta = APP<Dtype>::IF_scheme1_when_Reg_rank
@@ -336,7 +336,7 @@
mumasks[i * num_col + col_of_rank_j] = 0;
muweight[i* num_col + col_of_rank_j] = 0;
}
muhistory_score[col_of_rank_j] = APP<Dtype>::step_ - 1000000 - (muhistory_punish[col_of_rank_j] - APP<Dtype>::target_reg); // This is to
muhistory_score[col_of_rank_j] = APP<Dtype>::step_ - 1000000 - (muhistory_punish[col_of_rank_j] - APP<Dtype>::target_reg);
// keep the pruned weight groups sorted to the left in Sort 01 and 02 above; the earlier a group is pruned, the further left it sorts

// Check whether the corresponding row in the last layer could be pruned
@@ -474,7 +474,7 @@
net_params[param_id]->gpu_diff(),
net_params[param_id]->mutable_gpu_diff());

} else if (regularization_type == "Auto-balanced") {
} else if (regularization_type == "AFP_Col") {
const vector<int>& shape = this->net_->learnable_params()[param_id]->shape();
const string& layer_name = this->net_->layer_names()[this->net_->param_layer_indices()[param_id].first];
if (shape.size() != 4 || APP<Dtype>::layer_index.count(layer_name) == 0) { // not the Conv weights
@@ -573,7 +573,7 @@
}
APP<Dtype>::pruned_ratio[L] = 0.2; // just set a positive value to pass the ClearHistory check
}
} else if (regularization_type == "Auto-balanced_Row") {
} else if (regularization_type == "AFP_Row") {
const vector<int>& shape = this->net_->learnable_params()[param_id]->shape();
const string& layer_name = this->net_->layer_names()[this->net_->param_layer_indices()[param_id].first];
if (shape.size() != 4 || APP<Dtype>::layer_index.count(layer_name) == 0) { // not the Conv weights
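
Both AFP branches start from the same guard: a parameter blob is treated as prunable Conv weights only if it is 4-D (output channels, input channels, kernel height, kernel width) and its layer is registered in layer_index. A hedged sketch of that check (standalone stand-ins, not the repo's types):

// Sketch of the Conv-weights guard used by the AFP_Col / AFP_Row branches.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

bool is_prunable_conv(const std::vector<int>& shape, const std::string& name,
                      const std::map<std::string, int>& layer_index) {
  return shape.size() == 4 && layer_index.count(name) != 0;
}

int main() {
  std::map<std::string, int> layer_index;
  layer_index["conv1"] = 0;
  std::printf("%d\n", is_prunable_conv({64, 3, 3, 3}, "conv1", layer_index));  // 1
  std::printf("%d\n", is_prunable_conv({64}, "conv1_bias", layer_index));      // 0
  return 0;
}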
