Auto pruning #2603

Closed · wants to merge 41 commits
Changes from 5 commits

Commits (41)
8317882
add new auto pruning module
NHZlX Jun 26, 2017
9a8d498
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jun 26, 2017
6dbc941
the log function in preprocess must add 2, or it will be a bug
NHZlX Jun 30, 2017
07cbc9e
fixed some bug of auto pruning
NHZlX Jul 1, 2017
7526315
delete fault file
NHZlX Jul 1, 2017
44c4060
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 1, 2017
54a5577
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 1, 2017
e681196
there is a conflict between the momentum and the mask
NHZlX Jul 3, 2017
b960be8
auto pruning modify bug
NHZlX Jul 3, 2017
6849985
fix bug in auto pruning: before saving the model, multiply the paramete…
NHZlX Jul 4, 2017
1e44249
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 4, 2017
cbd5afb
Merge branch 'auto_pruning' of https://github.com/NHZlX/Paddle into a…
NHZlX Jul 4, 2017
d875def
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 4, 2017
3813928
add the handleBeforeFetch which the parameter operate with the hook b…
NHZlX Jul 7, 2017
a4862cd
dynamic pruning parameter config
NHZlX Jul 7, 2017
3251527
refactor the dynamic pruning and fixed some bug
NHZlX Jul 7, 2017
12cf82f
add dynamic pruning interface of the python
NHZlX Jul 7, 2017
3747d19
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 7, 2017
6636a21
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 7, 2017
9d98dd1
delete the handleBeforeFetch function
NHZlX Jul 17, 2017
1368682
set the updateHook after the updateImpl
NHZlX Jul 17, 2017
59c8c43
modify the parameter config in ParameterAttribute
NHZlX Jul 17, 2017
34b7b90
modify the related python interface of pruning
NHZlX Jul 17, 2017
d6b04ec
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 17, 2017
e184a41
delete the explicit keyword from ParameterPruningHook constructors
NHZlX Jul 17, 2017
ab3d10b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 18, 2017
e8e5a67
Update ParameterUpdaterBase.h
NHZlX Jul 20, 2017
8b74b72
Update trainer.py
NHZlX Jul 20, 2017
8966db2
fix the format
NHZlX Jul 20, 2017
b896b32
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Jul 20, 2017
3a24d7e
Merge branch 'auto_pruning' of https://github.com/NHZlX/Paddle into a…
NHZlX Jul 20, 2017
5c822fe
fix format
NHZlX Jul 20, 2017
ecf25b1
tiny modify
NHZlX Jul 30, 2017
6e5d805
Update ParameterUpdaterHook.cpp
NHZlX Aug 2, 2017
44aec34
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Sep 21, 2017
322fcbe
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Sep 21, 2017
cc139b2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Sep 22, 2017
a5407fa
modify code style
NHZlX Sep 22, 2017
bd6749a
fix error in cpu
NHZlX Oct 25, 2017
bb3038d
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
NHZlX Oct 25, 2017
7c9d5e5
add comments of preprocess func
NHZlX Oct 25, 2017
3 changes: 2 additions & 1 deletion paddle/api/PaddleAPI.h
@@ -573,7 +573,7 @@ class Parameter {

ParameterConfig* getConfig();
void setValueUpdated();

void handleBeforeSave();
bool save(const std::string& filename) const;

bool load(const std::string& filename) const;
@@ -880,6 +880,7 @@ class ParameterUpdater {
* @param param
*/
void update(Parameter* param);
void preprocess(Parameter* param, size_t currentPass, size_t currentBatch);
Contributor:

Please add comments for the functions newly added in the .h files; the same applies below.

Contributor Author:

Done

Contributor:

For the parameter names, I think passId and batchId would be better.


/**
* @breif only get required sparse rows by default.
4 changes: 4 additions & 0 deletions paddle/api/Parameter.cpp
@@ -61,6 +61,10 @@ bool Parameter::save(const std::string& filename) const {
return m->getPtr()->save(filename);
}

void Parameter::handleBeforeSave() {
return m->getPtr()->handleBeforeSave();
}

bool Parameter::load(const std::string& filename) const {
return m->getPtr()->load(filename);
}
5 changes: 5 additions & 0 deletions paddle/api/ParameterUpdater.cpp
@@ -87,6 +87,11 @@ void ParameterUpdater::update(Parameter *param) {
m->updater->update(paddleParam);
}

void ParameterUpdater::preprocess(Parameter *param, size_t currentPass, size_t currentBatch) {
auto paddleParam = param->m->getPtr();
m->updater->preprocess(paddleParam, currentPass, currentBatch);
}

void ParameterUpdater::getParametersRemote(bool fullSize, bool apply) {
m->updater->getParametersRemote(fullSize, apply);
}
12 changes: 12 additions & 0 deletions paddle/parameter/Parameter.h
@@ -257,6 +257,18 @@ class Parameter {
* It could modify gradient/momentum/etc here. Such as drop some gradient,
* etc.
*/
void handleBeforeSave(){
for (auto& hook : updaterHooks_) {
hook->handleBeforeSave(this);
}
}

void preProcessHook(size_t currentPass, size_t currentBatch) {
for (auto& hook : updaterHooks_) {
hook->preprocess(this, currentPass, currentBatch);
}
}

void updateHook() {
for (auto& hook : updaterHooks_) {
hook->update(this);
4 changes: 4 additions & 0 deletions paddle/parameter/ParameterUpdaterBase.h
@@ -61,6 +61,10 @@ class ParameterUpdater {
this->updateImpl(para);
}

void preprocess(Parameter* para, size_t currentPass, size_t currentBatch) {
SetDevice setDevice(para->getDeviceId());
para->preProcessHook(currentPass, currentBatch);
}
// only get required sparse rows by default,
// get full matrix parameter if *fullSize* set
// get PARAMETER_APPLY on pserver if *apply* set
157 changes: 120 additions & 37 deletions paddle/parameter/ParameterUpdaterHook.cpp
@@ -17,6 +17,7 @@ limitations under the License. */
#include <algorithm>
#include <atomic>
#include <fstream>
#include <iostream>
#include <mutex>
#include <thread>
#include <unordered_map>
@@ -29,42 +30,22 @@ limitations under the License. */

namespace paddle {

/**
* The static pruning hook
* Static means user specify a sparsity_ratio before training started, and the
* network will prune the parameters based on the sparsity_ratio. More details
* can be found https://arxiv.org/pdf/1506.02626.pdf.
*/

class StaticPruningHook : public IParameterUpdaterHook {
class ParameterPruningHook : public IParameterUpdaterHook {
public:
explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig)
: initCount_(0) {
sparsityRatio_ = hookConfig.sparsity_ratio();
}
explicit ParameterPruningHook() : initCount_(0) {}

static bool sortPairAscend(const std::pair<real, size_t> &pair1,
const std::pair<real, size_t> &pair2) {
return pair1.first > pair2.first;
}

void update(Parameter *para) {
updateThreadChecker_.check();
auto &vec = para->getBuf(PARAMETER_GRADIENT);
if (vec) {
vec->dotMul(*maskVec_);
}
}
virtual void update(Parameter *para) {/*do nothing*/}
virtual void handleBeforeSave(Parameter *para) {/*do nothing*/}
virtual void preprocess(Parameter *para, size_t currentPass, size_t currentBatch) {}

void generateMask(Parameter *para) {
virtual void generateMask(Parameter *para, size_t nonZeroNum) {
Contributor:

Suggest passing real sparsityRatio as the second parameter instead.

Contributor Author:

OK, will do.

VectorPtr maskTemp = Vector::create(para->getSize(), false);
maskTemp->zeroMem();
real *maskTempData = maskTemp->getData();
size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_);

VectorPtr paraVec = para->getBuf(PARAMETER_VALUE);
VectorPtr paraCpuCopy = Vector::create(para->getSize(), false);

paraCpuCopy->copyFrom(*paraVec);
std::vector<std::pair<real, size_t>> param;

@@ -73,38 +54,138 @@ class StaticPruningHook : public IParameterUpdaterHook {

std::partial_sort(
param.begin(), param.begin() + nonZeroNum, param.end(), sortPairAscend);

for (size_t i = 0; i < nonZeroNum; i++) maskTempData[param[i].second] = 1.0;

// Currently just use a mask vector for hack.
if (para->useGpu()) {
maskVec_ = Vector::create(para->getSize(), para->useGpu());
maskVec_->copyFrom(*maskTemp);
this-> maskVec_ = Vector::create(para->getSize(), para->useGpu());
this-> maskVec_->copyFrom(*maskTemp);
} else {
maskVec_ = maskTemp;
this-> maskVec_ = maskTemp;
}
}

void init(Parameter *para) {
generateMask(para);
static bool sortPairAscend(const std::pair<real, size_t> &pair1,
const std::pair<real, size_t> &pair2) {
return pair1.first > pair2.first;
}


protected:
std::atomic<size_t> initCount_;
SameThreadChecker updateThreadChecker_;
VectorPtr maskVec_;
};

/**
* The static pruning hook
* Static means user specify a sparsity_ratio before training started, and the
* network will prune the parameters based on the sparsity_ratio. More details
* can be found https://arxiv.org/pdf/1506.02626.pdf.
*/

class StaticPruningHook : public ParameterPruningHook {
public:
explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig)
: ParameterPruningHook() {
this->sparsityRatio_ = hookConfig.sparsity_ratio();
}

void update(Parameter *para) override{
updateThreadChecker_.check();
auto &vec = para->getBuf(PARAMETER_GRADIENT);
if (vec) {
vec->dotMul(*maskVec_);
}
}

void init(Parameter *para) override {
size_t initCount = this->initCount_.fetch_add(1);
CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke "
"in same ParamterUpdater";
Contributor:

invoke -> be invoked
same -> the same

VLOG(3) << "Initialize Parameter " << para;
SetDevice device(para->getDeviceId());

size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_);
this->generateMask(para, nonZeroNum);

auto &paraVec = para->getBuf(PARAMETER_VALUE);
paraVec->dotMul(*maskVec_);
paraVec->dotMul(*this->maskVec_);
Contributor:

Just a question here: *this->maskVec_ is equivalent to *(this->maskVec_), right? Then why add the this?

Contributor Author:

The this can indeed be dropped...

}

private:
SameThreadChecker updateThreadChecker_;
std::atomic<size_t> initCount_;
VectorPtr maskVec_;
real sparsityRatio_;
};

IParameterUpdaterHook::IParameterUpdaterHook() {}
class DynamicPruningHook : public ParameterPruningHook {
public:
explicit DynamicPruningHook(const ParameterUpdaterHookConfig &hookConfig)
: ParameterPruningHook() {
this->upperBound_ = hookConfig.upper_bound();
this->interPass_ = hookConfig.inter_pass();
this->endPass_ = hookConfig.end_pass();
Contributor:

These parameter names are not very intuitive; readers have to guess what they stand for. Suggestions:

  1. upper_bound -> sparsity_upper_bound
  2. The inter in inter_pass presumably stands for interval; abbreviating it to inter obscures the meaning. Could it be renamed to sparsity_increasing_interval, or something similar?
  3. As I understand it, endPass is used to compute the increment of sparsityRatio at each step, but setting endPass as a parameter here is not a good fit:
  • Users already set num_passes once when calling train; having to set it again here is cumbersome.
  • I also understand we cannot simply reuse the externally set num_passes, because users tend to set num_passes to a very large value and then kill the job once training converges.
    So please reconsider how the increment is configured.

Contributor Author:

upper_bound has been renamed to sparsity_upper_bound and inter_pass to interval_pass; attribute names that are too long hurt the user experience. I think end_pass is still necessary: num_passes is the number of passes of the whole training run, while end_pass is the number of passes during which sparsity_ratio keeps changing.

}

void init(Parameter *para) override {
// init mask
size_t initCount = this->initCount_.fetch_add(1);
CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke "
"in same ParamterUpdater";
Contributor:

Same as above.

VLOG(3) << "Initialize Parameter " << para;
this->maskVec_ = Vector::create(para->getSize(), para->useGpu());
this->maskVec_->reset(1.0);

/*
real *data = this->maskVec_->getData();
for (size_t i = 0; i < para->getSize(); i++){
std::cout << data[i] << " " ;
}
*/
Contributor:

Please delete the commented-out code on lines 138-143.

Contributor Author:

Done

}

void handleBeforeSave(Parameter *para) override{
updateThreadChecker_.check();
auto &vec = para->getBuf(PARAMETER_VALUE);
if (vec) {
vec->dotMul(*maskVec_);
}
}

void preprocess(Parameter *para, size_t currentPass, size_t currentBatch) override {
if (currentPass % interPass_ == 0 && currentPass <= endPass_ && currentBatch == 0) {
real boundWeight =
this->upperBound_ / std::log(this->endPass_ / (real)this->interPass_);
real sparsityRatio =
boundWeight * std::log(2 + currentPass / (real)interPass_);

size_t nonZeroNum = para->getSize() * (1 - sparsityRatio);
this->generateMask(para, nonZeroNum);
std::cout << para->getName() << " Current sparsity ratio: " <<
sparsityRatio <<" " << nonZeroNum<<std::endl;
}
//add the the temp
auto &paraVec = para->getBuf(PARAMETER_VALUE);
paraVec->dotMul(*this->maskVec_);
/*
VectorPtr paraCopyCpu = Vector::create(para->getSize(), false);
paraCopyCpu->copyFrom(*paraVec);
real *data = paraCopyCpu->getData();
size_t sum_non = 0;
for(size_t i = 0; i < para->getSize(); i++){
if(data[i] != 0.0)
sum_non += 1;
}
std::cout<<"sum_non: " <<sum_non << " " << para->getSize()<< std::endl;
*/
Contributor:

Please delete the commented-out code on lines 169-178.

Contributor Author:

Done

}

private:
real upperBound_;
size_t interPass_;
size_t endPass_;
};

IParameterUpdaterHook::IParameterUpdaterHook() {}
IParameterUpdaterHook::~IParameterUpdaterHook() {}

/**
@@ -139,6 +220,8 @@ static IParameterUpdaterHook *createImpl(
auto &type = config.type();
if (type == "pruning") {
return new StaticPruningHook(config);
} else if (type == "dpruning") {
Contributor:

Should dpruning be spelled out in full as dynamic_pruning, so that users understand it more easily?

Contributor Author:

Done

return new DynamicPruningHook(config);
}

LOG(FATAL) << "Unknown Hook type: " << type;
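As a side note, the sparsity schedule that DynamicPruningHook::preprocess implements can be sketched in a few lines of Python. This is only an illustration of the formula, using the defaults later added to ParameterConfig.proto (upper_bound=0.8, inter_pass=1, end_pass=20); the helper name is made up for the example and is not part of the PR.

```python
import math

def dynamic_sparsity_ratio(current_pass, upper_bound=0.8, inter_pass=1, end_pass=20):
    # Mirrors: boundWeight   = upperBound_ / log(endPass_ / interPass_)
    #          sparsityRatio = boundWeight * log(2 + currentPass / interPass_)
    bound_weight = upper_bound / math.log(end_pass / float(inter_pass))
    return bound_weight * math.log(2 + current_pass / float(inter_pass))

# The hook only regenerates the mask when
#   currentPass % interPass == 0 and currentPass <= endPass and currentBatch == 0,
# so the ratio rises logarithmically and reaches roughly upper_bound around end_pass:
for p in (0, 1, 5, 10, 20):
    print("pass %2d -> sparsity ratio %.3f" % (p, dynamic_sparsity_ratio(p)))
```

This also makes the naming discussion above more concrete: end_pass only controls how fast the ratio approaches the upper bound and is independent of the num_passes used for training.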
2 changes: 2 additions & 0 deletions paddle/parameter/ParameterUpdaterHook.h
@@ -52,6 +52,8 @@ class IParameterUpdaterHook {
* The init hook method. Invoke in ParameterUpdater::init
*/
virtual void init(Parameter* para) = 0;
virtual void preprocess(Parameter* para, size_t currentPass, size_t currentBatch) = 0;
virtual void handleBeforeSave(Parameter* para) = 0;

protected:
/**
3 changes: 3 additions & 0 deletions proto/ParameterConfig.proto
@@ -29,6 +29,9 @@ message ParameterUpdaterHookConfig {
required string type = 1;
// this represents the ratio of zero element to be set by the Parameter
optional double sparsity_ratio = 2 [default = 0.6];
optional double upper_bound = 3 [default = 0.8];
optional int32 inter_pass = 4 [default = 1];
optional int32 end_pass = 5 [default = 20];
}

message ParameterConfig {
4 changes: 4 additions & 0 deletions python/paddle/trainer/config_parser.py
@@ -3145,6 +3145,10 @@ def ParameterHook(type, **kwargs):
if sparsity_ratio is not None:
hook.sparsity_ratio = sparsity_ratio
return hook
elif type == 'dpruning':
hook = ParameterUpdaterHookConfig()
hook.type = type
return hook
else:
return None

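For illustration, the ParameterHook helper above just fills in a ParameterUpdaterHookConfig message. A minimal sketch of the two variants follows; the import path and the make_hook name are assumptions for the example, and in this revision the 'dpruning' branch still relies on the proto defaults (upper_bound, inter_pass, end_pass) because extra keyword arguments are ignored.

```python
# Sketch only; assumes the generated ParameterUpdaterHookConfig message is importable.
from paddle.proto.ParameterConfig_pb2 import ParameterUpdaterHookConfig

def make_hook(hook_type, sparsity_ratio=None):
    hook = ParameterUpdaterHookConfig()
    hook.type = hook_type
    if hook_type == 'pruning' and sparsity_ratio is not None:
        hook.sparsity_ratio = sparsity_ratio  # static pruning: fixed ratio of zeros
    # 'dpruning' keeps the proto defaults: upper_bound=0.8, inter_pass=1, end_pass=20
    return hook

static_hook = make_hook('pruning', sparsity_ratio=0.75)
dynamic_hook = make_hook('dpruning')
```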
1 change: 1 addition & 0 deletions python/paddle/v2/parameters.py
@@ -136,6 +136,7 @@ def __getitem__(self, key):
for each_gradient_machine in self.__gradient_machines__:
param = __get_parameter_in_gradient_machine__(
each_gradient_machine, key)
param.handleBeforeSave()
# for simplify implementation now, we always copy from C++
assert isinstance(param, api.Parameter)
val = param.getBuf(api.PARAMETER_VALUE)
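The call to handleBeforeSave here matters because, for DynamicPruningHook, update() is a no-op and the mask is re-applied to the value buffer only at the start of each batch in preprocess(); after the final optimizer step, pruned positions can therefore hold small nonzero values again, and handleBeforeSave restores exact zeros before the values are copied out. Below is a small self-contained numpy sketch of that masking logic; numpy stands in for the Vector buffers, and generate_mask only mirrors the idea of generateMask above.

```python
import numpy as np

def generate_mask(values, nonzero_num):
    # Keep the nonzero_num largest-magnitude entries, zero out the rest
    # (the same idea as ParameterPruningHook::generateMask).
    mask = np.zeros_like(values)
    keep = np.argsort(-np.abs(values))[:nonzero_num]
    mask[keep] = 1.0
    return mask

rng = np.random.RandomState(0)
value = rng.randn(8)
nonzero_num = int(value.size * (1 - 0.6))  # sparsity ratio 0.6 -> keep 3 of 8

mask = generate_mask(value, nonzero_num)
value *= mask                    # preprocess(): prune the value buffer
value += 0.01 * rng.randn(8)     # an unmasked update step re-populates pruned slots
value *= mask                    # handleBeforeSave(): restore exact zeros before saving
print("nonzero after save: %d (expected %d)" % ((value != 0).sum(), nonzero_num))
```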
7 changes: 5 additions & 2 deletions python/paddle/v2/trainer.py
@@ -140,9 +140,9 @@ def train(self, reader, num_passes=1, event_handler=None, feeding=None):
out_args = api.Arguments.createArguments(0)
feeder = DataFeeder(self.__data_types__, feeding)
for pass_id in xrange(num_passes):
event_handler(v2_event.BeginPass(pass_id))
pass_evaluator.start()
self.__parameter_updater__.startPass()
pass_evaluator.start()
event_handler(v2_event.BeginPass(pass_id))
for batch_id, data_batch in enumerate(reader()):
batch_evaluator.start()
event_handler(
@@ -152,6 +152,9 @@ def train(self, reader, num_passes=1, event_handler=None, feeding=None):
len(data_batch))
in_args = feeder(data_batch)
self.__prepare_parameter__(in_args)
for each_param in self.__gradient_machine__.getNonStaticParameters(
):
self.__parameter_updater__.preprocess(each_param, pass_id, batch_id)
self.__gradient_machine__.forwardBackward(in_args, out_args,
pass_type)
self.__gradient_machine__.eval(pass_evaluator)