This repository has been archived by the owner on Aug 3, 2021. It is now read-only.

Commit

Merge pull request #140 from NVIDIA/docs_fixes
Update docs
okuchaiev committed Jun 12, 2018
2 parents 131c393 + eb4bf9d commit 1efa595
Showing 9 changed files with 32 additions and 58 deletions.
27 changes: 15 additions & 12 deletions docs/html/_modules/optimizers/mp_wrapper.html
@@ -165,23 +165,28 @@
 
 
 class MixedPrecisionOptimizerWrapper(tf.train.Optimizer):
-  def __init__(self, optimizer, automatic_loss_scaler=None):
+  def __init__(self, optimizer, loss_scale=None):
     super(MixedPrecisionOptimizerWrapper, self).__init__(
         optimizer._use_locking,
         optimizer._name + '-MP',
     )
     self._optimizer = optimizer
     self._fp32_to_fp16 = {}
-    self._loss_scaler = automatic_loss_scaler
+    self._loss_scaler = None
+    if loss_scale is None:
+      self._loss_scale = 1.0
+    elif isinstance(loss_scale, float):
+      self._loss_scale = loss_scale
+    elif isinstance(loss_scale, AutomaticLossScaler):
+      self._loss_scaler = loss_scale
+      self._loss_scale = self._loss_scaler.loss_scale
 
   def compute_gradients(self, loss, var_list=None,
                         gate_gradients=tf.train.Optimizer.GATE_OP,
                         aggregation_method=None,
                         colocate_gradients_with_ops=False,
                         grad_loss=None):
-    if self._loss_scaler:
-      loss *= self._loss_scaler.loss_scale
-
+    loss *= self._loss_scale
     grads_and_vars_fp16 = self._optimizer.compute_gradients(
         loss, var_list=var_list,
         gate_gradients=gate_gradients,
@@ -214,7 +219,7 @@
         fp32_grad = tf.cast(grad, tf.float32)
         # adding regularization part with respect to fp32 copy
         if var.name in reg_funcs:
-          fp32_grad += tf.gradients(
+          fp32_grad += self._loss_scale * tf.gradients(
             tf.contrib.layers.apply_regularization(
               reg_funcs[var.name],
               [fp32_var],
@@ -225,11 +230,8 @@
       else:
         grads_and_vars_fp32.append((grad, var))
 
-    # Unscale gradients if necessary
-    if self._loss_scaler:
-      grads_and_vars_fp32 = _scale_grads(grads_and_vars_fp32,
-                                         1. / self._loss_scaler.loss_scale)
-
+    grads_and_vars_fp32 = _scale_grads(grads_and_vars_fp32,
+                                       1.0 / self._loss_scale)
     return grads_and_vars_fp32
 
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
@@ -243,7 +245,7 @@
         if var.name in self._fp32_to_fp16:
           dst_var = self._fp32_to_fp16[var.name]
           apply_ops.append(
-              tf.assign(dst_var, tf.cast(var, tf.float16)))
+              tf.assign(dst_var, tf.saturate_cast(var, tf.float16)))
     if apply_ops:
       return tf.group(apply_ops)
     return update_op
@@ -284,6 +286,7 @@
         grad *= scale
     scaled_grads_and_vars.append((grad, var))
   return scaled_grads_and_vars
+
 </pre></div>
 
 </div>
33 changes: 6 additions & 27 deletions docs/html/_modules/optimizers/optimizers.html
@@ -478,32 +478,24 @@
     variables = vars_.trainable_variables()
 
   if automatic_loss_scaling is not None:
-    if not automatic_loss_scaling in AutomaticLossScaler.SUPPORTED_ALGOS:
+    if automatic_loss_scaling not in AutomaticLossScaler.SUPPORTED_ALGOS:
       raise ValueError("Unknown automatic loss scaling algorithm: %s."
                        % automatic_loss_sclaing)
     if dtype != "mixed":
       raise ValueError("Automatic loss scaling can be used only with "
                        "dtype=mixed.")
-    loss_scaler = AutomaticLossScaler(algorithm=automatic_loss_scaling)
-  else:
-    loss_scaler = None
+    loss_scale = AutomaticLossScaler(algorithm=automatic_loss_scaling)
 
   if dtype == 'mixed':
-    opt = MixedPrecisionOptimizerWrapper(
-        opt,
-        automatic_loss_scaler=loss_scaler,
-    )
+    opt = MixedPrecisionOptimizerWrapper(opt, loss_scale=loss_scale)
   if on_horovod:
     opt = DistributedOptimizer(opt)
 
   # Compute gradients.
   gradients = opt.compute_gradients(
-      loss if loss_scale == 1.0 else loss * loss_scale,
-      variables,
-      colocate_gradients_with_ops=colocate_gradients_with_ops)
-
-  if loss_scale != 1.0:
-    gradients = _multiply_gradients_const(gradients, 1.0 / loss_scale)
+      loss, variables,
+      colocate_gradients_with_ops=colocate_gradients_with_ops,
+  )
 
   # Optionally add gradient noise.
   if gradient_noise_scale is not None:
@@ -772,19 +764,6 @@
       multiplied_grads_and_vars.append((grad, var))
   return multiplied_grads_and_vars
 
-
-def _multiply_gradients_const(grads_and_vars, multiplier):
-  """Multiply specified gradients."""
-  multiplied_grads_and_vars = []
-  for grad, var in grads_and_vars:
-    if grad is not None:
-      if isinstance(grad, ops.IndexedSlices):
-        grad_values = grad.values * multiplier
-        grad = ops.IndexedSlices(grad_values, grad.indices, grad.dense_shape)
-      else:
-        grad *= multiplier
-      multiplied_grads_and_vars.append((grad, var))
-  return multiplied_grads_and_vars
 </pre></div>
 
 </div>
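With loss scaling now handled entirely inside MixedPrecisionOptimizerWrapper, optimize_loss no longer scales the loss or divides the gradients itself, which is why the _multiply_gradients_const helper could be removed. The sketch below illustrates the invariant the caller now relies on; it is not part of the commit, and the function and variable names are illustrative.

# Sketch only: gradients of loss_scale * loss, divided by loss_scale,
# equal gradients of the unscaled loss, so callers need no extra rescaling.
import tensorflow as tf

def grads_via_scaled_loss(loss, variables, loss_scale):
  scaled_grads = tf.gradients(loss * loss_scale, variables)
  # The scaling only exists to keep small fp16 gradients representable;
  # dividing by the same constant recovers d(loss)/d(var).
  return [g / loss_scale if g is not None else None for g in scaled_grads]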
(The remaining 7 changed files were not loaded on this page.)

0 comments on commit 1efa595
