Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions lib/gui/control_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,18 @@ def set_slider_rounding(value, var, d_type, round_to, min_max):
The variable to set the value for
d_type: [:class:`int`, :class:`float`]
The type of value that is stored in :attr:`var`
round_to: int
If :attr:`dtype` is :class:`float` then this is the decimal place rounding for :attr:`var`.
If :attr:`dtype` is :class:`int` then this is the number of steps between each increment
for :attr:`var`
round_to: int or list
If :attr:`d_type` is :class:`float` then this is the decimal place rounding for
:attr:`var`. If :attr:`d_type` is :class:`int` then this is the number of steps between
each increment for :attr:`var`. If a list is provided, then this must be a list of
discrete values that are of the correct :attr:`d_type`.
min_max: tuple (`int`, `int`)
The (``min``, ``max``) values that this slider accepts
"""
if d_type == float:
if isinstance(round_to, list):
# Lock to nearest item
var.set(min(round_to, key=lambda x: abs(x-float(value))))
elif d_type == float:
var.set(round(float(value), round_to))
else:
steps = range(min_max[0], min_max[1] + round_to, round_to)
Expand Down
45 changes: 37 additions & 8 deletions plugins/train/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _set_globals(self):
"that is based on adaptive estimation of first-order and second-order moments."
"\n\t nadam - Adaptive Moment Optimization with Nesterov Momentum. Much like "
"Adam but uses a different formula for calculating momentum."
"\n\t rms-prop - Root Mean Square Propogation. Maintains a moving (discounted) "
"\n\t rms-prop - Root Mean Square Propagation. Maintains a moving (discounted) "
"average of the square of the gradients. Divides the gradient by the root of "
"this average.")
self.add_item(
Expand All @@ -125,6 +125,24 @@ def _set_globals(self):
"are too large might result in model crashes and the inability of the model to "
"find the best solution. Values that are too small might be unable to escape "
"from dead-ends and find the best global minimum.")
self.add_item(
section=section,
title="epsilon_exponent",
datatype=int,
default=-7,
min_max=(-10, 0),
rounding=1,
fixed=False,
group="optimizer",
info="The epsilon adds a small constant to weight updates to attempt to avoid 'divide "
"by zero' errors. Generally this option should be left at default value, however "
"if you are getting 'NaN' loss values, and have been unable to resolve the issue "
"any other way (for example, increasing batch size, or lowering learning rate), "
"then raising the epsilon can lead to a more stable model. It may, however, come "
"at the cost of slower training and a less accurate final result.\n"
"NB: The value given here is the 'exponent' to the epsilon. For example, "
"choosing '-7' will set the epsilon to 1e-7. Choosing '-3' will set the epsilon "
"to 0.001 (1e-3).")
self.add_item(
section=section,
title="reflect_padding",
Expand Down Expand Up @@ -163,6 +181,17 @@ def _set_globals(self):
"because they have Tensor Cores. Older GPUs offer no math performance benefit "
"for using mixed precision, however memory and bandwidth savings can enable some "
"speedups. Generally RTX GPUs and later will offer the most benefit.")
self.add_item(
section=section,
title="nan_protection",
datatype=bool,
default=True,
group="network",
info="If a 'NaN' is generated in the model, this means that the model has become corrupted "
"and the model is likely to start deteriorating from this point on. Enabling NaN "
"protection will stop training immediately in the event of a NaN. The last save "
"will not contain the NaN, so you may still be able to rescue your model.",
fixed=False)
self.add_item(
section=section,
title="convert_batchsize",
Expand Down Expand Up @@ -213,8 +242,8 @@ def _set_loss(self):
"a median, it can potentially ignore some infrequent image types in the dataset."
"\n\t MSE - Mean squared error will guide reconstructions of each pixel "
"towards its average value in the training dataset. As an avg, it will be "
"suspectible to outliers and typically produces slightly blurrier results."
"\n\t LogCosh - log(cosh(x)) acts similiar to MSE for small errors and to "
"susceptible to outliers and typically produces slightly blurrier results."
"\n\t LogCosh - log(cosh(x)) acts similar to MSE for small errors and to "
"MAE for large errors. Like MSE, it is very stable and prevents overshoots "
"when errors are near zero. Like MAE, it is robust to outliers. NB: Due to a bug "
"in PlaidML, this loss does not work on AMD cards."
Expand All @@ -228,12 +257,12 @@ def _set_loss(self):
"statistics of an image. Potentially delivers more realistic looking images."
"\n\t GMSD - Gradient Magnitude Similarity Deviation seeks to match "
"the global standard deviation of the pixel to pixel differences between two "
"images. Similiar in approach to SSIM. NB: This loss does not currently work on "
"images. Similar in approach to SSIM. NB: This loss does not currently work on "
"AMD cards."
"\n\t Pixel_Gradient_Difference - Instead of minimizing the difference between "
"the absolute value of each pixel in two reference images, compute the pixel to "
"pixel spatial difference in each image and then minimize that difference "
"between two images. Allows for large color shifts,but maintains the structure "
"between two images. Allows for large color shifts, but maintains the structure "
"of the image.")
self.add_item(
section=section,
Expand All @@ -247,8 +276,8 @@ def _set_loss(self):
"towards its median value in the training dataset. Robust to outliers but as "
"a median, it can potentially ignore some infrequent image types in the dataset."
"\n\t MSE - Mean squared error will guide reconstructions of each pixel "
"towards its average value in the training dataset. As an avg, it will be "
"suspectible to outliers and typically produces slightly blurrier results.")
"towards its average value in the training dataset. As an average, it will be "
"susceptible to outliers and typically produces slightly blurrier results.")
self.add_item(
section=section,
title="l2_reg_term",
Expand Down Expand Up @@ -307,7 +336,7 @@ def _set_loss(self):
default=True,
group="loss",
info="Image loss function is weighted by mask presence. For areas of "
"the image without the facial mask, reconstuction errors will be "
"the image without the facial mask, reconstruction errors will be "
"ignored while the masked face area is prioritized. May increase "
"overall quality by focusing attention on the core face area.")
self.add_item(
Expand Down
18 changes: 11 additions & 7 deletions plugins/train/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,8 @@ def _output_summary(self):
if hasattr(self._args, "summary") and self._args.summary:
print_fn = None # Print straight to stdout
else:
print_fn = lambda x: logger.verbose("%s", x) # print to logger
# print to logger
print_fn = lambda x: logger.verbose("%s", x) # noqa
for model in _get_all_sub_models(self._model):
model.summary(print_fn=print_fn)

Expand All @@ -411,6 +412,7 @@ def _compile_model(self):
optimizer = _Optimizer(self.config["optimizer"],
self.config["learning_rate"],
self.config.get("clipnorm", False),
10 ** int(self.config["epsilon_exponent"]),
self._args).optimizer
if self._settings.use_mixed_precision:
optimizer = self._settings.loss_scale_optimizer(optimizer)
Expand Down Expand Up @@ -1076,20 +1078,22 @@ class _Optimizer(): # pylint:disable=too-few-public-methods
The selected learning rate to use
clipnorm: bool
Whether to clip gradients to avoid exploding/vanishing gradients
epsilon: float
The value to use for the epsilon of the optimizer
arguments: :class:`argparse.Namespace`
The arguments that were passed to the train or convert process as generated from
Faceswap's command line arguments
"""
def __init__(self, optimizer, learning_rate, clipnorm, arguments):
def __init__(self, optimizer, learning_rate, clipnorm, epsilon, arguments):
logger.debug("Initializing %s: (optimizer: %s, learning_rate: %s, clipnorm: %s, "
"arguments: %s", self.__class__.__name__, optimizer, learning_rate, clipnorm,
arguments)
"epsilon: %s, arguments: %s)", self.__class__.__name__,
optimizer, learning_rate, clipnorm, epsilon, arguments)
optimizers = {"adam": Adam, "nadam": Nadam, "rms-prop": RMSprop}
self._optimizer = optimizers[optimizer]

base_kwargs = {"adam": dict(beta_1=0.5, beta_2=0.99),
"nadam": dict(beta_1=0.5, beta_2=0.99),
"rms-prop": dict()}
base_kwargs = {"adam": dict(beta_1=0.5, beta_2=0.99, epsilon=epsilon),
"nadam": dict(beta_1=0.5, beta_2=0.99, epsilon=epsilon),
"rms-prop": dict(epsilon=epsilon)}
self._kwargs = base_kwargs[optimizer]

self._configure(learning_rate, clipnorm, arguments)
Expand Down
11 changes: 7 additions & 4 deletions plugins/train/model/original_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
The following keys are expected for the _DEFAULTS <metadata> dict:
datatype: [required] A python type class. This limits the type of data that can be
provided in the .ini file and ensures that the value is returned in the
correct type to faceswap. Valid datatypes are: <class 'int'>, <class 'float'>,
correct type to faceswap. Valid data types are: <class 'int'>, <class 'float'>,
<class 'str'>, <class 'bool'>.
default: [required] The default value for this option.
info: [required] A string describing what this option does.
Expand All @@ -27,12 +27,15 @@
a combobox / radio option in the GUI.
gui_radio: [optional] If <choices> are defined, this indicates that the GUI should use
radio buttons rather than a combobox to display this option.
min_max: [partial] For <class 'int'> and <class 'float'> datatypes this is required
min_max: [partial] For <class 'int'> and <class 'float'> data types this is required
otherwise it is ignored. Should be a tuple of min and max accepted values.
This is used for controlling the GUI slider range. Values are not enforced.
rounding: [partial] For <class 'int'> and <class 'float'> datatypes this is
rounding: [partial] For <class 'int'> and <class 'float'> data types this is
required otherwise it is ignored. Used for the GUI slider. For floats, this
is the number of decimal places to display. For ints this is the step size.
You can also pass in a list of discrete values for this item, which should be
of the same data type as the given 'datatype'. This will lock the scale to
only those values displayed in the list.
fixed: [optional] [train only]. Training configurations are fixed when the model is
created, and then reloaded from the state file. Marking an item as fixed=False
indicates that this value can be changed for existing models, and will override
Expand All @@ -57,4 +60,4 @@
fixed=True,
group="settings",
),
)
)
15 changes: 14 additions & 1 deletion plugins/train/trainer/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,11 @@ def _log_tensorboard(self, loss):
def _collate_and_store_loss(self, loss):
""" Collate the loss into totals for each side.

The losses are then into a total for each side. Loss totals are added to
The losses are summed into a total for each side. Loss totals are added to
:attr:`model.state._history` to track the loss drop per save iteration for backup purposes.

If NaN protection is enabled, checks for NaNs and raises an error if detected.

Parameters
----------
loss: list
Expand All @@ -319,7 +321,18 @@ def _collate_and_store_loss(self, loss):
-------
list
List of 2 ``floats`` which is the total loss for each side

Raises
------
FaceswapError
If a NaN is detected, a :class:`FaceswapError` will be raised
"""
# NaN protection
if self._config["nan_protection"] and not all(np.isfinite(val) for val in loss):
logger.critical("NaN Detected. Loss: %s", loss)
raise FaceswapError("A NaN was detected and you have NaN protection enabled. Training "
"has been terminated.")

split = len(loss) // 2
combined_loss = [sum(loss[:split]), sum(loss[split:])]
self._model.add_history(combined_loss)
Expand Down