From 10fdc463f1f22cedfe0484e328a5fb211d4c91c4 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Mon, 1 Sep 2025 10:17:09 +0000
Subject: [PATCH 01/20] initial code drop

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/debug/features/api.py |  6 +++---
 .../debug/features/log_fp8_tensor_stats.py |  6 +++++-
 .../debug/features/log_tensor_stats.py |  6 +++++-
 .../debug/pytorch/debug_quantization.py | 17 +++++++++++++++++
 transformer_engine/pytorch/module/base.py |  6 ++++--
 5 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/transformer_engine/debug/features/api.py b/transformer_engine/debug/features/api.py
index 94fc6d129c..176d285cb6 100644
--- a/transformer_engine/debug/features/api.py
+++ b/transformer_engine/debug/features/api.py
@@ -244,7 +244,7 @@ def inspect_tensor(
         config: Dict,
         layer_name: str,
         tensor_name: str,
-        tensor: torch.Tensor,
+        tensor: Optional[torch.Tensor],
         rowwise_quantized_tensor: Optional[torch.Tensor],
         columnwise_quantized_tensor: Optional[torch.Tensor],
         quantizer: Optional[Quantizer],
@@ -262,8 +262,8 @@ def inspect_tensor(
         layer_name: str
         tensor_name: str
             one of [`activation`, `weight`, `gradient`, `output`, `wgrad`, `dgrad`],
-        tensor: torch.Tensor
-            tensor in high precision,
+        tensor: Optional[torch.Tensor]
+            tensor in high precision, can be None only if fp8 model parameters are used and tensor name is `weight`.
         rowwise_quantized_tensor: Optional[torch.Tensor]
             rowwise quantized tensor,
         columnwise_quantized_tensor: Optional[torch.Tensor]
diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 31620211d0..2d4a2c0e2f 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -268,7 +268,7 @@ def inspect_tensor(
         tensor_name: str,
         iteration: int,
         tp_group: torch.distributed.ProcessGroup,
-        tensor: torch.Tensor,
+        tensor: Optional[torch.Tensor],
         rowwise_quantized_tensor: Optional[torch.Tensor | QuantizedTensor] = None,
         columnwise_quantized_tensor: Optional[torch.Tensor | QuantizedTensor] = None,
         quantizer: Optional[Quantizer] = None,
@@ -281,6 +281,10 @@ def inspect_tensor(
             quantizer is not None
         ), "[NVTORCH INSPECT ERROR] LogFp8TensorStats cannot be run without low-precision recipe."

+        assert tensor is not None, \
+            f"[NVTORCH INSPECT ERROR] LogFp8TensorStats needs tensor in high precision for tensor: {tensor_name}. \
+            This feature cannot be used for weight tensor with fp8 model parameters."
+
         quantized_tensor = rowwise_quantized_tensor
         assert isinstance(
             quantized_tensor, QuantizedTensor
diff --git a/transformer_engine/debug/features/log_tensor_stats.py b/transformer_engine/debug/features/log_tensor_stats.py
index 7ba2f9f771..beeecf19bf 100644
--- a/transformer_engine/debug/features/log_tensor_stats.py
+++ b/transformer_engine/debug/features/log_tensor_stats.py
@@ -115,13 +115,17 @@ def inspect_tensor(
         tensor_name: str,
         iteration: int,
         tp_group: torch.distributed.ProcessGroup,
-        tensor: torch.Tensor,
+        tensor: Optional[torch.Tensor],
         rowwise_quantized_tensor: Optional[torch.Tensor | QuantizedTensor] = None,
         columnwise_quantized_tensor: Optional[torch.Tensor | QuantizedTensor] = None,
         quantizer: Optional[Quantizer] = None,
     ):  # pylint: disable=unused-argument
         """API call used to collect the data about the tensor before process_tensor()/quantization."""

+        if tensor is None:
+            assert isinstance(rowwise_quantized_tensor, QuantizedTensor)
+            tensor = rowwise_quantized_tensor.dequantize()
+
         assert (
             type(tensor) not in [Float8Tensor, Float8TensorBase, MXFP8Tensor, MXFP8TensorBase]
             and tensor.dtype != torch.uint8
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index d564ca8e92..4312989731 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -555,6 +555,23 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):
         super().set_usage(rowwise=rowwise, columnwise=columnwise)
         if not self.output_tensor:
             self._update_parent_quantizer_usage()
+
+    def wrap_quantized_tensor(self, tensor: QuantizedTensor):
+        """Wraps the quantized tensor with the debug quantizer."""
+
+        assert API_CALL_MODIFY not in [self.rowwise_tensor_plan, self.columnwise_tensor_plan], \
+            "Modify tensor is not allowed in debug mode for weight if fp8 model parameters are used."
+
+
+        self._call_inspect_tensor_api(None, tensor, tensor)
+
+        return DebugQuantizedTensor(
+            rowwise_gemm_tensor=tensor,
+            columnwise_gemm_tensor=tensor,
+            quantizer=self,
+            layer_name=self.layer_name,
+            tensor_name=self.tensor_name,
+        )


 class DebugQuantizedTensor(QuantizedTensorBase):
diff --git a/transformer_engine/pytorch/module/base.py b/transformer_engine/pytorch/module/base.py
index 3bbfaacdf5..78e649c4c4 100644
--- a/transformer_engine/pytorch/module/base.py
+++ b/transformer_engine/pytorch/module/base.py
@@ -1367,6 +1367,10 @@ def get_weight_workspace(
                 rowwise_usage=update_rowwise_usage,
                 columnwise_usage=update_columnwise_usage,
             )
+
+            if isinstance(quantizer, DebugQuantizer):
+                tensor = quantizer.wrap_quantized_tensor(tensor)
+
             return tensor

         # Try getting workspace from cache
@@ -1530,8 +1534,6 @@ def no_debug_features_active(self, quantizers):
         if not run_current:
             return True

-        if self.primary_weights_in_fp8:
-            raise RuntimeError("FP8 weights are not supported in debug mode.")
         return False

     def _validate_name(self):

From 2e5debbb3dda1873de7fed2b6825c2384de4978d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Sep 2025 10:33:26 +0000
Subject: [PATCH 02/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../debug/features/log_fp8_tensor_stats.py |  8 +++++---
 .../debug/pytorch/debug_quantization.py | 11 ++++++-----
 transformer_engine/pytorch/module/base.py |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 2d4a2c0e2f..cc2829d996 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -281,9 +281,11 @@ def inspect_tensor(
             quantizer is not None
         ), "[NVTORCH INSPECT ERROR] LogFp8TensorStats cannot be run without low-precision recipe."

-        assert tensor is not None, \
-            f"[NVTORCH INSPECT ERROR] LogFp8TensorStats needs tensor in high precision for tensor: {tensor_name}. \
-            This feature cannot be used for weight tensor with fp8 model parameters."
+        assert tensor is not None, (
+            "[NVTORCH INSPECT ERROR] LogFp8TensorStats needs tensor in high precision for tensor:"
+            f" {tensor_name}. This feature cannot be used for weight tensor with fp8"
+            " model parameters."
+        )

         quantized_tensor = rowwise_quantized_tensor
         assert isinstance(
             quantized_tensor, QuantizedTensor
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 4312989731..3fa548e75f 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -555,16 +555,17 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):
         super().set_usage(rowwise=rowwise, columnwise=columnwise)
         if not self.output_tensor:
             self._update_parent_quantizer_usage()
-
+
     def wrap_quantized_tensor(self, tensor: QuantizedTensor):
         """Wraps the quantized tensor with the debug quantizer."""

-        assert API_CALL_MODIFY not in [self.rowwise_tensor_plan, self.columnwise_tensor_plan], \
-            "Modify tensor is not allowed in debug mode for weight if fp8 model parameters are used."
-
+        assert API_CALL_MODIFY not in [
+            self.rowwise_tensor_plan,
+            self.columnwise_tensor_plan,
+        ], "Modify tensor is not allowed in debug mode for weight if fp8 model parameters are used."
self._call_inspect_tensor_api(None, tensor, tensor) - + return DebugQuantizedTensor( rowwise_gemm_tensor=tensor, columnwise_gemm_tensor=tensor, diff --git a/transformer_engine/pytorch/module/base.py b/transformer_engine/pytorch/module/base.py index 78e649c4c4..3cd39673cb 100644 --- a/transformer_engine/pytorch/module/base.py +++ b/transformer_engine/pytorch/module/base.py @@ -1370,7 +1370,7 @@ def get_weight_workspace( if isinstance(quantizer, DebugQuantizer): tensor = quantizer.wrap_quantized_tensor(tensor) - + return tensor # Try getting workspace from cache From 11b4c263df9e2789b10ac661ff1f2d1e554bc727 Mon Sep 17 00:00:00 2001 From: Pawel Gadzinski Date: Mon, 1 Sep 2025 10:45:18 +0000 Subject: [PATCH 03/20] fixes Signed-off-by: Pawel Gadzinski --- tests/pytorch/test_sanity.py | 6 ------ transformer_engine/debug/features/api.py | 2 +- .../debug/features/log_fp8_tensor_stats.py | 20 ++++++++++++------- .../debug/features/log_tensor_stats.py | 2 ++ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/pytorch/test_sanity.py b/tests/pytorch/test_sanity.py index 5151aa96e7..17de4b3096 100644 --- a/tests/pytorch/test_sanity.py +++ b/tests/pytorch/test_sanity.py @@ -428,8 +428,6 @@ def test_sanity_linear(dtype, fp8_recipe, model, skip_wgrad, skip_dgrad, microba @pytest.mark.parametrize("fp8_model_params", all_boolean) @pytest.mark.parametrize("use_bias", all_boolean) def test_sanity_linear_with_zero_tokens(dtype, bs, model, fp8_recipe, fp8_model_params, use_bias): - if NVTE_TEST_NVINSPECT_ENABLED and fp8_model_params: - pytest.skip("Quantized model parameters are not supported in debug mode.") config = model_configs[model] ffn_hidden_size = 4 * config.hidden_size num_tokens = bs * config.max_seqlen_q @@ -465,8 +463,6 @@ def test_sanity_linear_with_zero_tokens(dtype, bs, model, fp8_recipe, fp8_model_ def test_sanity_grouped_linear( dtype, bs, model, fp8_recipe, fp8_model_params, use_bias, num_gemms, empty_split ): - if NVTE_TEST_NVINSPECT_ENABLED and fp8_model_params: - pytest.skip("FP8 model parameters are not supported in debug mode.") config = model_configs[model] ffn_hidden_size = 4 * config.hidden_size # Small batch size used to catch bug from https://github.com/NVIDIA/TransformerEngine/pull/1527. @@ -1045,8 +1041,6 @@ def test_inference_mode( quantization: Optional[str], ) -> None: """Test heuristics for initializing quantized weights""" - if NVTE_TEST_NVINSPECT_ENABLED and quantization is not None: - pytest.skip("Quantized model parameters are not supported in debug mode.") # Tensor dimensions sequence_length = 32 diff --git a/transformer_engine/debug/features/api.py b/transformer_engine/debug/features/api.py index 176d285cb6..2b0b780e51 100644 --- a/transformer_engine/debug/features/api.py +++ b/transformer_engine/debug/features/api.py @@ -263,7 +263,7 @@ def inspect_tensor( tensor_name: str one of [`activation`, `weight`, `gradient`, `output`, `wgrad`, `dgrad`], tensor: Optional[torch.Tensor] - tensor in high precision, can be None only if fp8 model parameters are used and tensor name is `weight`. + tensor in high precision. It can be None only if fp8 model parameters are used and tensor name is `weight`. 
         rowwise_quantized_tensor: Optional[torch.Tensor]
             rowwise quantized tensor,
         columnwise_quantized_tensor: Optional[torch.Tensor]
diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 2d4a2c0e2f..9af89748e7 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -28,6 +28,7 @@
 ALL_RECIPE_NAMES = ["fp8_delayed_scaling", "fp8_current_scaling", "mxfp8", "fp8_block_scaling"]

+
 def _get_recipe_name(quantizer: Optional[Quantizer]):
     if quantizer is None:
         return ""
@@ -110,6 +111,9 @@ class LogFp8TensorStats(BaseLogTensorStats):
     - scale_inv_min - minimum of the inverse of the scaling factors,
     - scale_inv_max - maximum of the inverse of the scaling factors,
     - mse - mean squared error of the quantized tensor and the original tensor = sum((quantized_tensor - original_tensor)**2) / num_elements,
+
+    If stats are collected for weight tensors and fp8 model parameters are used, only "scale_inv_min", "scale_inv_max" are supported.
+    The other stats need high precision tensor to be computed.

     tensors/tensors_struct: List[str]
         list of tensors to log
@@ -148,7 +152,7 @@ class LogFp8TensorStats(BaseLogTensorStats):
             end_step: 80
     """

-    def check_if_stat_is_supported(self, stat: str, current_recipe: str):
+    def check_if_stat_is_supported(self, stat: str, current_recipe: str, high_precision_tensor_provided: bool):
         """Returns True if stat is supported, raises ValueError otherwise."""
         columnwise = stat.endswith("_columnwise")
         if columnwise:
@@ -156,6 +160,13 @@ def check_if_stat_is_supported(self, stat: str, current_recipe: str):
         recipe_from_stat, _ = self.get_recipe_from_stat(stat, default_recipe=current_recipe)
         stat_without_recipe = stat.replace(recipe_from_stat + "_", "")

+        need_high_precision_tensor_stats = ["underflows%", "overflows%", "mse"]
+        if stat_without_recipe in need_high_precision_tensor_stats and not high_precision_tensor_provided:
+            raise ValueError(
+                f"Stat {stat} needs high precision tensor to be provided. \
+                This feature cannot be used for weight tensor with fp8 model parameters."
+            )
+
         if current_recipe == "" and recipe_from_stat == "":
             raise ValueError(
                 f"Stat {stat} does not contain a recipe name and the current recipe is not set."
@@ -281,11 +292,6 @@ def inspect_tensor(
             quantizer is not None
         ), "[NVTORCH INSPECT ERROR] LogFp8TensorStats cannot be run without low-precision recipe."

-        assert tensor is not None, (
-            "[NVTORCH INSPECT ERROR] LogFp8TensorStats needs tensor in high precision for tensor:"
-            f" {tensor_name}. This feature cannot be used for weight tensor with fp8"
-            " model parameters."
-        )

         quantized_tensor = rowwise_quantized_tensor
         assert isinstance(
             quantized_tensor, QuantizedTensor
@@ -294,7 +300,7 @@ def inspect_tensor(
         recipe_name = _get_recipe_name(quantizer)

         for stat in config["stats"]:
-            self.check_if_stat_is_supported(stat, recipe_name)
+            self.check_if_stat_is_supported(stat, recipe_name, high_precision_tensor_provided=tensor is not None)

         options = (
             config.get("start_step", None),
diff --git a/transformer_engine/debug/features/log_tensor_stats.py b/transformer_engine/debug/features/log_tensor_stats.py
index beeecf19bf..c1f1f1080e 100644
--- a/transformer_engine/debug/features/log_tensor_stats.py
+++ b/transformer_engine/debug/features/log_tensor_stats.py
@@ -122,6 +122,8 @@ def inspect_tensor(
     ):  # pylint: disable=unused-argument
         """API call used to collect the data about the tensor before process_tensor()/quantization."""

+        # Tensor is None only if fp8 model parameters are used and tensor name is `weight`.
+        # If one wants to collect stats for this tensor, we need to dequantize it.
         if tensor is None:
             assert isinstance(rowwise_quantized_tensor, QuantizedTensor)
             tensor = rowwise_quantized_tensor.dequantize()

From 8a8eed3212bd3c195dac8dc9c14fd59a856d7d1c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Sep 2025 10:47:41 +0000
Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../debug/features/log_fp8_tensor_stats.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 9af89748e7..4a9416568e 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -28,7 +28,6 @@
 ALL_RECIPE_NAMES = ["fp8_delayed_scaling", "fp8_current_scaling", "mxfp8", "fp8_block_scaling"]

-
 def _get_recipe_name(quantizer: Optional[Quantizer]):
     if quantizer is None:
         return ""
@@ -110,7 +110,7 @@ class LogFp8TensorStats(BaseLogTensorStats):
     - scale_inv_max - maximum of the inverse of the scaling factors,
     - mse - mean squared error of the quantized tensor and the original tensor = sum((quantized_tensor - original_tensor)**2) / num_elements,
-
+
     If stats are collected for weight tensors and fp8 model parameters are used, only "scale_inv_min", "scale_inv_max" are supported.
     The other stats need high precision tensor to be computed.

     tensors/tensors_struct: List[str]
@@ -152,7 +151,9 @@ class LogFp8TensorStats(BaseLogTensorStats):
             end_step: 80
     """

-    def check_if_stat_is_supported(self, stat: str, current_recipe: str, high_precision_tensor_provided: bool):
+    def check_if_stat_is_supported(
+        self, stat: str, current_recipe: str, high_precision_tensor_provided: bool
+    ):
         """Returns True if stat is supported, raises ValueError otherwise."""
         columnwise = stat.endswith("_columnwise")
         if columnwise:
@@ -161,10 +162,13 @@ def check_if_stat_is_supported(
         stat_without_recipe = stat.replace(recipe_from_stat + "_", "")

         need_high_precision_tensor_stats = ["underflows%", "overflows%", "mse"]
-        if stat_without_recipe in need_high_precision_tensor_stats and not high_precision_tensor_provided:
+        if (
+            stat_without_recipe in need_high_precision_tensor_stats
+            and not high_precision_tensor_provided
+        ):
             raise ValueError(
-                f"Stat {stat} needs high precision tensor to be provided. \
-                This feature cannot be used for weight tensor with fp8 model parameters."
+                f"Stat {stat} needs high precision tensor to be provided. This"
+                " feature cannot be used for weight tensor with fp8 model parameters."
             )

         if current_recipe == "" and recipe_from_stat == "":
             raise ValueError(
                 f"Stat {stat} does not contain a recipe name and the current recipe is not set."
@@ -292,7 +296,6 @@ def inspect_tensor(
             quantizer is not None
         ), "[NVTORCH INSPECT ERROR] LogFp8TensorStats cannot be run without low-precision recipe."

-
         quantized_tensor = rowwise_quantized_tensor
         assert isinstance(
             quantized_tensor, QuantizedTensor
@@ -300,7 +303,9 @@ def inspect_tensor(
         recipe_name = _get_recipe_name(quantizer)

         for stat in config["stats"]:
-            self.check_if_stat_is_supported(stat, recipe_name, high_precision_tensor_provided=tensor is not None)
+            self.check_if_stat_is_supported(
+                stat, recipe_name, high_precision_tensor_provided=tensor is not None
+            )

         options = (
             config.get("start_step", None),

From ecdc7273f2133f8d16983063a85db85e7363bc8b Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Mon, 1 Sep 2025 11:02:15 +0000
Subject: [PATCH 05/20] fix

Signed-off-by: Pawel Gadzinski
---
 .../debug/features/log_fp8_tensor_stats.py | 9 ++++-----
 transformer_engine/debug/pytorch/debug_quantization.py | 7 +++----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 4a9416568e..04020476fe 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -111,8 +111,7 @@ class LogFp8TensorStats(BaseLogTensorStats):
     - scale_inv_max - maximum of the inverse of the scaling factors,
     - mse - mean squared error of the quantized tensor and the original tensor = sum((quantized_tensor - original_tensor)**2) / num_elements,

-    If stats are collected for weight tensors and fp8 model parameters are used, only "scale_inv_min", "scale_inv_max" are supported.
-    The other stats need high precision tensor to be computed.
+    When collecting stats for the weight tensor with FP8 model parameters enabled, only "scale_inv_min" and "scale_inv_max" are available. All other statistics require access to the high precision tensor.

     tensors/tensors_struct: List[str]
         list of tensors to log
@@ -167,10 +166,10 @@ def check_if_stat_is_supported(
             and not high_precision_tensor_provided
         ):
             raise ValueError(
-                f"Stat {stat} needs high precision tensor to be provided. This"
-                " feature cannot be used for weight tensor with fp8 model parameters."
+                f"Stat {stat} requires a high precision tensor to be provided. \
+                This feature is not supported for weight tensors when using fp8 model parameters."
             )
-
+
         if current_recipe == "" and recipe_from_stat == "":
             raise ValueError(
                 f"Stat {stat} does not contain a recipe name and the current recipe is not set."
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 3fa548e75f..5dd2d42565 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -559,10 +559,9 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):
     def wrap_quantized_tensor(self, tensor: QuantizedTensor):
         """Wraps the quantized tensor with the debug quantizer."""

-        assert API_CALL_MODIFY not in [
-            self.rowwise_tensor_plan,
-            self.columnwise_tensor_plan,
-        ], "Modify tensor is not allowed in debug mode for weight if fp8 model parameters are used."
+        assert self.rowwise_tensor_plan == HIGH_PRECISION and self.columnwise_tensor_plan == HIGH_PRECISION, \
+            "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be modified by any feature."
+

         self._call_inspect_tensor_api(None, tensor, tensor)

From b0162af18e08a845c7e079da29c0c21d048ee7b4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:02:46 +0000
Subject: [PATCH 06/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../debug/features/log_fp8_tensor_stats.py |  7 ++++---
 transformer_engine/debug/pytorch/debug_quantization.py | 10 +++++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 04020476fe..a6e30fc07f 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -166,10 +166,11 @@ def check_if_stat_is_supported(
             and not high_precision_tensor_provided
         ):
             raise ValueError(
-                f"Stat {stat} requires a high precision tensor to be provided. \
-                This feature is not supported for weight tensors when using fp8 model parameters."
+                f"Stat {stat} requires a high precision tensor to be provided. "
+                " This feature is not supported for weight tensors when using fp8 model"
+                " parameters."
             )
-
+
         if current_recipe == "" and recipe_from_stat == "":
             raise ValueError(
                 f"Stat {stat} does not contain a recipe name and the current recipe is not set."
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 5dd2d42565..cf7eb812b1 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -559,9 +559,13 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):
     def wrap_quantized_tensor(self, tensor: QuantizedTensor):
         """Wraps the quantized tensor with the debug quantizer."""

-        assert self.rowwise_tensor_plan == HIGH_PRECISION and self.columnwise_tensor_plan == HIGH_PRECISION, \
-            "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be modified by any feature."
-
+        assert (
+            self.rowwise_tensor_plan == HIGH_PRECISION
+            and self.columnwise_tensor_plan == HIGH_PRECISION
+        ), (
+            "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be"
+            " modified by any feature."
+        )

         self._call_inspect_tensor_api(None, tensor, tensor)

From 20515c7d82886d9993a623f4d6033b573aaad426 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Mon, 1 Sep 2025 12:54:16 +0000
Subject: [PATCH 07/20] fixes

Signed-off-by: Pawel Gadzinski
---
 tests/pytorch/debug/test_log.py | 44 ++++++++++++++++++-
 .../debug/features/utils/stats_buffer.py | 19 +++++---
 .../debug/pytorch/debug_quantization.py | 11 +++--
 3 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/tests/pytorch/debug/test_log.py b/tests/pytorch/debug/test_log.py
index ca8e10ad69..e32f5eeee6 100644
--- a/tests/pytorch/debug/test_log.py
+++ b/tests/pytorch/debug/test_log.py
@@ -78,6 +78,8 @@
 all_stats.append("fp8_delayed_scaling_overflows%")  # only delayed-scaling supports overflows%

+# remove all contai
+
 @contextlib.contextmanager
 def debug_session(config_str: str, feature_dirs):
@@ -117,7 +119,6 @@ def read_log(log_dir: str) -> str:
     with open(stat_path, "r") as f:
         return f.read()
-
 def test_sanity(feature_dirs):
     if not fp8_available:
         pytest.skip(reason_for_no_fp8)
@@ -140,6 +141,47 @@
     for stat in all_stats:
         assert stat in output, f"Stat {stat} not found in output"

+LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE = """
+log:
+  layers:
+    layer_name_regex_pattern: .*
+  enabled:
+    True
+  transformer_engine:
+    LogTensorStats:
+      enabled:
+        True
+      stats: [min]
+      tensors: [weight, activation]
+      freq: 1
+    LogFp8TensorStats:
+      enabled:
+        True
+      tensors_struct:
+        - tensor: activation
+          stats: [scale_inv_min, scale_inv_max, underflows%]
+        - tensor: weight
+          stats: [scale_inv_min, scale_inv_max]
+      freq: 1
+"""
+
+def test_sanity_log_fp8_model_parameters(feature_dirs):
+    if not fp8_available:
+        pytest.skip(reason_for_no_fp8)
+
+    with debug_session(LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE, feature_dirs) as log_dir:
+        with te.fp8_model_init():
+            model = te.Linear(128, 128, params_dtype=torch.bfloat16)
+        inp = torch.zeros(128, 128, dtype=torch.bfloat16).cuda()
+        for _ in range(10):
+            with te.fp8_autocast(fp8_recipe=recipe.DelayedScaling()):
+                output = model(inp)
+            loss = output.sum()
+            loss.backward()
+            debug_api.step()
+    output = read_log(log_dir)
+    assert output, "Output is empty"
+
+    TEDebugState._reset()

 fp8_recipes = [
     recipe.MXFP8BlockScaling(),
     recipe.DelayedScaling(),
diff --git a/transformer_engine/debug/features/utils/stats_buffer.py b/transformer_engine/debug/features/utils/stats_buffer.py
index f07602d234..556eb53643 100644
--- a/transformer_engine/debug/features/utils/stats_buffer.py
+++ b/transformer_engine/debug/features/utils/stats_buffer.py
@@ -90,12 +90,19 @@ def feed(self, tensor, iteration, aux_dict=None):
         if self.modified[0] and not self.reduce_within_microbatch:
             return

-        if (
-            tensor.numel() == 0
-            if hasattr(tensor, "numel")
-            else all((t is None or t.numel() == 0) for t in tensor.get_data_tensors())
-        ):
-            return
+        if tensor is not None:
+            # tensor can be None if we compute fp8 stats for weight and fp8 model parameters are used;
+            # then high precision is not provided and the quantized tensor from aux_dict is used.
+
+            # This condition prevents computation of stats for an empty tensor.
+            # This will not happen for weight - since weight is the only case when tensor can be None,
+            # we do not need to check a similar condition for weight.
+            if (
+                tensor.numel() == 0
+                if hasattr(tensor, "numel")
+                else all((t is None or t.numel() == 0) for t in tensor.get_data_tensors())
+            ):
+                return

         # save stats for tensor to tmp buffer
         for stat_name in self.stats_to_compute:
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index cf7eb812b1..3540785fc3 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -262,7 +262,7 @@ def _call_inspect_tensor_api(
             "rowwise_quantized_tensor": rowwise_gemm_tensor,
             "quantizer": self.parent_quantizer,
         }
-        if tensor is not None and self.inspect_tensor_enabled:
+        if self.inspect_tensor_enabled:
             debug_api.transformer_engine.inspect_tensor(**args)

         if self.output_tensor:
@@ -557,11 +557,14 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):
             self._update_parent_quantizer_usage()

     def wrap_quantized_tensor(self, tensor: QuantizedTensor):
-        """Wraps the quantized tensor with the debug quantizer."""
+        """
+        Wraps the quantized tensor with the debug quantizer.
+        It is used for weight tensors when fp8 model parameters are enabled.
+        """

         assert (
-            self.rowwise_tensor_plan == HIGH_PRECISION
-            and self.columnwise_tensor_plan == HIGH_PRECISION
+            self.rowwise_tensor_plan == STANDARD_FP8_QUANTIZE
+            and self.columnwise_tensor_plan == STANDARD_FP8_QUANTIZE
         ), (
             "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be"
             " modified by any feature."

From 26926ccbfcd8483d3a5fe298018e13e047e9e5d7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Sep 2025 12:54:59 +0000
Subject: [PATCH 08/20] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/pytorch/debug/test_log.py | 12 ++++++++----
 .../debug/pytorch/debug_quantization.py |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/pytorch/debug/test_log.py b/tests/pytorch/debug/test_log.py
index e32f5eeee6..e9fe2e9fab 100644
--- a/tests/pytorch/debug/test_log.py
+++ b/tests/pytorch/debug/test_log.py
@@ -119,6 +119,7 @@ def read_log(log_dir: str) -> str:
     with open(stat_path, "r") as f:
         return f.read()

+
 def test_sanity(feature_dirs):
     if not fp8_available:
         pytest.skip(reason_for_no_fp8)
@@ -141,21 +142,22 @@
     for stat in all_stats:
         assert stat in output, f"Stat {stat} not found in output"

+
 LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE = """
 log:
   layers:
     layer_name_regex_pattern: .*
-  enabled:
+  enabled:
     True
   transformer_engine:
     LogTensorStats:
-      enabled:
+      enabled:
         True
       stats: [min]
       tensors: [weight, activation]
       freq: 1
     LogFp8TensorStats:
-      enabled:
+      enabled:
         True
       tensors_struct:
         - tensor: activation
@@ -165,10 +167,11 @@
           stats: [scale_inv_min, scale_inv_max]
       freq: 1
 """

+
 def test_sanity_log_fp8_model_parameters(feature_dirs):
     if not fp8_available:
         pytest.skip(reason_for_no_fp8)
-
+
     with debug_session(LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE, feature_dirs) as log_dir:
@@ -183,6 +186,7 @@
     assert output, "Output is empty"
     TEDebugState._reset()

+
 fp8_recipes = [
     recipe.MXFP8BlockScaling(),
     recipe.DelayedScaling(),
diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 3540785fc3..a0f5cc2a27 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -558,8 +558,8 @@ def set_usage(self, rowwise: bool = None, columnwise: bool = None):

     def wrap_quantized_tensor(self, tensor: QuantizedTensor):
         """
-        Wraps the quantized tensor with the debug quantizer.
-        It is used for weight tensors when fp8 model parameters are enabled.
+        Wraps the quantized tensor with the debug quantizer.
+        It is used for weight tensors when fp8 model parameters are enabled.
         """

From 39d500a017291f56d26947a4ec7aecf61229bf9d Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Mon, 1 Sep 2025 13:06:11 +0000
Subject: [PATCH 09/20] fix

Signed-off-by: Pawel Gadzinski
---
 tests/pytorch/debug/test_log.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/pytorch/debug/test_log.py b/tests/pytorch/debug/test_log.py
index e9fe2e9fab..c0e916350c 100644
--- a/tests/pytorch/debug/test_log.py
+++ b/tests/pytorch/debug/test_log.py
@@ -78,8 +78,6 @@
 all_stats.append("fp8_delayed_scaling_overflows%")  # only delayed-scaling supports overflows%

-# remove all contai
-
 @contextlib.contextmanager
 def debug_session(config_str: str, feature_dirs):
@@ -154,7 +152,7 @@
       enabled:
         True
       stats: [min]
-      tensors: [weight, activation]
+      tensors: [weight, activation, gradient]
       freq: 1
     LogFp8TensorStats:
       enabled:
@@ -169,6 +167,14 @@

 def test_sanity_log_fp8_model_parameters(feature_dirs):
+    """
+    Tests logging stats when model parameters are in fp8.
+    It tests 3 things:
+    - LogTensorStats for weight tensor should work without change,
+    - LogTensorStats and LogFp8TensorStats for non-weight tensors should work without change,
+    - LogFp8TensorStats should support scale_inv_min, scale_inv_max for weight tensor.
""" if not fp8_available: From 82e3f2cc0e53e8154f71537ebc6b3901ff0aee2a Mon Sep 17 00:00:00 2001 From: Pawel Gadzinski Date: Mon, 15 Sep 2025 10:15:06 +0200 Subject: [PATCH 11/20] fix Signed-off-by: Pawel Gadzinski --- tests/pytorch/debug/test_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytorch/debug/test_log.py b/tests/pytorch/debug/test_log.py index df9aac2194..4434157d02 100644 --- a/tests/pytorch/debug/test_log.py +++ b/tests/pytorch/debug/test_log.py @@ -179,7 +179,7 @@ def test_sanity_log_fp8_model_parameters(feature_dirs): pytest.skip(reason_for_no_fp8) with debug_session(LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE, feature_dirs) as log_dir: - with te.fp8_model_init(): + with te.fp8_model_init(recipe=recipe.DelayedScaling()): model = te.Linear(128, 128, params_dtype=torch.bfloat16) inp = torch.zeros(128, 128, dtype=torch.bfloat16).cuda() for _ in range(10): From c9867087effe22c59b4bfdc650fb10e52f9dff14 Mon Sep 17 00:00:00 2001 From: Pawel Gadzinski Date: Fri, 24 Oct 2025 13:50:30 +0000 Subject: [PATCH 12/20] fix Signed-off-by: Pawel Gadzinski --- transformer_engine/debug/features/log_fp8_tensor_stats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py index a6e30fc07f..f0a158b2ef 100644 --- a/transformer_engine/debug/features/log_fp8_tensor_stats.py +++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py @@ -111,7 +111,9 @@ class LogFp8TensorStats(BaseLogTensorStats): - scale_inv_max - maximum of the inverse of the scaling factors, - mse - mean squared error of the quantized tensor and the original tensor = sum((quantized_tensor - original_tensor)**2) / num_elements, - When collecting stats for the weight tensor with FP8 model parameters enabled, only "scale_inv_min" and "scale_inv_max" are available. All other statistics require access to the high precision tensor. + When collecting stats for the weight tensor with FP8 model parameters enabled, + only "scale_inv_min" and "scale_inv_max" are available. + All other statistics require access to the high precision tensor. tensors/tensors_struct: List[str] list of tensors to log From 3740ff6fe7e429d3597fc60aaa844b8e2244f929 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:52:16 +0000 Subject: [PATCH 13/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- transformer_engine/debug/features/log_fp8_tensor_stats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py index ceb0a8a579..8c6b17d57d 100644 --- a/transformer_engine/debug/features/log_fp8_tensor_stats.py +++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py @@ -111,8 +111,8 @@ class LogFp8TensorStats(BaseLogTensorStats): - scale_inv_max - maximum of the inverse of the scaling factors, - mse - mean squared error of the quantized tensor and the original tensor = sum((quantized_tensor - original_tensor)**2) / num_elements, - When collecting stats for the weight tensor with FP8 model parameters enabled, - only "scale_inv_min" and "scale_inv_max" are available. + When collecting stats for the weight tensor with FP8 model parameters enabled, + only "scale_inv_min" and "scale_inv_max" are available. 
     All other statistics require access to the high precision tensor.

     tensors/tensors_struct: List[str]

From 1aa421ce0e1d9c18f8b3ce84907c17ec15b5b5a0 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Fri, 31 Oct 2025 10:08:52 +0000
Subject: [PATCH 14/20] fix

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/debug/features/log_fp8_tensor_stats.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index 8c6b17d57d..c285d45ee1 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -168,9 +168,9 @@ def check_if_stat_is_supported(
             and not high_precision_tensor_provided
         ):
             raise ValueError(
-                f"Stat {stat} requires a high precision tensor to be provided. "
-                " This feature is not supported for weight tensors when using fp8 model"
-                " parameters."
+                f"Stat {stat} requires a high precision tensor to be provided."
+                "This feature is not supported for weight tensors when using fp8 model"
+                "parameters."
             )

From e0ba7e7eb0e3c780068769fe4f8dd9dc9a4ec4cf Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Tue, 4 Nov 2025 15:13:47 +0000
Subject: [PATCH 15/20] fix

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/debug/features/log_fp8_tensor_stats.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/transformer_engine/debug/features/log_fp8_tensor_stats.py b/transformer_engine/debug/features/log_fp8_tensor_stats.py
index c285d45ee1..4541ab0018 100644
--- a/transformer_engine/debug/features/log_fp8_tensor_stats.py
+++ b/transformer_engine/debug/features/log_fp8_tensor_stats.py
@@ -168,8 +168,8 @@ def check_if_stat_is_supported(
             and not high_precision_tensor_provided
         ):
             raise ValueError(
-                f"Stat {stat} requires a high precision tensor to be provided."
-                "This feature is not supported for weight tensors when using fp8 model"
+                f"Stat {stat} requires a high precision tensor to be provided. "
+                "This feature is not supported for weight tensors when using fp8 model "
                 "parameters."
             )

From a3ec90d9cc86c94f973999a4401aebb79190ee58 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Tue, 18 Nov 2025 16:51:04 +0000
Subject: [PATCH 16/20] fix

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/pytorch/module/linear.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transformer_engine/pytorch/module/linear.py b/transformer_engine/pytorch/module/linear.py
index b3f8165a77..f7307b8d3f 100644
--- a/transformer_engine/pytorch/module/linear.py
+++ b/transformer_engine/pytorch/module/linear.py
@@ -253,7 +253,7 @@ def forward(
         if fp8 or debug:
             # Configure quantizer
             # No need to set the quantizer states if weight is already quantized
-            if weight_quantizer is not None and not isinstance(weight, QuantizedTensor):
+            if weight_quantizer is not None and not isinstance(weight, QuantizedTensor) or debug:
                 columnwise_usage = is_grad_enabled and inp.requires_grad
                 if not columnwise_usage:
                     columnwise_usage = (

From 91e0332ebd0aa2f3af21b4fd16105fdc1a16c451 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Tue, 18 Nov 2025 16:59:57 +0000
Subject: [PATCH 17/20] fix

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/pytorch/module/linear.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/transformer_engine/pytorch/module/linear.py b/transformer_engine/pytorch/module/linear.py
index f7307b8d3f..d60b32fe27 100644
--- a/transformer_engine/pytorch/module/linear.py
+++ b/transformer_engine/pytorch/module/linear.py
@@ -253,6 +253,7 @@ def forward(
         if fp8 or debug:
             # Configure quantizer
             # No need to set the quantizer states if weight is already quantized
+            # in debug mode we create the quantizer every iteration, thus we need to set the quantizer states
            if weight_quantizer is not None and not isinstance(weight, QuantizedTensor) or debug:

From 863de121b97528032b337a611cb6a512e65d9809 Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Thu, 11 Dec 2025 12:53:14 +0000
Subject: [PATCH 18/20] Fix weight quantizer logic in debug mode

Add 'or debug' / 'and not debug' conditions to weight quantizer
configuration in linear.py, grouped_linear.py, layernorm_linear.py,
and layernorm_mlp.py. In debug mode, quantizers are recreated every
iteration, so we need to set quantizer states even when weights are
already quantized.

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/pytorch/module/grouped_linear.py | 8 +++-----
 transformer_engine/pytorch/module/layernorm_linear.py | 3 ++-
 transformer_engine/pytorch/module/layernorm_mlp.py | 5 +++--
 transformer_engine/pytorch/module/linear.py | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/transformer_engine/pytorch/module/grouped_linear.py b/transformer_engine/pytorch/module/grouped_linear.py
index 004c95c372..42edce2cc2 100644
--- a/transformer_engine/pytorch/module/grouped_linear.py
+++ b/transformer_engine/pytorch/module/grouped_linear.py
@@ -121,8 +121,9 @@ def forward(
             and not in_fp8_activation_recompute_phase()
         )
         # No need to set the quantizer states if weight is already quantized
-        if weight_quantizers[0] is not None and not isinstance(
-            weights[0], QuantizedTensorStorage
+        # in debug mode we create the quantizer every iteration, thus we need to set the quantizer states
+        if weight_quantizers[0] is not None and (
+            not isinstance(weights[0], QuantizedTensorStorage) or debug
         ):
             for weight_quantizer in weight_quantizers:
                 weight_quantizer.set_usage(rowwise=True, columnwise=columnwise_usage)
@@ -798,9 +799,6 @@ def forward(
             debug = False

         quantizers = self._get_quantizers()
-        if isinstance(weight_tensors, QuantizedTensorStorage):
-            raise RuntimeError("FP8 weights are not supported in debug mode.")
-
         (
             input_quantizers,
             weight_quantizers,
diff --git a/transformer_engine/pytorch/module/layernorm_linear.py b/transformer_engine/pytorch/module/layernorm_linear.py
index 667c199c49..c5c622f00a 100644
--- a/transformer_engine/pytorch/module/layernorm_linear.py
+++ b/transformer_engine/pytorch/module/layernorm_linear.py
@@ -296,7 +296,8 @@ def forward(

         # Configure quantizer
         # If weight is already quantized, no need to set quantizer states
-        if is_weight_param_quantized:
+        # in debug mode we create the quantizer every iteration, thus we need to set the quantizer states
+        if is_weight_param_quantized and not debug:
             weight_quantizer = weight._quantizer
         elif weight_quantizer is not None:
             weight_quantizer.set_usage(rowwise=True, columnwise=is_grad_enabled)
diff --git a/transformer_engine/pytorch/module/layernorm_mlp.py b/transformer_engine/pytorch/module/layernorm_mlp.py
index 56e050fe88..1fd8334e95 100644
--- a/transformer_engine/pytorch/module/layernorm_mlp.py
+++ b/transformer_engine/pytorch/module/layernorm_mlp.py
@@ -472,12 +472,13 @@ def _forward(
         # FP8 cast to workspace buffer
         update_workspace = is_first_microbatch is None or is_first_microbatch
         # No need to set the quantizer states if weights are already quantized
-        if isinstance(fc1_weight, QuantizedTensorStorage):
+        # in debug mode we create the quantizer every iteration, thus we need to set the quantizer states
+        if isinstance(fc1_weight, QuantizedTensorStorage) and not debug:
             fc1_weight_quantizer = fc1_weight._quantizer
         elif fc1_weight_quantizer is not None:
             fc1_weight_quantizer.set_usage(rowwise=True, columnwise=is_grad_enabled)
-        if isinstance(fc2_weight, QuantizedTensorStorage):
+        if isinstance(fc2_weight, QuantizedTensorStorage) and not debug:
             fc2_weight_quantizer = fc2_weight._quantizer
         elif fc2_weight_quantizer is not None:
             fc2_weight_quantizer.set_usage(rowwise=True, columnwise=is_grad_enabled)
diff --git a/transformer_engine/pytorch/module/linear.py b/transformer_engine/pytorch/module/linear.py
index f4a8ec2da0..341278ad1c 100644
--- a/transformer_engine/pytorch/module/linear.py
+++ b/transformer_engine/pytorch/module/linear.py
@@ -254,7 +254,7 @@ def forward(
             # Configure quantizer
             # No need to set the quantizer states if weight is already quantized
             # in debug mode we create the quantizer every iteration, thus we need to set the quantizer states
-            if weight_quantizer is not None and not isinstance(weight, QuantizedTensor) or debug:
+            if weight_quantizer is not None and (not isinstance(weight, QuantizedTensor) or debug):
                 columnwise_usage = is_grad_enabled and inp.requires_grad
                 if not columnwise_usage:
                     columnwise_usage = (

From ae85aefee674279ea8065b618814d90cea38a82e Mon Sep 17 00:00:00 2001
From: Pawel Gadzinski
Date: Mon, 16 Feb 2026 11:11:48 +0000
Subject: [PATCH 19/20] fix

Signed-off-by: Pawel Gadzinski
---
 transformer_engine/debug/pytorch/debug_quantization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 7590679d80..565e1f9541 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -566,8 +566,8 @@ def wrap_quantized_tensor(self, tensor: QuantizedTensor):
         """

         assert (
-            self.rowwise_tensor_plan == STANDARD_FP8_QUANTIZE
-            and self.columnwise_tensor_plan == STANDARD_FP8_QUANTIZE
+            self.rowwise_tensor_plan == STANDARD_QUANTIZE
+            and self.columnwise_tensor_plan == STANDARD_QUANTIZE
         ), (
             "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be"
             " modified by any feature."

From fca78e9d52d7c5049961d4300c4286664f314a25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Gadzi=C5=84ski?= <62263673+pggPL@users.noreply.github.com>
Date: Mon, 16 Feb 2026 12:32:20 +0100
Subject: [PATCH 20/20] Update transformer_engine/debug/pytorch/debug_quantization.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Signed-off-by: Paweł Gadziński <62263673+pggPL@users.noreply.github.com>
---
 transformer_engine/debug/pytorch/debug_quantization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transformer_engine/debug/pytorch/debug_quantization.py b/transformer_engine/debug/pytorch/debug_quantization.py
index 565e1f9541..5624970547 100644
--- a/transformer_engine/debug/pytorch/debug_quantization.py
+++ b/transformer_engine/debug/pytorch/debug_quantization.py
@@ -569,7 +569,7 @@ def wrap_quantized_tensor(self, tensor: QuantizedTensor):
             self.rowwise_tensor_plan == STANDARD_QUANTIZE
             and self.columnwise_tensor_plan == STANDARD_QUANTIZE
         ), (
-            "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model paramters enabled cannot be"
+            "[NVTORCH INSPECT ERROR] Weight tensor with fp8 model parameters enabled cannot be"
             " modified by any feature."
         )
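
Taken together, the series makes the debug tooling usable with weights kept in FP8. Below is a minimal usage sketch distilled from the test added in PATCH 07 and fixed in PATCH 11; the config file path is an illustrative assumption (it would contain the LogTensorStats/LogFp8TensorStats sections shown in LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE), and the initialize() call follows the nvdlfw_inspect API used by the tests.

    import torch
    import transformer_engine.pytorch as te
    from transformer_engine.common import recipe
    import nvdlfw_inspect.api as debug_api

    # Hypothetical config path; the file would carry the logging feature config
    # shown in LOG_FP8_MODEL_PARAMETERS_CONFIG_BASE above.
    debug_api.initialize(config_file="debug_config.yaml")

    # Weights are created directly in FP8; the debug quantizer now wraps them
    # via wrap_quantized_tensor() instead of raising
    # "FP8 weights are not supported in debug mode."
    with te.fp8_model_init(recipe=recipe.DelayedScaling()):
        model = te.Linear(128, 128, params_dtype=torch.bfloat16)

    inp = torch.zeros(128, 128, dtype=torch.bfloat16, device="cuda")
    for _ in range(10):
        with te.fp8_autocast(fp8_recipe=recipe.DelayedScaling()):
            out = model(inp)
        out.sum().backward()
        debug_api.step()  # advance the logging iteration so freq-based stats are flushed

For the weight tensor in this setup, LogFp8TensorStats can only report scale_inv_min and scale_inv_max; stats such as underflows% or mse require the high precision tensor, which does not exist when parameters live in FP8.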